Merged
Changes from 1 commit
Commits
358 commits
1abcbed
[SPARK-22763][CORE] SHS: Ignore unknown events and parse through the …
gengliangwang Dec 13, 2017
0bdb4e5
[SPARK-22574][MESOS][SUBMIT] Check submission request parameters
Gschiavon Dec 13, 2017
ba0e79f
[SPARK-22772][SQL] Use splitExpressionsWithCurrentInputs to split cod…
viirya Dec 13, 2017
a83e8e6
[SPARK-22764][CORE] Fix flakiness in SparkContextSuite.
Dec 13, 2017
ef92999
[SPARK-22600][SQL][FOLLOW-UP] Fix a compilation error in TPCDS q75/q77
maropu Dec 13, 2017
bc7e4a9
Revert "[SPARK-22600][SQL][FOLLOW-UP] Fix a compilation error in TPCD…
cloud-fan Dec 14, 2017
2a29a60
Revert "[SPARK-22600][SQL] Fix 64kb limit for deeply nested expressio…
cloud-fan Dec 14, 2017
1e44dd0
[SPARK-3181][ML] Implement huber loss for LinearRegression.
yanboliang Dec 14, 2017
f8c7c1f
[SPARK-22732] Add Structured Streaming APIs to DataSourceV2
jose-torres Dec 14, 2017
c3dd2a2
[SPARK-22779][SQL] Resolve default values for fallback configs.
Dec 14, 2017
7d8e2ca
[SPARK-22775][SQL] move dictionary related APIs from ColumnVector to …
cloud-fan Dec 14, 2017
d095795
[SPARK-22785][SQL] remove ColumnVector.anyNullsSet
cloud-fan Dec 14, 2017
606ae49
[SPARK-22774][SQL][TEST] Add compilation check into TPCDSQuerySuite
kiszk Dec 14, 2017
40de176
[SPARK-16496][SQL] Add wholetext as option for reading text in SQL.
ScrapCodes Dec 14, 2017
6d99940
[SPARK-22660][BUILD] Use position() and limit() to fix ambiguity issu…
kellyzly Dec 14, 2017
2fe1633
[SPARK-22778][KUBERNETES] Added the missing service metadata for Kube…
liyinan926 Dec 14, 2017
59daf91
[SPARK-22733] Split StreamExecution into MicroBatchExecution and Stre…
jose-torres Dec 14, 2017
0ea2d8c
[SPARK-22496][SQL] thrift server adds operation logs
Dec 14, 2017
3fea5c4
[SPARK-22787][TEST][SQL] Add a TPC-H query suite
gatorsmile Dec 15, 2017
3775dd3
[SPARK-22753][SQL] Get rid of dataSource.writeAndRead
xuanyuanking Dec 15, 2017
e58f275
Revert "[SPARK-22496][SQL] thrift server adds operation logs"
gatorsmile Dec 15, 2017
9fafa82
[SPARK-22800][TEST][SQL] Add a SSB query suite
maropu Dec 15, 2017
4677623
[SPARK-22762][TEST] Basic tests for IfCoercion and CaseWhenCoercion
wangyum Dec 15, 2017
0c8fca4
[SPARK-22811][PYSPARK][ML] Fix pyspark.ml.tests failure when Hive is …
MrBago Dec 16, 2017
c2aeddf
[SPARK-22817][R] Use fixed testthat version for SparkR tests in AppVeyor
HyukjinKwon Dec 17, 2017
77988a9
[MINOR][DOC] Fix the link of 'Getting Started'
mcavdar Dec 17, 2017
7f6d10a
[SPARK-22816][TEST] Basic tests for PromoteStrings and InConversion
wangyum Dec 17, 2017
fb3636b
[SPARK-22807][SCHEDULER] Remove config that says docker and replace w…
foxish Dec 18, 2017
772e464
[SPARK-20653][CORE] Add cleaning of old elements from the status store.
Dec 18, 2017
fbfa9be
Revert "Revert "[SPARK-22496][SQL] thrift server adds operation logs""
HyukjinKwon Dec 18, 2017
3a07eff
[SPARK-22813][BUILD] Use lsof or /usr/sbin/lsof in run-tests.py
kiszk Dec 18, 2017
0609dcc
[SPARK-22777][SCHEDULER] Kubernetes mode dockerfile permission and di…
foxish Dec 18, 2017
d4e6959
[MINOR][SQL] Remove Useless zipWithIndex from ResolveAliases
gatorsmile Dec 19, 2017
ab7346f
[SPARK-22673][SQL] InMemoryRelation should utilize existing stats whe…
CodingCat Dec 19, 2017
571aa27
[SPARK-21984][SQL] Join estimation based on equi-height histogram
Dec 19, 2017
2831571
[SPARK-22791][SQL][SS] Redact Output of Explain
gatorsmile Dec 19, 2017
b779c93
[SPARK-22815][SQL] Keep PromotePrecision in Optimized Plans
gatorsmile Dec 19, 2017
ee56fc3
[SPARK-18016][SQL] Code Generation: Constant Pool Limit - reduce entr…
kiszk Dec 19, 2017
ef10f45
[SPARK-21652][SQL][FOLLOW-UP] Fix rule conflict caused by InferFilter…
gatorsmile Dec 19, 2017
6129ffa
[SPARK-22821][TEST] Basic tests for WidenSetOperationTypes, BooleanEq…
wangyum Dec 19, 2017
3a7494d
[SPARK-22827][CORE] Avoid throwing OutOfMemoryError in case of except…
Dec 20, 2017
6e36d8d
[SPARK-22829] Add new built-in function date_trunc()
youngbink Dec 20, 2017
13268a5
[SPARK-22649][PYTHON][SQL] Adding localCheckpoint to Dataset API
ferdonline Dec 20, 2017
9962390
[SPARK-22781][SS] Support creating streaming dataset with ORC files
dongjoon-hyun Dec 20, 2017
7570eab
[SPARK-22788][STREAMING] Use correct hadoop config for fs append supp…
Dec 20, 2017
7798c9e
[SPARK-22824] Restore old offset for binary compatibility
jose-torres Dec 20, 2017
d762d11
[SPARK-22832][ML] BisectingKMeans unpersist unused datasets
zhengruifeng Dec 20, 2017
c89b431
[SPARK-22849] ivy.retrieve pattern should also consider `classifier`
gatorsmile Dec 20, 2017
792915c
[SPARK-22830] Scala Coding style has been improved in Spark Examples
chetkhatri Dec 20, 2017
b176014
[SPARK-22847][CORE] Remove redundant code in AppStatusListener while …
Ngone51 Dec 20, 2017
0114c89
[SPARK-22845][SCHEDULER] Modify spark.kubernetes.allocation.batch.del…
foxish Dec 21, 2017
fb0562f
[SPARK-22810][ML][PYSPARK] Expose Python API for LinearRegression wit…
yanboliang Dec 21, 2017
9c289a5
[SPARK-22387][SQL] Propagate session configs to data source read/writ…
jiangxb1987 Dec 21, 2017
d3ae3e1
[SPARK-19634][SQL][ML][FOLLOW-UP] Improve interface of dataframe vect…
WeichenXu123 Dec 21, 2017
cb9fc8d
[SPARK-22848][SQL] Eliminate mutable state from Stack
kiszk Dec 21, 2017
59d5263
[SPARK-22324][SQL][PYTHON] Upgrade Arrow to 0.8.0
BryanCutler Dec 21, 2017
0abaf31
[SPARK-22852][BUILD] Exclude -Xlint:unchecked from sbt javadoc flags
easel Dec 21, 2017
4c2efde
[SPARK-22855][BUILD] Add -no-java-comments to sbt docs/scalacOptions
easel Dec 21, 2017
8a0ed5a
[SPARK-22668][SQL] Ensure no global variables in arguments of method …
cloud-fan Dec 21, 2017
d3a1d95
[SPARK-22786][SQL] only use AppStatusPlugin in history server
cloud-fan Dec 21, 2017
4e107fd
[SPARK-22822][TEST] Basic tests for WindowFrameCoercion and DecimalPr…
wangyum Dec 21, 2017
fe65361
[SPARK-22042][FOLLOW-UP][SQL] ReorderJoinPredicates can break when ch…
tejasapatil Dec 21, 2017
7beb375
[SPARK-22861][SQL] SQLAppStatusListener handles multi-job executions.
squito Dec 21, 2017
7ab165b
[SPARK-22648][K8S] Spark on Kubernetes - Documentation
foxish Dec 22, 2017
c0abb1d
[SPARK-22854][UI] Read Spark version from event logs.
Dec 22, 2017
c6f01ca
[SPARK-22750][SQL] Reuse mutable states when possible
mgaido91 Dec 22, 2017
a36b78b
[SPARK-22450][CORE][MLLIB][FOLLOWUP] safely register class for mllib …
zhengruifeng Dec 22, 2017
22e1849
[SPARK-22866][K8S] Fix path issue in Kubernetes dockerfile
foxish Dec 22, 2017
8df1da3
[SPARK-22862] Docs on lazy elimination of columns missing from an enc…
marmbrus Dec 22, 2017
13190a4
[SPARK-22874][PYSPARK][SQL] Modify checking pandas version to use Loo…
ueshin Dec 22, 2017
d23dc5b
[SPARK-22346][ML] VectorSizeHint Transformer for using VectorAssemble…
MrBago Dec 22, 2017
8941a4a
[SPARK-22789] Map-only continuous processing execution
jose-torres Dec 23, 2017
86db9b2
[SPARK-22833][IMPROVEMENT] in SparkHive Scala Examples
chetkhatri Dec 23, 2017
ea2642e
[SPARK-20694][EXAMPLES] Update SQLDataSourceExample.scala
CNRui Dec 23, 2017
f6084a8
[HOTFIX] Fix Scala style checks
HyukjinKwon Dec 23, 2017
aeb45df
[SPARK-22844][R] Adds date_trunc in R API
HyukjinKwon Dec 23, 2017
1219d7a
[SPARK-22889][SPARKR] Set overwrite=T when install SparkR in tests
shivaram Dec 23, 2017
0bf1a74
[SPARK-22465][CORE] Add a safety-check to RDD defaultPartitioner
Dec 24, 2017
fba0313
[SPARK-22707][ML] Optimize CrossValidator memory occupation by models…
WeichenXu123 Dec 25, 2017
33ae243
[SPARK-22893][SQL] Unified the data type mismatch message
wangyum Dec 25, 2017
12d20dd
[SPARK-22874][PYSPARK][SQL][FOLLOW-UP] Modify error messages to show …
ueshin Dec 25, 2017
be03d3a
[SPARK-22893][SQL][HOTFIX] Fix a error message of VersionsSuite
dongjoon-hyun Dec 26, 2017
0e68330
[SPARK-20168][DSTREAM] Add changes to use kinesis fetches from specif…
yashs360 Dec 26, 2017
eb386be
[SPARK-21552][SQL] Add DecimalType support to ArrowWriter.
ueshin Dec 26, 2017
ff48b1b
[SPARK-22901][PYTHON] Add deterministic flag to pyspark UDF
mgaido91 Dec 26, 2017
9348e68
[SPARK-22833][EXAMPLE] Improvement SparkHive Scala Examples
cloud-fan Dec 26, 2017
91d1b30
[SPARK-22894][SQL] DateTimeOperations should accept SQL like string type
wangyum Dec 26, 2017
6674acd
[SPARK-22846][SQL] Fix table owner is null when creating table throug…
Dec 27, 2017
b8bfce5
[SPARK-22324][SQL][PYTHON][FOLLOW-UP] Update setup.py file.
ueshin Dec 27, 2017
774715d
[SPARK-22904][SQL] Add tests for decimal operations and string casts
mgaido91 Dec 27, 2017
753793b
[SPARK-22899][ML][STREAMING] Fix OneVsRestModel transform on streamin…
WeichenXu123 Dec 28, 2017
5683984
[SPARK-18016][SQL][FOLLOW-UP] Code Generation: Constant Pool Limit - …
kiszk Dec 28, 2017
32ec269
[SPARK-22909][SS] Move Structured Streaming v2 APIs to streaming folder
zsxwing Dec 28, 2017
171f6dd
[SPARK-22757][KUBERNETES] Enable use of remote dependencies (http, s3…
liyinan926 Dec 28, 2017
ded6d27
[SPARK-22648][K8S] Add documentation covering init containers and sec…
liyinan926 Dec 28, 2017
76e8a1d
[SPARK-22843][R] Adds localCheckpoint in R
HyukjinKwon Dec 28, 2017
1eebfbe
[SPARK-21208][R] Adds setLocalProperty and getLocalProperty in R
HyukjinKwon Dec 28, 2017
755f2f5
[SPARK-20392][SQL][FOLLOWUP] should not add extra AnalysisBarrier
cloud-fan Dec 28, 2017
2877817
[SPARK-22917][SQL] Should not try to generate histogram for empty/nul…
Dec 28, 2017
5536f31
[MINOR][BUILD] Fix Java linter errors
dongjoon-hyun Dec 28, 2017
8f6d573
[SPARK-22875][BUILD] Assembly build fails for a high user id
gerashegalov Dec 28, 2017
9c21ece
[SPARK-22836][UI] Show driver logs in UI when available.
Dec 28, 2017
613b71a
[SPARK-22890][TEST] Basic tests for DateTimeOperations
wangyum Dec 28, 2017
cfcd746
[SPARK-11035][CORE] Add in-process Spark app launcher.
Dec 28, 2017
ffe6fd7
[SPARK-22818][SQL] csv escape of quote escape
Dec 28, 2017
c745730
[SPARK-22905][MLLIB] Fix ChiSqSelectorModel save implementation
WeichenXu123 Dec 29, 2017
796e48c
[SPARK-22313][PYTHON][FOLLOWUP] Explicitly import warnings namespace …
HyukjinKwon Dec 29, 2017
67ea11e
[SPARK-22891][SQL] Make hive client creation thread safe
Dec 29, 2017
d4f0b1d
[SPARK-22834][SQL] Make insertion commands have real children to fix …
gengliangwang Dec 29, 2017
224375c
[SPARK-22892][SQL] Simplify some estimation logic by using double ins…
Dec 29, 2017
cc30ef8
[SPARK-22916][SQL] shouldn't bias towards build right if user does no…
Dec 29, 2017
fcf66a3
[SPARK-21657][SQL] optimize explode quadratic memory consumpation
uzadude Dec 29, 2017
dbd492b
[SPARK-22921][PROJECT-INFRA] Choices for Assigning Jira on Merge
squito Dec 29, 2017
11a849b
[SPARK-22370][SQL][PYSPARK][FOLLOW-UP] Fix a test failure when xmlrun…
ueshin Dec 29, 2017
8b49704
[SPARK-20654][CORE] Add config to limit disk usage of the history ser…
Dec 29, 2017
4e9e6ae
[SPARK-22864][CORE] Disable allocation schedule in ExecutorAllocation…
Dec 29, 2017
afc3641
[SPARK-22905][ML][FOLLOWUP] Fix GaussianMixtureModel save
zhengruifeng Dec 29, 2017
66a7d6b
[SPARK-22920][SPARKR] sql functions for current_date, current_timesta…
felixcheung Dec 29, 2017
ccda75b
[SPARK-22921][PROJECT-INFRA] Bug fix in jira assigning
squito Dec 29, 2017
30fcdc0
[SPARK-22922][ML][PYSPARK] Pyspark portion of the fit-multiple API
MrBago Dec 30, 2017
8169630
[SPARK-22734][ML][PYSPARK] Added Python API for VectorSizeHint.
MrBago Dec 30, 2017
2ea17af
[SPARK-22881][ML][TEST] ML regression package testsuite add Structure…
WeichenXu123 Dec 30, 2017
f2b3525
[SPARK-22771][SQL] Concatenate binary inputs into a binary output
maropu Dec 30, 2017
14c4a62
[SPARK-21475][Core]Revert "[SPARK-21475][CORE] Use NIO's Files API to…
zsxwing Dec 30, 2017
234d943
[TEST][MINOR] remove redundant `EliminateSubqueryAliases` in test code
wzhfy Dec 30, 2017
fd7d141
[SPARK-22919] Bump httpclient versions
Dec 30, 2017
ea0a5ee
[SPARK-22924][SPARKR] R API for sortWithinPartitions
felixcheung Dec 30, 2017
ee3af15
[SPARK-22363][SQL][TEST] Add unit test for Window spilling
gaborgsomogyi Dec 31, 2017
cfbe11e
[SPARK-22895][SQL] Push down the deterministic predicates that are af…
gatorsmile Dec 31, 2017
3d8837e
[SPARK-22397][ML] add multiple columns support to QuantileDiscretizer
huaxingao Dec 31, 2017
028ee40
[SPARK-22801][ML][PYSPARK] Allow FeatureHasher to treat numeric colum…
Dec 31, 2017
5955a2d
[MINOR][DOCS] s/It take/It takes/g
jkremser Dec 31, 2017
994065d
[SPARK-13030][ML] Create OneHotEncoderEstimator for OneHotEncoder as …
viirya Dec 31, 2017
f5b7714
[BUILD] Close stale PRs
srowen Jan 1, 2018
7a702d8
[SPARK-21616][SPARKR][DOCS] update R migration guide and vignettes
felixcheung Jan 1, 2018
c284c4e
[MINOR] Fix a bunch of typos
srowen Dec 31, 2017
1c9f95c
[SPARK-22530][PYTHON][SQL] Adding Arrow support for ArrayType
BryanCutler Jan 1, 2018
e734a4b
[SPARK-21893][SPARK-22142][TESTS][FOLLOWUP] Enables PySpark tests for…
HyukjinKwon Jan 1, 2018
e0c090f
[SPARK-22932][SQL] Refactor AnalysisContext
gatorsmile Jan 2, 2018
a6fc300
[SPARK-22897][CORE] Expose stageAttemptId in TaskContext
advancedxy Jan 2, 2018
247a089
[SPARK-22938] Assert that SQLConf.get is accessed only on the driver.
juliuszsompolski Jan 3, 2018
1a87a16
[SPARK-22934][SQL] Make optional clauses order insensitive for CREATE…
gatorsmile Jan 3, 2018
a66fe36
[SPARK-20236][SQL] dynamic partition overwrite
cloud-fan Jan 3, 2018
9a2b65a
[SPARK-22896] Improvement in String interpolation
chetkhatri Jan 3, 2018
b297029
[SPARK-20960][SQL] make ColumnVector public
cloud-fan Jan 3, 2018
7d045c5
[SPARK-22944][SQL] improve FoldablePropagation
cloud-fan Jan 4, 2018
df95a90
[SPARK-22933][SPARKR] R Structured Streaming API for withWatermark, t…
felixcheung Jan 4, 2018
9fa703e
[SPARK-22950][SQL] Handle ChildFirstURLClassLoader's parent
yaooqinn Jan 4, 2018
d5861ab
[SPARK-22945][SQL] add java UDF APIs in the functions object
cloud-fan Jan 4, 2018
5aadbc9
[SPARK-22939][PYSPARK] Support Spark UDF in registerFunction
gatorsmile Jan 4, 2018
6f68316
[SPARK-22771][SQL] Add a missing return statement in Concat.checkInpu…
maropu Jan 4, 2018
93f92c0
[SPARK-21475][CORE][2ND ATTEMPT] Change to use NIO's Files API for ex…
jerryshao Jan 4, 2018
d2cddc8
[SPARK-22850][CORE] Ensure queued events are delivered to all event q…
Jan 4, 2018
95f9659
[SPARK-22948][K8S] Move SparkPodInitContainer to correct package.
Jan 4, 2018
e288fc8
[SPARK-22953][K8S] Avoids adding duplicated secret volumes when init-…
liyinan926 Jan 4, 2018
0428368
[SPARK-22960][K8S] Make build-push-docker-images.sh more dev-friendly.
Jan 5, 2018
df7fc3e
[SPARK-22957] ApproxQuantile breaks if the number of rows exceeds MaxInt
juliuszsompolski Jan 5, 2018
52fc5c1
[SPARK-22825][SQL] Fix incorrect results of Casting Array to String
maropu Jan 5, 2018
cf0aa65
[SPARK-22949][ML] Apply CrossValidator approach to Driver/Distributed…
MrBago Jan 5, 2018
6cff7d1
[SPARK-22757][K8S] Enable spark.jars and spark.files in KUBERNETES mode
liyinan926 Jan 5, 2018
51c33bd
[SPARK-22961][REGRESSION] Constant columns should generate QueryPlanC…
adrian-ionescu Jan 5, 2018
c0b7424
[SPARK-22940][SQL] HiveExternalCatalogVersionsSuite should succeed on…
bersprockets Jan 5, 2018
930b90a
[SPARK-13030][ML] Follow-up cleanups for OneHotEncoderEstimator
jkbradley Jan 5, 2018
ea95683
[SPARK-22914][DEPLOY] Register history.ui.port
gerashegalov Jan 6, 2018
e8af7e8
[SPARK-22937][SQL] SQL elt output binary for binary inputs
maropu Jan 6, 2018
bf65cd3
[SPARK-22960][K8S] Revert use of ARG base_image in images
liyinan926 Jan 6, 2018
f2dd8b9
[SPARK-22930][PYTHON][SQL] Improve the description of Vectorized UDFs…
icexelloss Jan 6, 2018
be9a804
[SPARK-22793][SQL] Memory leak in Spark Thrift Server
Jan 6, 2018
7b78041
[SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'Par…
fjh100456 Jan 6, 2018
993f215
[SPARK-22901][PYTHON][FOLLOWUP] Adds the doc for asNondeterministic f…
HyukjinKwon Jan 6, 2018
9a7048b
[HOTFIX] Fix style checking failure
gatorsmile Jan 6, 2018
18e9414
[SPARK-22973][SQL] Fix incorrect results of Casting Map to String
maropu Jan 7, 2018
71d65a3
[SPARK-22985] Fix argument escaping bug in from_utc_timestamp / to_ut…
JoshRosen Jan 8, 2018
3e40eb3
[SPARK-22566][PYTHON] Better error message for `_merge_type` in Panda…
gberger-palantir Jan 8, 2018
8fdeb4b
[SPARK-22979][PYTHON][SQL] Avoid per-record type dispatch in Python d…
HyukjinKwon Jan 8, 2018
2c73d2a
[SPARK-22983] Don't push filters beneath aggregates with empty groupi…
JoshRosen Jan 8, 2018
eb45b52
[SPARK-21865][SQL] simplify the distribution semantic of Spark SQL
cloud-fan Jan 8, 2018
40b983c
[SPARK-22952][CORE] Deprecate stageAttemptId in favour of stageAttemp…
advancedxy Jan 8, 2018
eed82a0
[SPARK-22992][K8S] Remove assumption of the DNS domain
foxish Jan 8, 2018
4f7e758
[SPARK-22912] v2 data source support in MicroBatchExecution
jose-torres Jan 8, 2018
68ce792
[SPARK-22972] Couldn't find corresponding Hive SerDe for data source …
xubo245 Jan 9, 2018
849043c
[SPARK-22990][CORE] Fix method isFairScheduler in JobsTab and StagesTab
gengliangwang Jan 9, 2018
f20131d
[SPARK-22984] Fix incorrect bitmap copying and offset adjustment in G…
JoshRosen Jan 9, 2018
8486ad4
[SPARK-21292][DOCS] refreshtable example
felixcheung Jan 9, 2018
02214b0
[SPARK-21293][SPARKR][DOCS] structured streaming doc update
felixcheung Jan 9, 2018
0959aa5
[SPARK-23000] Fix Flaky test suite DataSourceWithHiveMetastoreCatalog…
gatorsmile Jan 9, 2018
6a4206f
[SPARK-22998][K8S] Set missing value for SPARK_MOUNTED_CLASSPATH in t…
liyinan926 Jan 9, 2018
f44ba91
[SPARK-16060][SQL] Support Vectorized ORC Reader
dongjoon-hyun Jan 9, 2018
2250cb7
[SPARK-22981][SQL] Fix incorrect results of Casting Struct to String
maropu Jan 9, 2018
96ba217
[SPARK-23005][CORE] Improve RDD.take on small number of partitions
gengliangwang Jan 10, 2018
6f169ca
[MINOR] fix a typo in BroadcastJoinSuite
cloud-fan Jan 10, 2018
7bcc266
[SPARK-23018][PYTHON] Fix createDataFrame from Pandas timestamp serie…
BryanCutler Jan 10, 2018
e599837
[SPARK-23009][PYTHON] Fix for non-str col names to createDataFrame fr…
BryanCutler Jan 10, 2018
edf0a48
[SPARK-22982] Remove unsafe asynchronous close() call from FileDownlo…
JoshRosen Jan 10, 2018
eaac60a
[SPARK-16060][SQL][FOLLOW-UP] add a wrapper solution for vectorized o…
cloud-fan Jan 10, 2018
70bcc9d
[SPARK-22993][ML] Clarify HasCheckpointInterval param doc
sethah Jan 10, 2018
f340b6b
[SPARK-22997] Add additional defenses against use of freed MemoryBlocks
JoshRosen Jan 10, 2018
344e3aa
[SPARK-23019][CORE] Wait until SparkContext.stop() finished in SparkL…
gengliangwang Jan 10, 2018
9b33dfc
[SPARK-22951][SQL] fix aggregation after dropDuplicates on empty data…
Jan 10, 2018
a6647ff
[SPARK-22587] Spark job fails if fs.defaultFS and application jar are…
Jan 11, 2018
87c98de
[SPARK-23001][SQL] Fix NullPointerException when DESC a database with…
gatorsmile Jan 11, 2018
1c70da3
[SPARK-20657][CORE] Speed up rendering of the stages page.
Jan 11, 2018
0552c36
[SPARK-22967][TESTS] Fix VersionSuite's unit tests by change Windows …
Ngone51 Jan 11, 2018
76892bc
[SPARK-23000][TEST-HADOOP2.6] Fix Flaky test suite DataSourceWithHive…
gatorsmile Jan 11, 2018
b46e58b
[SPARK-19732][FOLLOW-UP] Document behavior changes made in na.fill an…
gatorsmile Jan 11, 2018
6d230dc
Update PageRank.scala
ddna1021 Jan 11, 2018
0b2eefb
[SPARK-22994][K8S] Use a single image for all Spark containers.
Jan 11, 2018
6f7aaed
[SPARK-22908] Add kafka source and sink for continuous processing.
jose-torres Jan 11, 2018
186bf8f
[SPARK-23046][ML][SPARKR] Have RFormula include VectorSizeHint in pip…
MrBago Jan 11, 2018
b5042d7
[SPARK-23008][ML] OnehotEncoderEstimator python API
WeichenXu123 Jan 12, 2018
cbe7c6f
[SPARK-22986][CORE] Use a cache to avoid instantiating multiple insta…
ho3rexqj Jan 12, 2018
a7d98d5
[SPARK-23008][ML][FOLLOW-UP] mark OneHotEncoder python API deprecated
WeichenXu123 Jan 12, 2018
5050868
[SPARK-23025][SQL] Support Null type in scala reflection
mgaido91 Jan 12, 2018
f5300fb
Update rdd-programming-guide.md
Jan 12, 2018
651f761
[SPARK-23028] Bump master branch version to 2.4.0-SNAPSHOT
gatorsmile Jan 12, 2018
7bd14cf
[MINOR][BUILD] Fix Java linter errors
dongjoon-hyun Jan 12, 2018
03e1901
Merge branch 'master' into rk/merge-upstream
Jan 12, 2018
5427739
[SPARK-22975][SS] MetricsReporter should not throw exception when the…
mgaido91 Jan 12, 2018
66dd9cb
Resolve conflicts keeping our k8s code
Jan 12, 2018
55dbfbc
Revert "[SPARK-22908] Add kafka source and sink for continuous proces…
sameeragarwal Jan 12, 2018
cd9f49a
[SPARK-22980][PYTHON][SQL] Clarify the length of each series is of ea…
HyukjinKwon Jan 13, 2018
628a1ca
[SPARK-23043][BUILD] Upgrade json4s to 3.5.3
shimamoto Jan 13, 2018
fc6fe8a
[SPARK-22870][CORE] Dynamic allocation should allow 0 idle time
wangyum Jan 13, 2018
bd4a21b
[SPARK-23036][SQL][TEST] Add withGlobalTempView for testing
xubo245 Jan 13, 2018
ba891ec
[SPARK-22790][SQL] add a configurable factor to describe HadoopFsRela…
CodingCat Jan 13, 2018
0066d6f
[SPARK-21213][SQL][FOLLOWUP] Use compatible types for comparisons in …
maropu Jan 13, 2018
afae8f2
[SPARK-22959][PYTHON] Configuration to select the modules for daemon …
HyukjinKwon Jan 14, 2018
c3548d1
[SPARK-23063][K8S] K8s changes for publishing scripts (and a couple o…
foxish Jan 14, 2018
7a3d0aa
[SPARK-23038][TEST] Update docker/spark-test (JDK/OS)
dongjoon-hyun Jan 14, 2018
66738d2
[SPARK-23069][DOCS][SPARKR] fix R doc for describe missing text
felixcheung Jan 14, 2018
990f05c
[SPARK-23021][SQL] AnalysisBarrier should override innerChildren to p…
maropu Jan 14, 2018
60eeecd
[SPARK-23051][CORE] Fix for broken job description in Spark UI
smurakozi Jan 14, 2018
42a1a15
[SPARK-22999][SQL] show databases like command' can remove the like k…
Jan 14, 2018
b98ffa4
[SPARK-23054][SQL] Fix incorrect results of casting UserDefinedType t…
maropu Jan 15, 2018
9a96bfc
[SPARK-23049][SQL] `spark.sql.files.ignoreCorruptFiles` should work f…
dongjoon-hyun Jan 15, 2018
2b9fdfe
Consistent versions
Jan 15, 2018
1f2962c
one bouncycastle
Jan 15, 2018
91a6f1f
versions
Jan 15, 2018
b598083
[SPARK-23023][SQL] Cast field data to strings in showString
maropu Jan 15, 2018
a38c887
[SPARK-19550][BUILD][FOLLOW-UP] Remove MaxPermSize for sql module
wangyum Jan 15, 2018
bd08a9e
[SPARK-23070] Bump previousSparkVersion in MimaBuild.scala to be 2.2.0
gatorsmile Jan 15, 2018
6c81fe2
[SPARK-23035][SQL] Fix improper information of TempTableAlreadyExists…
xubo245 Jan 15, 2018
1ef99cf
Merge branch 'master' into rk/merge-upstream
Jan 15, 2018
b8380fd
Fixes
Jan 15, 2018
277425d
need to write as int96
Jan 23, 2018
d37f4ee
we're smaller?
Jan 23, 2018
fab5e5a
no mesos and hive thrift
Jan 23, 2018
bd4056d
change test
Jan 24, 2018
2cf672d
correct assert
Jan 24, 2018
ab93a53
Update ParquetInteroperabilitySuite.scala
robert3005 Jan 25, 2018
[MINOR] Fix a bunch of typos
srowen authored and HyukjinKwon committed Jan 1, 2018
commit c284c4e1f6f684ca8db1cc446fdcc43b46e3413c
2 changes: 1 addition & 1 deletion bin/find-spark-home
@@ -21,7 +21,7 @@
 
 FIND_SPARK_HOME_PYTHON_SCRIPT="$(cd "$(dirname "$0")"; pwd)/find_spark_home.py"
 
-# Short cirtuit if the user already has this set.
+# Short circuit if the user already has this set.
 if [ ! -z "${SPARK_HOME}" ]; then
 exit 0
 elif [ ! -f "$FIND_SPARK_HOME_PYTHON_SCRIPT" ]; then
@@ -86,7 +86,7 @@ class LevelDBIterator<T> implements KVStoreIterator<T> {
 end = index.start(parent, params.last);
 }
 if (it.hasNext()) {
-// When descending, the caller may have set up the start of iteration at a non-existant
+// When descending, the caller may have set up the start of iteration at a non-existent
 // entry that is guaranteed to be after the desired entry. For example, if you have a
 // compound key (a, b) where b is a, integer, you may seek to the end of the elements that
 // have the same "a" value by specifying Integer.MAX_VALUE for "b", and that value may not
@@ -47,7 +47,7 @@ class MessageWithHeader extends AbstractFileRegion {
 /**
 * When the write buffer size is larger than this limit, I/O will be done in chunks of this size.
 * The size should not be too large as it will waste underlying memory copy. e.g. If network
-* avaliable buffer is smaller than this limit, the data cannot be sent within one single write
+* available buffer is smaller than this limit, the data cannot be sent within one single write
 * operation while it still will make memory copy with this size.
 */
 private static final int NIO_BUFFER_LIMIT = 256 * 1024;
@@ -100,7 +100,7 @@ public long transferred() {
 * transferTo invocations in order to transfer a single MessageWithHeader to avoid busy waiting.
 *
 * The contract is that the caller will ensure position is properly set to the total number
-* of bytes transferred so far (i.e. value returned by transfered()).
+* of bytes transferred so far (i.e. value returned by transferred()).
 */
 @Override
 public long transferTo(final WritableByteChannel target, final long position) throws IOException {
@@ -38,7 +38,7 @@
 import org.apache.spark.network.util.NettyUtils;
 
 /**
-* Provides SASL-based encription for transport channels. The single method exposed by this
+* Provides SASL-based encryption for transport channels. The single method exposed by this
 * class installs the needed channel handlers on a connected channel.
 */
 class SaslEncryption {
@@ -166,7 +166,7 @@ static class EncryptedMessage extends AbstractFileRegion {
 * This makes assumptions about how netty treats FileRegion instances, because there's no way
 * to know beforehand what will be the size of the encrypted message. Namely, it assumes
 * that netty will try to transfer data from this message while
-* <code>transfered() < count()</code>. So these two methods return, technically, wrong data,
+* <code>transferred() < count()</code>. So these two methods return, technically, wrong data,
 * but netty doesn't know better.
 */
 @Override
@@ -29,7 +29,7 @@
 /**
 * A customized frame decoder that allows intercepting raw data.
 * <p>
-* This behaves like Netty's frame decoder (with harcoded parameters that match this library's
+* This behaves like Netty's frame decoder (with hard coded parameters that match this library's
 * needs), except it allows an interceptor to be installed to read data directly before it's
 * framed.
 * <p>
@@ -127,7 +127,7 @@ public void jsonSerializationOfExecutorRegistration() throws IOException {
 mapper.readValue(shuffleJson, ExecutorShuffleInfo.class);
 assertEquals(parsedShuffleInfo, shuffleInfo);
 
-// Intentionally keep these hard-coded strings in here, to check backwards-compatability.
+// Intentionally keep these hard-coded strings in here, to check backwards-compatibility.
 // its not legacy yet, but keeping this here in case anybody changes it
 String legacyAppIdJson = "{\"appId\":\"foo\", \"execId\":\"bar\"}";
 assertEquals(appId, mapper.readValue(legacyAppIdJson, AppExecId.class));
@@ -34,7 +34,7 @@
 * <li>{@link String}</li>
 * </ul>
 * The false positive probability ({@code FPP}) of a Bloom filter is defined as the probability that
-* {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that hasu
+* {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that has
 * not actually been put in the {@code BloomFilter}.
 *
 * The implementation is largely based on the {@code BloomFilter} class from Guava.
@@ -66,7 +66,7 @@ public static boolean arrayEquals(
 i += 1;
 }
 }
-// for architectures that suport unaligned accesses, chew it up 8 bytes at a time
+// for architectures that support unaligned accesses, chew it up 8 bytes at a time
 if (unaligned || (((leftOffset + i) % 8 == 0) && ((rightOffset + i) % 8 == 0))) {
 while (i <= length - 8) {
 if (Platform.getLong(leftBase, leftOffset + i) !=
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -2276,7 +2276,7 @@ class SparkContext(config: SparkConf) extends Logging {
 }
 
 /**
-* Clean a closure to make it ready to serialized and send to tasks
+* Clean a closure to make it ready to be serialized and send to tasks
 * (removes unreferenced variables in $outer's, updates REPL variables)
 * If <tt>checkSerializable</tt> is set, <tt>clean</tt> will also proactively
 * check to see if <tt>f</tt> is serializable and throw a <tt>SparkException</tt>
@@ -61,7 +61,7 @@ private[spark] class ExecutorSummaryWrapper(val info: ExecutorSummary) {
 
 /**
 * Keep track of the existing stages when the job was submitted, and those that were
-* completed during the job's execution. This allows a more accurate acounting of how
+* completed during the job's execution. This allows a more accurate accounting of how
 * many tasks were skipped for the job.
 */
 private[spark] class JobDataWrapper(
@@ -356,7 +356,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
 generatedFiles
 }
 
-/** Delete all the generated rolledover files */
+/** Delete all the generated rolled over files */
 def cleanup() {
 testFile.getParentFile.listFiles.filter { file =>
 file.getName.startsWith(testFile.getName)
2 changes: 1 addition & 1 deletion dev/github_jira_sync.py
@@ -43,7 +43,7 @@
 # "notification overload" when running for the first time.
 MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))
 
-# File used as an opitimization to store maximum previously seen PR
+# File used as an optimization to store maximum previously seen PR
 # Used mostly because accessing ASF JIRA is slow, so we want to avoid checking
 # the state of JIRA's that are tied to PR's we've already looked at.
 MAX_FILE = ".github-jira-max"
2 changes: 1 addition & 1 deletion dev/lint-python
@@ -19,7 +19,7 @@
 
 SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
 SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
-# Exclude auto-geneated configuration file.
+# Exclude auto-generated configuration file.
 PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" )"
 PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
 PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
2 changes: 1 addition & 1 deletion examples/src/main/python/ml/linearsvc.py
@@ -37,7 +37,7 @@
 # Fit the model
 lsvcModel = lsvc.fit(training)
 
-# Print the coefficients and intercept for linearsSVC
+# Print the coefficients and intercept for linear SVC
 print("Coefficients: " + str(lsvcModel.coefficients))
 print("Intercept: " + str(lsvcModel.intercept))
 
@@ -52,7 +52,7 @@ private[kafka010] case class KafkaSourceRDDPartition(
 * An RDD that reads data from Kafka based on offset ranges across multiple partitions.
 * Additionally, it allows preferred locations to be set for each topic + partition, so that
 * the [[KafkaSource]] can ensure the same executor always reads the same topic + partition
-* and cached KafkaConsuemrs (see [[CachedKafkaConsumer]] can be used read data efficiently.
+* and cached KafkaConsumers (see [[CachedKafkaConsumer]] can be used read data efficiently.
 *
 * @param sc the [[SparkContext]]
 * @param executorKafkaParams Kafka configuration for creating KafkaConsumer on the executors
@@ -201,7 +201,7 @@ class KafkaTestUtils(withBrokerProps: Map[String, Object] = Map.empty) extends L
 verifyTopicDeletionWithRetries(zkUtils, topic, partitions, List(this.server))
 }
 
-/** Add new paritions to a Kafka topic */
+/** Add new partitions to a Kafka topic */
 def addPartitions(topic: String, partitions: Int): Unit = {
 AdminUtils.addPartitions(zkUtils, topic, partitions)
 // wait until metadata is propagated
@@ -111,7 +111,7 @@ public String call(ConsumerRecord<String, String> r) {
 LocationStrategies.PreferConsistent()
 ).map(handler);
 
-// just making sure the java user apis work; the scala tests handle logic corner cases
+// just making sure the java user APIs work; the scala tests handle logic corner cases
 long count1 = rdd1.count();
 long count2 = rdd2.count();
 Assert.assertTrue(count1 > 0);
@@ -164,7 +164,7 @@ object KinesisUtils {
 * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
 * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
 * Kinesis stream.
-* @param stsSessionName Name to uniquely identify STS sessions if multiple princples assume
+* @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume
 * the same role.
 * @param stsExternalId External ID that can be used to validate against the assumed IAM role's
 * trust policy.
@@ -434,7 +434,7 @@ object KinesisUtils {
 * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
 * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
 * Kinesis stream.
-* @param stsSessionName Name to uniquely identify STS sessions if multiple princples assume
+* @param stsSessionName Name to uniquely identify STS sessions if multiple princpals assume
 * the same role.
 * @param stsExternalId External ID that can be used to validate against the assumed IAM role's
 * trust policy.
@@ -71,7 +71,7 @@ void setChildProc(Process childProc, String loggerName, InputStream logStream) {
 }
 
 /**
-* Wait for the child process to exit and update the handle's state if necessary, accoding to
+* Wait for the child process to exit and update the handle's state if necessary, according to
 * the exit code.
 */
 void monitorChild() {
@@ -94,7 +94,7 @@ class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String)
 def setSeed(value: Long): this.type = set(seed, value)
 
 /**
-* Set the mamixum level of parallelism to evaluate models in parallel.
+* Set the maximum level of parallelism to evaluate models in parallel.
 * Default is 1 for serial evaluation
 *
 * @group expertSetParam
2 changes: 1 addition & 1 deletion python/pyspark/ml/image.py
@@ -212,7 +212,7 @@ def readImages(self, path, recursive=False, numPartitions=-1,
 ImageSchema = _ImageSchema()
 
 
-# Monkey patch to disallow instantization of this class.
+# Monkey patch to disallow instantiation of this class.
 def _disallow_instance(_):
 raise RuntimeError("Creating instance of _ImageSchema class is disallowed.")
 _ImageSchema.__init__ = _disallow_instance
@@ -95,7 +95,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
 "spark.executor.cores" -> "1",
 "spark.executor.memory" -> "512m",
 "spark.executor.instances" -> "2",
-// Sending some senstive information, which we'll make sure gets redacted
+// Sending some sensitive information, which we'll make sure gets redacted
 "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD,
 "spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD
 ))
@@ -294,7 +294,7 @@ public void setNullAt(int ordinal) {
 assertIndexIsValid(ordinal);
 BitSetMethods.set(baseObject, baseOffset + 8, ordinal);
 
-/* we assume the corrresponding column was already 0 or
+/* we assume the corresponding column was already 0 or
 will be set to 0 later by the caller side */
 }
 
@@ -38,7 +38,7 @@ import org.apache.spark.sql.internal.SQLConf
 * view resolution, in this way, we are able to get the correct view column ordering and
 * omit the extra columns that we don't require);
 * 1.2. Else set the child output attributes to `queryOutput`.
-* 2. Map the `queryQutput` to view output by index, if the corresponding attributes don't match,
+* 2. Map the `queryOutput` to view output by index, if the corresponding attributes don't match,
 * try to up cast and alias the attribute in `queryOutput` to the attribute in the view output.
 * 3. Add a Project over the child, with the new output generated by the previous steps.
 * If the view output doesn't have the same number of columns neither with the child output, nor
@@ -51,7 +51,7 @@ trait InvokeLike extends Expression with NonSQLExpression {
 *
 * - generate codes for argument.
 * - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments.
-* - avoid some of nullabilty checking which are not needed because the expression is not
+* - avoid some of nullability checking which are not needed because the expression is not
 * nullable.
 * - when needNullCheck == true, short circuit if we found one of arguments is null because
 * preparing rest of arguments can be skipped in the case.
@@ -193,7 +193,8 @@ case class StaticInvoke(
 * @param targetObject An expression that will return the object to call the method on.
 * @param functionName The name of the method to call.
 * @param dataType The expected return type of the function.
-* @param arguments An optional list of expressions, whos evaluation will be passed to the function.
+* @param arguments An optional list of expressions, whose evaluation will be passed to the
+* function.
 * @param propagateNull When true, and any of the arguments is null, null will be returned instead
 * of calling the function.
 * @param returnNullable When false, indicating the invoked method will always return
@@ -30,7 +30,7 @@ import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.sketch.CountMinSketch
 
 /**
-* Unit test suite for the count-min sketch SQL aggregate funciton [[CountMinSketchAgg]].
+* Unit test suite for the count-min sketch SQL aggregate function [[CountMinSketchAgg]].
 */
 class CountMinSketchAggSuite extends SparkFunSuite {
 private val childExpression = BoundReference(0, IntegerType, nullable = true)
@@ -41,7 +41,7 @@ public interface MicroBatchWriteSupport extends BaseStreamingSink {
 * @param queryId A unique string for the writing query. It's possible that there are many writing
 * queries running at the same time, and the returned {@link DataSourceV2Writer}
 * can use this id to distinguish itself from others.
-* @param epochId The uniquenumeric ID of the batch within this writing query. This is an
+* @param epochId The unique numeric ID of the batch within this writing query. This is an
 * incrementing counter representing a consistent set of data; the same batch may
 * be started multiple times in failure recovery scenarios, but it will always
 * contain the same records.
@@ -32,7 +32,7 @@
 stroke-width: 1px;
 }
 
-/* Hightlight the SparkPlan node name */
+/* Highlight the SparkPlan node name */
 #plan-viz-graph svg text :first-child {
 font-weight: bold;
 }
@@ -77,7 +77,7 @@ trait FileFormat {
 }
 
 /**
-* Returns whether a file with `path` could be splitted or not.
+* Returns whether a file with `path` could be split or not.
 */
 def isSplitable(
 sparkSession: SparkSession,
@@ -150,7 +150,7 @@ private[csv] object CSVInferSchema {
 if ((allCatch opt options.timestampFormat.parse(field)).isDefined) {
 TimestampType
 } else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) {
-// We keep this for backwords competibility.
+// We keep this for backwards compatibility.
 TimestampType
 } else {
 tryParseBoolean(field, options)
@@ -368,7 +368,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap
 // The minimum key
 private var minKey = Long.MaxValue
 
-// The maxinum key
+// The maximum key
 private var maxKey = Long.MinValue
 
 // The array to store the key and offset of UnsafeRow in the page.
@@ -203,7 +203,7 @@ object StreamingSymmetricHashJoinHelper extends Logging {
 /**
 * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks'
 * preferred location is based on which executors have the required join state stores already
-* loaded. This is class is a modified verion of [[ZippedPartitionsRDD2]].
+* loaded. This is class is a modified version of [[ZippedPartitionsRDD2]].
 */
 class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag](
 sc: SparkContext,
@@ -175,7 +175,7 @@ class SQLAppStatusListener(
 
 // Check the execution again for whether the aggregated metrics data has been calculated.
 // This can happen if the UI is requesting this data, and the onExecutionEnd handler is
-// running at the same time. The metrics calculcated for the UI can be innacurate in that
+// running at the same time. The metrics calculated for the UI can be innacurate in that
 // case, since the onExecutionEnd handler will clean up tracked stage metrics.
 if (exec.metricsValues != null) {
 exec.metricsValues
@@ -86,7 +86,7 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable {
 def bufferEncoder: Encoder[BUF]
 
 /**
-* Specifies the `Encoder` for the final ouput value type.
+* Specifies the `Encoder` for the final output value type.
 * @since 2.0.0
 */
 def outputEncoder: Encoder[OUT]
@@ -152,7 +152,7 @@ class StreamingQueryProgress private[sql](
 * @param endOffset The ending offset for data being read.
 * @param numInputRows The number of records read from this source.
 * @param inputRowsPerSecond The rate at which data is arriving from this source.
-* @param processedRowsPerSecond The rate at which data from this source is being procressed by
+* @param processedRowsPerSecond The rate at which data from this source is being processed by
 * Spark.
 * @since 2.1.0
 */
@@ -47,7 +47,7 @@ public MyDoubleAvg() {
 _inputDataType = DataTypes.createStructType(inputFields);
 
 // The buffer has two values, bufferSum for storing the current sum and
-// bufferCount for storing the number of non-null input values that have been contribuetd
+// bufferCount for storing the number of non-null input values that have been contributed
 // to the current sum.
 List<StructField> bufferFields = new ArrayList<>();
 bufferFields.add(DataTypes.createStructField("bufferSum", DataTypes.DoubleType, true));
@@ -32,7 +32,7 @@ SELECT 1.1 - '2.2' FROM t;
 SELECT 1.1 * '2.2' FROM t;
 SELECT 4.4 / '2.2' FROM t;
 
--- concatentation
+-- concatenation
 SELECT '$' || cast(1 as smallint) || '$' FROM t;
 SELECT '$' || 1 || '$' FROM t;
 SELECT '$' || cast(1 as bigint) || '$' FROM t;
@@ -92,7 +92,7 @@ class CompactibleFileStreamLogSuite extends SparkFunSuite with SharedSQLContext
 
 test("deriveCompactInterval") {
 // latestCompactBatchId(4) + 1 <= default(5)
-// then use latestestCompactBatchId + 1 === 5
+// then use latestCompactBatchId + 1 === 5
 assert(5 === deriveCompactInterval(5, 4))
 // First divisor of 10 greater than 4 === 5
 assert(5 === deriveCompactInterval(4, 9))