Merged
Changes from 1 commit
Commits
100 commits
86db9b2
[SPARK-22833][IMPROVEMENT] in SparkHive Scala Examples
chetkhatri Dec 23, 2017
ea2642e
[SPARK-20694][EXAMPLES] Update SQLDataSourceExample.scala
CNRui Dec 23, 2017
f6084a8
[HOTFIX] Fix Scala style checks
HyukjinKwon Dec 23, 2017
aeb45df
[SPARK-22844][R] Adds date_trunc in R API
HyukjinKwon Dec 23, 2017
1219d7a
[SPARK-22889][SPARKR] Set overwrite=T when install SparkR in tests
shivaram Dec 23, 2017
0bf1a74
[SPARK-22465][CORE] Add a safety-check to RDD defaultPartitioner
Dec 24, 2017
fba0313
[SPARK-22707][ML] Optimize CrossValidator memory occupation by models…
WeichenXu123 Dec 25, 2017
33ae243
[SPARK-22893][SQL] Unified the data type mismatch message
wangyum Dec 25, 2017
12d20dd
[SPARK-22874][PYSPARK][SQL][FOLLOW-UP] Modify error messages to show …
ueshin Dec 25, 2017
be03d3a
[SPARK-22893][SQL][HOTFIX] Fix a error message of VersionsSuite
dongjoon-hyun Dec 26, 2017
0e68330
[SPARK-20168][DSTREAM] Add changes to use kinesis fetches from specif…
yashs360 Dec 26, 2017
eb386be
[SPARK-21552][SQL] Add DecimalType support to ArrowWriter.
ueshin Dec 26, 2017
ff48b1b
[SPARK-22901][PYTHON] Add deterministic flag to pyspark UDF
mgaido91 Dec 26, 2017
9348e68
[SPARK-22833][EXAMPLE] Improvement SparkHive Scala Examples
cloud-fan Dec 26, 2017
91d1b30
[SPARK-22894][SQL] DateTimeOperations should accept SQL like string type
wangyum Dec 26, 2017
6674acd
[SPARK-22846][SQL] Fix table owner is null when creating table throug…
Dec 27, 2017
b8bfce5
[SPARK-22324][SQL][PYTHON][FOLLOW-UP] Update setup.py file.
ueshin Dec 27, 2017
774715d
[SPARK-22904][SQL] Add tests for decimal operations and string casts
mgaido91 Dec 27, 2017
753793b
[SPARK-22899][ML][STREAMING] Fix OneVsRestModel transform on streamin…
WeichenXu123 Dec 28, 2017
5683984
[SPARK-18016][SQL][FOLLOW-UP] Code Generation: Constant Pool Limit - …
kiszk Dec 28, 2017
32ec269
[SPARK-22909][SS] Move Structured Streaming v2 APIs to streaming folder
zsxwing Dec 28, 2017
171f6dd
[SPARK-22757][KUBERNETES] Enable use of remote dependencies (http, s3…
liyinan926 Dec 28, 2017
ded6d27
[SPARK-22648][K8S] Add documentation covering init containers and sec…
liyinan926 Dec 28, 2017
76e8a1d
[SPARK-22843][R] Adds localCheckpoint in R
HyukjinKwon Dec 28, 2017
1eebfbe
[SPARK-21208][R] Adds setLocalProperty and getLocalProperty in R
HyukjinKwon Dec 28, 2017
755f2f5
[SPARK-20392][SQL][FOLLOWUP] should not add extra AnalysisBarrier
cloud-fan Dec 28, 2017
2877817
[SPARK-22917][SQL] Should not try to generate histogram for empty/nul…
Dec 28, 2017
5536f31
[MINOR][BUILD] Fix Java linter errors
dongjoon-hyun Dec 28, 2017
8f6d573
[SPARK-22875][BUILD] Assembly build fails for a high user id
gerashegalov Dec 28, 2017
9c21ece
[SPARK-22836][UI] Show driver logs in UI when available.
Dec 28, 2017
613b71a
[SPARK-22890][TEST] Basic tests for DateTimeOperations
wangyum Dec 28, 2017
cfcd746
[SPARK-11035][CORE] Add in-process Spark app launcher.
Dec 28, 2017
ffe6fd7
[SPARK-22818][SQL] csv escape of quote escape
Dec 28, 2017
c745730
[SPARK-22905][MLLIB] Fix ChiSqSelectorModel save implementation
WeichenXu123 Dec 29, 2017
796e48c
[SPARK-22313][PYTHON][FOLLOWUP] Explicitly import warnings namespace …
HyukjinKwon Dec 29, 2017
67ea11e
[SPARK-22891][SQL] Make hive client creation thread safe
Dec 29, 2017
d4f0b1d
[SPARK-22834][SQL] Make insertion commands have real children to fix …
gengliangwang Dec 29, 2017
224375c
[SPARK-22892][SQL] Simplify some estimation logic by using double ins…
Dec 29, 2017
cc30ef8
[SPARK-22916][SQL] shouldn't bias towards build right if user does no…
Dec 29, 2017
fcf66a3
[SPARK-21657][SQL] optimize explode quadratic memory consumpation
uzadude Dec 29, 2017
dbd492b
[SPARK-22921][PROJECT-INFRA] Choices for Assigning Jira on Merge
squito Dec 29, 2017
11a849b
[SPARK-22370][SQL][PYSPARK][FOLLOW-UP] Fix a test failure when xmlrun…
ueshin Dec 29, 2017
8b49704
[SPARK-20654][CORE] Add config to limit disk usage of the history ser…
Dec 29, 2017
4e9e6ae
[SPARK-22864][CORE] Disable allocation schedule in ExecutorAllocation…
Dec 29, 2017
afc3641
[SPARK-22905][ML][FOLLOWUP] Fix GaussianMixtureModel save
zhengruifeng Dec 29, 2017
66a7d6b
[SPARK-22920][SPARKR] sql functions for current_date, current_timesta…
felixcheung Dec 29, 2017
ccda75b
[SPARK-22921][PROJECT-INFRA] Bug fix in jira assigning
squito Dec 29, 2017
30fcdc0
[SPARK-22922][ML][PYSPARK] Pyspark portion of the fit-multiple API
MrBago Dec 30, 2017
8169630
[SPARK-22734][ML][PYSPARK] Added Python API for VectorSizeHint.
MrBago Dec 30, 2017
2ea17af
[SPARK-22881][ML][TEST] ML regression package testsuite add Structure…
WeichenXu123 Dec 30, 2017
f2b3525
[SPARK-22771][SQL] Concatenate binary inputs into a binary output
maropu Dec 30, 2017
14c4a62
[SPARK-21475][Core]Revert "[SPARK-21475][CORE] Use NIO's Files API to…
zsxwing Dec 30, 2017
234d943
[TEST][MINOR] remove redundant `EliminateSubqueryAliases` in test code
wzhfy Dec 30, 2017
fd7d141
[SPARK-22919] Bump httpclient versions
Dec 30, 2017
ea0a5ee
[SPARK-22924][SPARKR] R API for sortWithinPartitions
felixcheung Dec 30, 2017
ee3af15
[SPARK-22363][SQL][TEST] Add unit test for Window spilling
gaborgsomogyi Dec 31, 2017
cfbe11e
[SPARK-22895][SQL] Push down the deterministic predicates that are af…
gatorsmile Dec 31, 2017
3d8837e
[SPARK-22397][ML] add multiple columns support to QuantileDiscretizer
huaxingao Dec 31, 2017
028ee40
[SPARK-22801][ML][PYSPARK] Allow FeatureHasher to treat numeric colum…
Dec 31, 2017
5955a2d
[MINOR][DOCS] s/It take/It takes/g
jkremser Dec 31, 2017
994065d
[SPARK-13030][ML] Create OneHotEncoderEstimator for OneHotEncoder as …
viirya Dec 31, 2017
f5b7714
[BUILD] Close stale PRs
srowen Jan 1, 2018
7a702d8
[SPARK-21616][SPARKR][DOCS] update R migration guide and vignettes
felixcheung Jan 1, 2018
c284c4e
[MINOR] Fix a bunch of typos
srowen Dec 31, 2017
1c9f95c
[SPARK-22530][PYTHON][SQL] Adding Arrow support for ArrayType
BryanCutler Jan 1, 2018
e734a4b
[SPARK-21893][SPARK-22142][TESTS][FOLLOWUP] Enables PySpark tests for…
HyukjinKwon Jan 1, 2018
e0c090f
[SPARK-22932][SQL] Refactor AnalysisContext
gatorsmile Jan 2, 2018
a6fc300
[SPARK-22897][CORE] Expose stageAttemptId in TaskContext
advancedxy Jan 2, 2018
247a089
[SPARK-22938] Assert that SQLConf.get is accessed only on the driver.
juliuszsompolski Jan 3, 2018
1a87a16
[SPARK-22934][SQL] Make optional clauses order insensitive for CREATE…
gatorsmile Jan 3, 2018
a66fe36
[SPARK-20236][SQL] dynamic partition overwrite
cloud-fan Jan 3, 2018
9a2b65a
[SPARK-22896] Improvement in String interpolation
chetkhatri Jan 3, 2018
b297029
[SPARK-20960][SQL] make ColumnVector public
cloud-fan Jan 3, 2018
7d045c5
[SPARK-22944][SQL] improve FoldablePropagation
cloud-fan Jan 4, 2018
df95a90
[SPARK-22933][SPARKR] R Structured Streaming API for withWatermark, t…
felixcheung Jan 4, 2018
9fa703e
[SPARK-22950][SQL] Handle ChildFirstURLClassLoader's parent
yaooqinn Jan 4, 2018
d5861ab
[SPARK-22945][SQL] add java UDF APIs in the functions object
cloud-fan Jan 4, 2018
5aadbc9
[SPARK-22939][PYSPARK] Support Spark UDF in registerFunction
gatorsmile Jan 4, 2018
6f68316
[SPARK-22771][SQL] Add a missing return statement in Concat.checkInpu…
maropu Jan 4, 2018
93f92c0
[SPARK-21475][CORE][2ND ATTEMPT] Change to use NIO's Files API for ex…
jerryshao Jan 4, 2018
d2cddc8
[SPARK-22850][CORE] Ensure queued events are delivered to all event q…
Jan 4, 2018
95f9659
[SPARK-22948][K8S] Move SparkPodInitContainer to correct package.
Jan 4, 2018
e288fc8
[SPARK-22953][K8S] Avoids adding duplicated secret volumes when init-…
liyinan926 Jan 4, 2018
0428368
[SPARK-22960][K8S] Make build-push-docker-images.sh more dev-friendly.
Jan 5, 2018
df7fc3e
[SPARK-22957] ApproxQuantile breaks if the number of rows exceeds MaxInt
juliuszsompolski Jan 5, 2018
52fc5c1
[SPARK-22825][SQL] Fix incorrect results of Casting Array to String
maropu Jan 5, 2018
cf0aa65
[SPARK-22949][ML] Apply CrossValidator approach to Driver/Distributed…
MrBago Jan 5, 2018
6cff7d1
[SPARK-22757][K8S] Enable spark.jars and spark.files in KUBERNETES mode
liyinan926 Jan 5, 2018
51c33bd
[SPARK-22961][REGRESSION] Constant columns should generate QueryPlanC…
adrian-ionescu Jan 5, 2018
c0b7424
[SPARK-22940][SQL] HiveExternalCatalogVersionsSuite should succeed on…
bersprockets Jan 5, 2018
930b90a
[SPARK-13030][ML] Follow-up cleanups for OneHotEncoderEstimator
jkbradley Jan 5, 2018
ea95683
[SPARK-22914][DEPLOY] Register history.ui.port
gerashegalov Jan 6, 2018
e8af7e8
[SPARK-22937][SQL] SQL elt output binary for binary inputs
maropu Jan 6, 2018
bf65cd3
[SPARK-22960][K8S] Revert use of ARG base_image in images
liyinan926 Jan 6, 2018
f2dd8b9
[SPARK-22930][PYTHON][SQL] Improve the description of Vectorized UDFs…
icexelloss Jan 6, 2018
be9a804
[SPARK-22793][SQL] Memory leak in Spark Thrift Server
Jan 6, 2018
7b78041
[SPARK-21786][SQL] When acquiring 'compressionCodecClassName' in 'Par…
fjh100456 Jan 6, 2018
993f215
[SPARK-22901][PYTHON][FOLLOWUP] Adds the doc for asNondeterministic f…
HyukjinKwon Jan 6, 2018
9a7048b
[HOTFIX] Fix style checking failure
gatorsmile Jan 6, 2018
18e9414
[SPARK-22973][SQL] Fix incorrect results of Casting Map to String
maropu Jan 7, 2018
[MINOR] Fix a bunch of typos
srowen authored and HyukjinKwon committed Jan 1, 2018
commit c284c4e1f6f684ca8db1cc446fdcc43b46e3413c
2 changes: 1 addition & 1 deletion bin/find-spark-home
@@ -21,7 +21,7 @@

FIND_SPARK_HOME_PYTHON_SCRIPT="$(cd "$(dirname "$0")"; pwd)/find_spark_home.py"

# Short cirtuit if the user already has this set.
# Short circuit if the user already has this set.
if [ ! -z "${SPARK_HOME}" ]; then
exit 0
elif [ ! -f "$FIND_SPARK_HOME_PYTHON_SCRIPT" ]; then
@@ -86,7 +86,7 @@ class LevelDBIterator<T> implements KVStoreIterator<T> {
end = index.start(parent, params.last);
}
if (it.hasNext()) {
// When descending, the caller may have set up the start of iteration at a non-existant
// When descending, the caller may have set up the start of iteration at a non-existent
// entry that is guaranteed to be after the desired entry. For example, if you have a
// compound key (a, b) where b is a, integer, you may seek to the end of the elements that
// have the same "a" value by specifying Integer.MAX_VALUE for "b", and that value may not
@@ -47,7 +47,7 @@ class MessageWithHeader extends AbstractFileRegion {
/**
* When the write buffer size is larger than this limit, I/O will be done in chunks of this size.
* The size should not be too large as it will waste underlying memory copy. e.g. If network
* avaliable buffer is smaller than this limit, the data cannot be sent within one single write
* available buffer is smaller than this limit, the data cannot be sent within one single write
* operation while it still will make memory copy with this size.
*/
private static final int NIO_BUFFER_LIMIT = 256 * 1024;
@@ -100,7 +100,7 @@ public long transferred() {
* transferTo invocations in order to transfer a single MessageWithHeader to avoid busy waiting.
*
* The contract is that the caller will ensure position is properly set to the total number
* of bytes transferred so far (i.e. value returned by transfered()).
* of bytes transferred so far (i.e. value returned by transferred()).
*/
@Override
public long transferTo(final WritableByteChannel target, final long position) throws IOException {
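
The two comments above (the NIO_BUFFER_LIMIT note and the transferTo contract) describe writing a large buffer in bounded chunks. A minimal Scala sketch of that general pattern, assuming nothing about Spark's actual MessageWithHeader internals:

    import java.nio.ByteBuffer
    import java.nio.channels.WritableByteChannel

    // Write `buf` to `channel` in chunks of at most `chunkSize` bytes, so each
    // underlying copy into the network buffer stays small; returns bytes written.
    def writeInChunks(channel: WritableByteChannel, buf: ByteBuffer, chunkSize: Int): Long = {
      var written = 0L
      while (buf.hasRemaining) {
        val originalLimit = buf.limit()
        buf.limit(math.min(buf.position() + chunkSize, originalLimit))
        written += channel.write(buf)   // may write less than the chunk; that's fine
        buf.limit(originalLimit)        // expose the rest for the next iteration
      }
      written
    }
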
@@ -38,7 +38,7 @@
import org.apache.spark.network.util.NettyUtils;

/**
* Provides SASL-based encription for transport channels. The single method exposed by this
* Provides SASL-based encryption for transport channels. The single method exposed by this
* class installs the needed channel handlers on a connected channel.
*/
class SaslEncryption {
@@ -166,7 +166,7 @@ static class EncryptedMessage extends AbstractFileRegion {
* This makes assumptions about how netty treats FileRegion instances, because there's no way
* to know beforehand what will be the size of the encrypted message. Namely, it assumes
* that netty will try to transfer data from this message while
* <code>transfered() < count()</code>. So these two methods return, technically, wrong data,
* <code>transferred() < count()</code>. So these two methods return, technically, wrong data,
* but netty doesn't know better.
*/
@Override
@@ -29,7 +29,7 @@
/**
* A customized frame decoder that allows intercepting raw data.
* <p>
* This behaves like Netty's frame decoder (with harcoded parameters that match this library's
* This behaves like Netty's frame decoder (with hard coded parameters that match this library's
* needs), except it allows an interceptor to be installed to read data directly before it's
* framed.
* <p>
@@ -127,7 +127,7 @@ public void jsonSerializationOfExecutorRegistration() throws IOException {
mapper.readValue(shuffleJson, ExecutorShuffleInfo.class);
assertEquals(parsedShuffleInfo, shuffleInfo);

// Intentionally keep these hard-coded strings in here, to check backwards-compatability.
// Intentionally keep these hard-coded strings in here, to check backwards-compatibility.
// its not legacy yet, but keeping this here in case anybody changes it
String legacyAppIdJson = "{\"appId\":\"foo\", \"execId\":\"bar\"}";
assertEquals(appId, mapper.readValue(legacyAppIdJson, AppExecId.class));
@@ -34,7 +34,7 @@
* <li>{@link String}</li>
* </ul>
* The false positive probability ({@code FPP}) of a Bloom filter is defined as the probability that
* {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that hasu
* {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that has
* not actually been put in the {@code BloomFilter}.
*
* The implementation is largely based on the {@code BloomFilter} class from Guava.
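
The FPP definition above can be exercised directly with Spark's sketch API; a minimal Scala sketch with illustrative parameters (not part of this change):

    import org.apache.spark.util.sketch.BloomFilter

    val bf = BloomFilter.create(1000, 0.03)              // expectedNumItems, fpp
    (0 until 1000).foreach(i => bf.putString(s"item-$i"))

    assert(bf.mightContain("item-42"))                   // no false negatives for inserted items
    // For items never inserted, mightContain returns true with probability roughly fpp (~3% here).
    val falsePositives = (0 until 10000).count(i => bf.mightContain(s"absent-$i"))
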
@@ -66,7 +66,7 @@ public static boolean arrayEquals(
i += 1;
}
}
// for architectures that suport unaligned accesses, chew it up 8 bytes at a time
// for architectures that support unaligned accesses, chew it up 8 bytes at a time
if (unaligned || (((leftOffset + i) % 8 == 0) && ((rightOffset + i) % 8 == 0))) {
while (i <= length - 8) {
if (Platform.getLong(leftBase, leftOffset + i) !=
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -2276,7 +2276,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* Clean a closure to make it ready to serialized and send to tasks
* Clean a closure to make it ready to be serialized and send to tasks
* (removes unreferenced variables in $outer's, updates REPL variables)
* If <tt>checkSerializable</tt> is set, <tt>clean</tt> will also proactively
* check to see if <tt>f</tt> is serializable and throw a <tt>SparkException</tt>
@@ -61,7 +61,7 @@ private[spark] class ExecutorSummaryWrapper(val info: ExecutorSummary) {

/**
* Keep track of the existing stages when the job was submitted, and those that were
* completed during the job's execution. This allows a more accurate acounting of how
* completed during the job's execution. This allows a more accurate accounting of how
* many tasks were skipped for the job.
*/
private[spark] class JobDataWrapper(
@@ -356,7 +356,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
generatedFiles
}

/** Delete all the generated rolledover files */
/** Delete all the generated rolled over files */
def cleanup() {
testFile.getParentFile.listFiles.filter { file =>
file.getName.startsWith(testFile.getName)
2 changes: 1 addition & 1 deletion dev/github_jira_sync.py
@@ -43,7 +43,7 @@
# "notification overload" when running for the first time.
MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))

# File used as an opitimization to store maximum previously seen PR
# File used as an optimization to store maximum previously seen PR
# Used mostly because accessing ASF JIRA is slow, so we want to avoid checking
# the state of JIRA's that are tied to PR's we've already looked at.
MAX_FILE = ".github-jira-max"
2 changes: 1 addition & 1 deletion dev/lint-python
@@ -19,7 +19,7 @@

SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
# Exclude auto-geneated configuration file.
# Exclude auto-generated configuration file.
PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" )"
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
2 changes: 1 addition & 1 deletion examples/src/main/python/ml/linearsvc.py
@@ -37,7 +37,7 @@
# Fit the model
lsvcModel = lsvc.fit(training)

# Print the coefficients and intercept for linearsSVC
# Print the coefficients and intercept for linear SVC
print("Coefficients: " + str(lsvcModel.coefficients))
print("Intercept: " + str(lsvcModel.intercept))

@@ -52,7 +52,7 @@ private[kafka010] case class KafkaSourceRDDPartition(
* An RDD that reads data from Kafka based on offset ranges across multiple partitions.
* Additionally, it allows preferred locations to be set for each topic + partition, so that
* the [[KafkaSource]] can ensure the same executor always reads the same topic + partition
* and cached KafkaConsuemrs (see [[CachedKafkaConsumer]] can be used read data efficiently.
* and cached KafkaConsumers (see [[CachedKafkaConsumer]] can be used read data efficiently.
*
* @param sc the [[SparkContext]]
* @param executorKafkaParams Kafka configuration for creating KafkaConsumer on the executors
@@ -201,7 +201,7 @@ class KafkaTestUtils(withBrokerProps: Map[String, Object] = Map.empty) extends L
verifyTopicDeletionWithRetries(zkUtils, topic, partitions, List(this.server))
}

/** Add new paritions to a Kafka topic */
/** Add new partitions to a Kafka topic */
def addPartitions(topic: String, partitions: Int): Unit = {
AdminUtils.addPartitions(zkUtils, topic, partitions)
// wait until metadata is propagated
@@ -111,7 +111,7 @@ public String call(ConsumerRecord<String, String> r) {
LocationStrategies.PreferConsistent()
).map(handler);

// just making sure the java user apis work; the scala tests handle logic corner cases
// just making sure the java user APIs work; the scala tests handle logic corner cases
long count1 = rdd1.count();
long count2 = rdd2.count();
Assert.assertTrue(count1 > 0);
@@ -164,7 +164,7 @@ object KinesisUtils {
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
* @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
* Kinesis stream.
* @param stsSessionName Name to uniquely identify STS sessions if multiple princples assume
* @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume
* the same role.
* @param stsExternalId External ID that can be used to validate against the assumed IAM role's
* trust policy.
@@ -434,7 +434,7 @@ object KinesisUtils {
* @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
* @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
* Kinesis stream.
* @param stsSessionName Name to uniquely identify STS sessions if multiple princples assume
* @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume
* the same role.
* @param stsExternalId External ID that can be used to validate against the assumed IAM role's
* trust policy.
@@ -71,7 +71,7 @@ void setChildProc(Process childProc, String loggerName, InputStream logStream) {
}

/**
* Wait for the child process to exit and update the handle's state if necessary, accoding to
* Wait for the child process to exit and update the handle's state if necessary, according to
* the exit code.
*/
void monitorChild() {
@@ -94,7 +94,7 @@ class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String)
def setSeed(value: Long): this.type = set(seed, value)

/**
* Set the mamixum level of parallelism to evaluate models in parallel.
* Set the maximum level of parallelism to evaluate models in parallel.
* Default is 1 for serial evaluation
*
* @group expertSetParam
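
For context, the setter documented above is used like this; a minimal Scala sketch with hypothetical values, not code from this PR:

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
    import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

    val lr = new LogisticRegression()
    val grid = new ParamGridBuilder().addGrid(lr.regParam, Array(0.01, 0.1)).build()

    val cv = new CrossValidator()
      .setEstimator(lr)
      .setEvaluator(new BinaryClassificationEvaluator())
      .setEstimatorParamMaps(grid)
      .setNumFolds(3)
      .setParallelism(4)   // evaluate up to 4 candidate models concurrently; default 1 = serial
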
2 changes: 1 addition & 1 deletion python/pyspark/ml/image.py
@@ -212,7 +212,7 @@ def readImages(self, path, recursive=False, numPartitions=-1,
ImageSchema = _ImageSchema()


# Monkey patch to disallow instantization of this class.
# Monkey patch to disallow instantiation of this class.
def _disallow_instance(_):
raise RuntimeError("Creating instance of _ImageSchema class is disallowed.")
_ImageSchema.__init__ = _disallow_instance
@@ -95,7 +95,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
"spark.executor.cores" -> "1",
"spark.executor.memory" -> "512m",
"spark.executor.instances" -> "2",
// Sending some senstive information, which we'll make sure gets redacted
// Sending some sensitive information, which we'll make sure gets redacted
"spark.executorEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD,
"spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD
))
@@ -294,7 +294,7 @@ public void setNullAt(int ordinal) {
assertIndexIsValid(ordinal);
BitSetMethods.set(baseObject, baseOffset + 8, ordinal);

/* we assume the corrresponding column was already 0 or
/* we assume the corresponding column was already 0 or
will be set to 0 later by the caller side */
}

@@ -38,7 +38,7 @@ import org.apache.spark.sql.internal.SQLConf
* view resolution, in this way, we are able to get the correct view column ordering and
* omit the extra columns that we don't require);
* 1.2. Else set the child output attributes to `queryOutput`.
* 2. Map the `queryQutput` to view output by index, if the corresponding attributes don't match,
* 2. Map the `queryOutput` to view output by index, if the corresponding attributes don't match,
* try to up cast and alias the attribute in `queryOutput` to the attribute in the view output.
* 3. Add a Project over the child, with the new output generated by the previous steps.
* If the view output doesn't have the same number of columns neither with the child output, nor
@@ -51,7 +51,7 @@ trait InvokeLike extends Expression with NonSQLExpression {
*
* - generate codes for argument.
* - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments.
* - avoid some of nullabilty checking which are not needed because the expression is not
* - avoid some of nullability checking which are not needed because the expression is not
* nullable.
* - when needNullCheck == true, short circuit if we found one of arguments is null because
* preparing rest of arguments can be skipped in the case.
@@ -193,7 +193,8 @@ case class StaticInvoke(
* @param targetObject An expression that will return the object to call the method on.
* @param functionName The name of the method to call.
* @param dataType The expected return type of the function.
* @param arguments An optional list of expressions, whos evaluation will be passed to the function.
* @param arguments An optional list of expressions, whose evaluation will be passed to the
* function.
* @param propagateNull When true, and any of the arguments is null, null will be returned instead
* of calling the function.
* @param returnNullable When false, indicating the invoked method will always return
@@ -30,7 +30,7 @@ import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.sketch.CountMinSketch

/**
* Unit test suite for the count-min sketch SQL aggregate funciton [[CountMinSketchAgg]].
* Unit test suite for the count-min sketch SQL aggregate function [[CountMinSketchAgg]].
*/
class CountMinSketchAggSuite extends SparkFunSuite {
private val childExpression = BoundReference(0, IntegerType, nullable = true)
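
The aggregate under test builds an org.apache.spark.util.sketch.CountMinSketch (see the import above); a minimal Scala sketch of using that class directly, with illustrative parameters:

    import org.apache.spark.util.sketch.CountMinSketch

    val cms = CountMinSketch.create(0.001, 0.99, 42)     // relative error eps, confidence, seed
    Seq("a", "b", "a", "c", "a").foreach(item => cms.add(item))

    // Estimates never under-count; over-counting happens with probability at most 1 - confidence.
    assert(cms.estimateCount("a") >= 3L)
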
@@ -41,7 +41,7 @@ public interface MicroBatchWriteSupport extends BaseStreamingSink {
* @param queryId A unique string for the writing query. It's possible that there are many writing
* queries running at the same time, and the returned {@link DataSourceV2Writer}
* can use this id to distinguish itself from others.
* @param epochId The uniquenumeric ID of the batch within this writing query. This is an
* @param epochId The unique numeric ID of the batch within this writing query. This is an
* incrementing counter representing a consistent set of data; the same batch may
* be started multiple times in failure recovery scenarios, but it will always
* contain the same records.
@@ -32,7 +32,7 @@
stroke-width: 1px;
}

/* Hightlight the SparkPlan node name */
/* Highlight the SparkPlan node name */
#plan-viz-graph svg text :first-child {
font-weight: bold;
}
@@ -77,7 +77,7 @@ trait FileFormat {
}

/**
* Returns whether a file with `path` could be splitted or not.
* Returns whether a file with `path` could be split or not.
*/
def isSplitable(
sparkSession: SparkSession,
@@ -150,7 +150,7 @@ private[csv] object CSVInferSchema {
if ((allCatch opt options.timestampFormat.parse(field)).isDefined) {
TimestampType
} else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) {
// We keep this for backwords competibility.
// We keep this for backwards compatibility.
TimestampType
} else {
tryParseBoolean(field, options)
@@ -368,7 +368,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap
// The minimum key
private var minKey = Long.MaxValue

// The maxinum key
// The maximum key
private var maxKey = Long.MinValue

// The array to store the key and offset of UnsafeRow in the page.
@@ -203,7 +203,7 @@ object StreamingSymmetricHashJoinHelper extends Logging {
/**
* A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks'
* preferred location is based on which executors have the required join state stores already
* loaded. This is class is a modified verion of [[ZippedPartitionsRDD2]].
* loaded. This is class is a modified version of [[ZippedPartitionsRDD2]].
*/
class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag](
sc: SparkContext,
@@ -175,7 +175,7 @@ class SQLAppStatusListener(

// Check the execution again for whether the aggregated metrics data has been calculated.
// This can happen if the UI is requesting this data, and the onExecutionEnd handler is
// running at the same time. The metrics calculcated for the UI can be innacurate in that
// running at the same time. The metrics calculated for the UI can be innacurate in that
// case, since the onExecutionEnd handler will clean up tracked stage metrics.
if (exec.metricsValues != null) {
exec.metricsValues
@@ -86,7 +86,7 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable {
def bufferEncoder: Encoder[BUF]

/**
* Specifies the `Encoder` for the final ouput value type.
* Specifies the `Encoder` for the final output value type.
* @since 2.0.0
*/
def outputEncoder: Encoder[OUT]
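
For reference, a minimal Scala sketch of a complete Aggregator showing where bufferEncoder and outputEncoder fit (a hypothetical example, not part of this change):

    import org.apache.spark.sql.{Encoder, Encoders}
    import org.apache.spark.sql.expressions.Aggregator

    // Averages Double inputs, with a (sum, count) pair as the intermediate buffer.
    object DoubleAvg extends Aggregator[Double, (Double, Long), Double] {
      def zero: (Double, Long) = (0.0, 0L)
      def reduce(b: (Double, Long), in: Double): (Double, Long) = (b._1 + in, b._2 + 1)
      def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = (b1._1 + b2._1, b1._2 + b2._2)
      def finish(b: (Double, Long)): Double = if (b._2 == 0) Double.NaN else b._1 / b._2
      def bufferEncoder: Encoder[(Double, Long)] = Encoders.tuple(Encoders.scalaDouble, Encoders.scalaLong)
      def outputEncoder: Encoder[Double] = Encoders.scalaDouble   // the final output value type
    }
    // Usage: ds.select(DoubleAvg.toColumn) on a Dataset[Double].
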
@@ -152,7 +152,7 @@ class StreamingQueryProgress private[sql](
* @param endOffset The ending offset for data being read.
* @param numInputRows The number of records read from this source.
* @param inputRowsPerSecond The rate at which data is arriving from this source.
* @param processedRowsPerSecond The rate at which data from this source is being procressed by
* @param processedRowsPerSecond The rate at which data from this source is being processed by
* Spark.
* @since 2.1.0
*/
@@ -47,7 +47,7 @@ public MyDoubleAvg() {
_inputDataType = DataTypes.createStructType(inputFields);

// The buffer has two values, bufferSum for storing the current sum and
// bufferCount for storing the number of non-null input values that have been contribuetd
// bufferCount for storing the number of non-null input values that have been contributed
// to the current sum.
List<StructField> bufferFields = new ArrayList<>();
bufferFields.add(DataTypes.createStructField("bufferSum", DataTypes.DoubleType, true));
@@ -32,7 +32,7 @@ SELECT 1.1 - '2.2' FROM t;
SELECT 1.1 * '2.2' FROM t;
SELECT 4.4 / '2.2' FROM t;

-- concatentation
-- concatenation
SELECT '$' || cast(1 as smallint) || '$' FROM t;
SELECT '$' || 1 || '$' FROM t;
SELECT '$' || cast(1 as bigint) || '$' FROM t;
@@ -92,7 +92,7 @@ class CompactibleFileStreamLogSuite extends SparkFunSuite with SharedSQLContext

test("deriveCompactInterval") {
// latestCompactBatchId(4) + 1 <= default(5)
// then use latestestCompactBatchId + 1 === 5
// then use latestCompactBatchId + 1 === 5
assert(5 === deriveCompactInterval(5, 4))
// First divisor of 10 greater than 4 === 5
assert(5 === deriveCompactInterval(4, 9))