Skip to content

Commit da5958d

Browse files
authored
Switch to Spark 4 preview 2 (#136)
* Use Scala 2.13.13 & Spark 4 snapshot & spark-testing-base snapshot. Add a local Maven resolver for snapshots. Add an upsert example. Update sbt version and plugins. Update for Spark 4 / Scala 2.13. * Drop JDK 11 from the build matrix * We need JDK 17
1 parent c1adb45 commit da5958d

File tree

5 files changed

+30
-13
lines changed

5 files changed

+30
-13
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ jobs:
99
matrix:
1010
include:
1111
- java: 17
12-
- java: 11
1312
runs-on: ubuntu-latest
1413
steps:
1514
- name: Checkout
@@ -179,6 +178,12 @@ jobs:
179178
path: |
180179
data/fetched/*
181180
key: data-fetched
181+
- name: Setup JDK
182+
uses: actions/setup-java@v3
183+
with:
184+
distribution: temurin
185+
java-version: 17
186+
cache: sbt
182187
- name: Run PySpark examples
183188
run:
184189
./run_pyspark_examples.sh

build.sbt

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
scalaVersion := "2.13.8"
2-
31
lazy val root = (project in file("."))
42
.aggregate(core, native)
53

@@ -16,6 +14,7 @@ organization := "com.highperformancespark"
1614

1715
lazy val V = _root_.scalafix.sbt.BuildInfo
1816

17+
scalaVersion := "2.13.13"
1918
addCompilerPlugin(scalafixSemanticdb)
2019
scalacOptions ++= List(
2120
"-Yrangepos",
@@ -38,7 +37,8 @@ resolvers ++= Seq(
3837
"Typesafe repository" at "https://repo.typesafe.com/typesafe/releases/",
3938
"Second Typesafe repo" at "https://repo.typesafe.com/typesafe/maven-releases/",
4039
"Mesosphere Public Repository" at "https://downloads.mesosphere.io/maven",
41-
Resolver.sonatypeRepo("public")
40+
Resolver.sonatypeRepo("public"),
41+
Resolver.mavenLocal
4242
)
4343

4444
licenses := Seq("Apache License 2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0.html"))
@@ -67,17 +67,18 @@ val sparkTestingVersion = settingKey[String]("Spark testing base version without
6767
lazy val core = (project in file("core")) // regular scala code with @native methods
6868
.dependsOn(native % Runtime)
6969
.settings(javah / target := (native / nativeCompile / sourceDirectory).value / "include")
70+
.settings(scalaVersion := "2.13.13")
7071
.settings(sbtJniCoreScope := Compile)
7172
.settings(
7273
scalaVersion := "2.13.8",
73-
javacOptions ++= Seq("-source", "1.8", "-target", "1.8"),
74+
javacOptions ++= Seq("-source", "17", "-target", "17"),
7475
parallelExecution in Test := false,
7576
fork := true,
7677
javaOptions ++= Seq("-Xms4048M", "-Xmx4048M", "-Djna.nosys=true"),
7778
Test / javaOptions ++= specialOptions,
7879
// 2.4.5 is the highest version we have with the old spark-testing-base deps
79-
sparkVersion := System.getProperty("sparkVersion", "3.5.1"),
80-
sparkTestingVersion := "1.5.2",
80+
sparkVersion := System.getProperty("sparkVersion", "4.0.0-preview2"),
81+
sparkTestingVersion := "2.0.1",
8182
// additional libraries
8283
libraryDependencies ++= Seq(
8384
"org.apache.spark" %% "spark-core" % sparkVersion.value % Provided,
@@ -95,12 +96,13 @@ lazy val core = (project in file("core")) // regular scala code with @native met
9596
"net.java.dev.jna" % "jna" % "5.12.1"),
9697
scalacOptions ++= Seq("-deprecation", "-unchecked"),
9798
pomIncludeRepository := { x => false },
99+
resolvers += Resolver.mavenLocal
98100
)
99101

100102
// JNI Magic!
101103
lazy val native = (project in file("native")) // native code and build script
102104
.settings(nativeCompile / sourceDirectory := sourceDirectory.value)
103-
.settings(scalaVersion := "2.13.8")
105+
.settings(scalaVersion := "2.13.13")
104106
.enablePlugins(JniNative) // JniNative needs to be explicitly enabled
105107

106108
//tag::xmlVersionConflict[]

core/src/main/scala/com/high-performance-spark-examples/dataframe/LoadSave.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,16 @@ case class LoadSave(sc: SparkContext, session: SparkSession) {
8888
}
8989
//end::saveAppend[]
9090

91+
def upsertPandas(input: DataFrame): Unit = {
92+
//tag::upsert[]
93+
input.mergeInto("pandaInfo", $"source.id" === $"target.id")
94+
.whenMatched() // Note you can override the general match condition above if desired
95+
.updateAll()
96+
.whenNotMatched()
97+
.insertAll()
98+
//end::upsert[]
99+
}
100+
91101
def createJDBC() = {
92102
session.read.jdbc("jdbc:dialect:serverName;user=user;password=pass",
93103
"table", new Properties)

core/src/main/scala/com/high-performance-spark-examples/ml/SimpleNaiveBayes.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SimpleNaiveBayes(val uid: String)
3838
// Note this estimator assumes they start at 0 and go to numClasses
3939
val numClasses = getNumClasses(ds)
4040
// Get the number of features by peeking at the first row
41-
val numFeatures: Integer = ds.select(col($(featuresCol))).head
41+
val numFeatures: Integer = ds.select(col($(featuresCol))).head()
4242
.get(0).asInstanceOf[Vector].size
4343
// Determine the number of records for each class
4444
val groupedByLabel = ds.select(col($(labelCol)).as[Double]).groupByKey(x => x)

project/plugins.sbt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@ resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositori
55
resolvers += "sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/"
66

77

8-
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1")
8+
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.2")
99

1010
addDependencyTreePlugin
1111

1212
//tag::scalaFix[]
13-
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.10.4")
13+
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.12.1")
1414
//end::scalaFix[]
1515

1616
//tag::sbtJNIPlugin[]
17-
addSbtPlugin("com.github.sbt" %% "sbt-jni" % "1.5.4")
17+
addSbtPlugin("com.github.sbt" %% "sbt-jni" % "1.7.0")
1818
//end::sbtJNIPlugin[]
1919

2020
//tag::xmlVersionConflict[]
@@ -24,4 +24,4 @@ ThisBuild / libraryDependencySchemes ++= Seq(
2424
)
2525
//end::xmlVersionConflict[]
2626

27-
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3")
27+
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0")

0 commit comments

Comments
 (0)