-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-33084][CORE][SQL] Add jar support ivy path #29966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
afaf7bd
51daf9a
3579de0
d6e8caf
169e1f8
0e589ec
b3e3211
9161340
0e3c1ec
300ca56
63e877b
733e62c
b60ba1e
883b9d3
208afc2
ba9ea29
10b3737
7f878c2
d2c1950
2200076
5a9cc30
875d8a7
e921245
614a865
8c5cb7c
f460974
1f7dc01
050c410
ff611a6
03aca3b
653b919
6e48275
bdc5035
9c22882
9c88f8d
8220e5a
49ac62c
b69a62e
273a5ac
ebe1c9c
6034fb2
e22e398
afea73f
13000f2
bce3d40
d53f302
57c351d
8c53b83
4048c5b
aa53482
2ffb431
8c18cdf
6bd41cd
fbc236c
90491d5
75ff3ce
4c44dae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,10 +56,10 @@ private[spark] object DependencyUtils extends Logging { | |
| * Parse URI query string's parameter value of `transitive` and `exclude`. | ||
| * Other invalid parameters will be ignored. | ||
| * | ||
| * @param uri Ivy uri need to be downloaded. | ||
| * @param uri Ivy URI need to be downloaded. | ||
| * @return Tuple value of parameter `transitive` and `exclude` value. | ||
| * | ||
| * 1. transitive: whether to download dependency jar of ivy URI, default value is false | ||
| * 1. transitive: whether to download dependency jar of Ivy URI, default value is false | ||
| * and this parameter value is case-sensitive. Invalid value will be treat as false. | ||
| * Example: Input: exclude=org.mortbay.jetty:jetty&transitive=true | ||
| * Output: true | ||
|
|
@@ -77,56 +77,56 @@ private[spark] object DependencyUtils extends Logging { | |
| val mapTokens = uriQuery.split("&").map(_.split("=")) | ||
| if (mapTokens.exists(isInvalidQueryString)) { | ||
| throw new IllegalArgumentException( | ||
| s"Invalid query string in ivy uri ${uri.toString}: $uriQuery") | ||
| s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery") | ||
| } | ||
| val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1) | ||
|
|
||
| // Parse transitive parameters (e.g., transitive=true) in an ivy URL, default value is false | ||
| // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is false | ||
| val transitiveParams = groupedParams.get("transitive") | ||
| if (transitiveParams.map(_.size).getOrElse(0) > 1) { | ||
| logWarning("It's best to specify `transitive` parameter in ivy URL query only once." + | ||
| logWarning("It's best to specify `transitive` parameter in ivy URI query only once." + | ||
| " If there are multiple `transitive` parameter, we will select the last one") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hive has the same behaviour with this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, but we can have this warning
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If so, what's the hive behaviour? it will throw an exception instead selecting the last one?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Select the last one
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, ok. |
||
| } | ||
| val transitive = | ||
| transitiveParams.flatMap(_.takeRight(1).map(_._2 == "true").headOption).getOrElse(false) | ||
AngersZhuuuu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http) | ||
| // in an ivy URL. When download ivy URL jar, Spark won't download transitive jar | ||
| // in an Ivy URI. When download Ivy URI jar, Spark won't download transitive jar | ||
| // in a excluded list. | ||
| val exclusionList = groupedParams.get("exclude").map { params => | ||
| params.map(_._2).flatMap { excludeString => | ||
| val excludes = excludeString.split(",") | ||
| if (excludes.map(_.split(":")).exists(isInvalidQueryString)) { | ||
| throw new IllegalArgumentException( | ||
| s"Invalid exclude string in ivy uri ${uri.toString}:" + | ||
| s" expected 'org:module,org:module,..', found " + excludeString) | ||
| s"Invalid exclude string in Ivy URI ${uri.toString}:" + | ||
| " expected 'org:module,org:module,..', found " + excludeString) | ||
| } | ||
| excludes | ||
| }.mkString(",") | ||
| }.getOrElse("") | ||
|
|
||
| val validParams = Set("transitive", "exclude") | ||
| val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq.sorted | ||
| val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq | ||
| if (invalidParams.nonEmpty) { | ||
| logWarning(s"Invalid parameters `${invalidParams.mkString(",")}` found " + | ||
| s"in ivy uri query `$uriQuery`.") | ||
| logWarning(s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " + | ||
| s"in Ivy URI query `$uriQuery`.") | ||
| } | ||
|
|
||
| (transitive, exclusionList) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if an invalid param is given in hive, e.g.,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems like we should at least warn on invalid param? |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Download Ivy URIs dependency jars. | ||
| * Download Ivy URI's dependency jars. | ||
| * | ||
| * @param uri Ivy uri need to be downloaded. The URI format should be: | ||
| * @param uri Ivy URI need to be downloaded. The URI format should be: | ||
| * `ivy://group:module:version[?query]` | ||
| * Ivy URI query part format should be: | ||
| * `parameter=value¶meter=value...` | ||
| * Note that currently ivy URI query part support two parameters: | ||
| * 1. transitive: whether to download dependent jars related to your ivy URL. | ||
| * Note that currently Ivy URI query part support two parameters: | ||
| * 1. transitive: whether to download dependent jars related to your Ivy URI. | ||
| * transitive=false or `transitive=true`, if not set, the default value is false. | ||
| * 2. exclude: exclusion list when download ivy URL jar and dependency jars. | ||
| * 2. exclude: exclusion list when download Ivy URI jar and dependency jars. | ||
| * The `exclude` parameter content is a ',' separated `group:module` pair string : | ||
| * `exclude=group:module,group:module...` | ||
| * @return Comma separated string list of jars downloaded. | ||
|
|
@@ -136,12 +136,12 @@ private[spark] object DependencyUtils extends Logging { | |
| val authority = uri.getAuthority | ||
| if (authority == null) { | ||
| throw new IllegalArgumentException( | ||
| s"Invalid ivy url authority in uri ${uri.toString}:" + | ||
| s" Expected 'org:module:version', found null.") | ||
| s"Invalid Ivy URI authority in uri ${uri.toString}:" + | ||
| " Expected 'org:module:version', found null.") | ||
| } | ||
| if (authority.split(":").length != 3) { | ||
| throw new IllegalArgumentException( | ||
| s"Invalid ivy uri authority in uri ${uri.toString}:" + | ||
| s"Invalid Ivy URI authority in uri ${uri.toString}:" + | ||
| s" Expected 'org:module:version', found $authority.") | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1058,7 +1058,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| } | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- default transitive = false") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- default transitive = false") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
|
|
@@ -1068,15 +1068,15 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- invalid transitive use default false") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- invalid transitive use default false") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=foo") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
| assert(!sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar"))) | ||
| assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- transitive=true will download dependency jars") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- transitive=true will download dependency jars") { | ||
| val logAppender = new LogAppender("transitive=true will download dependency jars") | ||
| withLogAppender(logAppender) { | ||
| sc = new SparkContext( | ||
|
|
@@ -1090,23 +1090,23 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| dependencyJars.foreach(jar => assert(sc.listJars().exists(_.contains(jar)))) | ||
|
|
||
| assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( | ||
| "Added dependency jars of ivy uri" + | ||
| "Added dependency jars of Ivy URI" + | ||
| " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) | ||
|
|
||
| // test dependency jars exist | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") | ||
| assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( | ||
| "The dependency jars of ivy uri" + | ||
| "The dependency jars of Ivy URI" + | ||
| " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) | ||
| val existMsg = logAppender.loggingEvents.filter(_.getRenderedMessage.contains( | ||
| "The dependency jars of ivy uri" + | ||
| "The dependency jars of Ivy URI" + | ||
| " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) | ||
| .head.getRenderedMessage | ||
| dependencyJars.foreach(jar => assert(existMsg.contains(jar))) | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test exclude param when transitive=true") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test exclude param when transitive=true") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" + | ||
| "?exclude=commons-lang:commons-lang&transitive=true") | ||
|
|
@@ -1115,15 +1115,15 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test different version") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test different version") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.6.0") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.6.0.jar"))) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add the jars first, then check if the dependent jars co-exist in
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw, this behaviour is the same with hive?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Done |
||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test invalid param") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test invalid param") { | ||
| val logAppender = new LogAppender("test log when have invalid parameter") | ||
| withLogAppender(logAppender) { | ||
| sc = new SparkContext( | ||
|
|
@@ -1132,12 +1132,12 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| "invalidParam1=foo&invalidParam2=boo") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
| assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains( | ||
| "Invalid parameters `invalidParam1,invalidParam2` found in ivy uri query" + | ||
| "Invalid parameters `invalidParam1,invalidParam2` found in Ivy URI query" + | ||
| " `invalidParam1=foo&invalidParam2=boo`."))) | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test multiple transitive params") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test multiple transitive params") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| // transitive=invalidValue will win and treated as false | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" + | ||
|
|
@@ -1152,7 +1152,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test param case sensitive") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test param case sensitive") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?TRANSITIVE=true") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
|
|
@@ -1163,7 +1163,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test transitive value case sensitive") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test transitive value case sensitive") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=TRUE") | ||
| assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) | ||
|
|
@@ -1174,7 +1174,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) | ||
| } | ||
|
|
||
| test("SPARK-33084: Add jar support ivy url -- test invalid ivy uri") { | ||
| test("SPARK-33084: Add jar support Ivy URI -- test invalid ivy uri") { | ||
| sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) | ||
| val e1 = intercept[IllegalArgumentException] { | ||
| sc.addJar("ivy://") | ||
|
|
@@ -1184,25 +1184,25 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu | |
| val e2 = intercept[IllegalArgumentException] { | ||
| sc.addJar("ivy://org.apache.hive:hive-contrib") | ||
| }.getMessage | ||
| assert(e2.contains("Invalid ivy uri authority in uri ivy://org.apache.hive:hive-contrib:" + | ||
| assert(e2.contains("Invalid Ivy URI authority in uri ivy://org.apache.hive:hive-contrib:" + | ||
| " Expected 'org:module:version', found org.apache.hive:hive-contrib.")) | ||
|
|
||
| val e3 = intercept[IllegalArgumentException] { | ||
| sc.addJar("ivy://org.apache.hive:hive-contrib:2.3.7?foo=") | ||
| }.getMessage | ||
| assert(e3.contains("Invalid query string in ivy uri" + | ||
| assert(e3.contains("Invalid query string in Ivy URI" + | ||
| " ivy://org.apache.hive:hive-contrib:2.3.7?foo=:")) | ||
|
|
||
| val e4 = intercept[IllegalArgumentException] { | ||
| sc.addJar("ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo") | ||
| }.getMessage | ||
| assert(e4.contains("Invalid query string in ivy uri" + | ||
| assert(e4.contains("Invalid query string in Ivy URI" + | ||
| " ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo: bar=&baz=foo")) | ||
|
|
||
| val e5 = intercept[IllegalArgumentException] { | ||
| sc.addJar("ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho") | ||
| }.getMessage | ||
| assert(e5.contains("Invalid exclude string in ivy uri" + | ||
| assert(e5.contains("Invalid exclude string in Ivy URI" + | ||
| " ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho:" + | ||
| " expected 'org:module,org:module,..', found org.pentaho")) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add tests to check if this warning message is shown only once by using
LogAppender?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure