This repository was archived by the owner on Dec 4, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 33
[DCOS-38138] Update Spark CLI for shell-escape fix #388
Merged
Merged
Changes from all commits
Commits
Show all changes
43 commits
Select commit
Hold shift + click to select a range
5150951
WIP add tests and update manifest with spark.tgz
samvantran 764e6c0
WIP fix cleanUpSubmitArgs to handle special chars and multiargs
samvantran 746969a
Cleanup
samvantran 62b542b
Add vendor pkg go-shellwords
samvantran a71e59f
Fix url
samvantran 9f1dee7
More cleanup + gofmt
samvantran 65a0f7c
Fix single quote error
samvantran 8d347b9
Fix descrip
samvantran 8336832
More fixes and tests
samvantran db177cd
Debug why single quote fails via spark run
samvantran c5df110
Fixes and cleanup
samvantran 55412bc
gofmt
samvantran 5a8e3f4
Comment out test, need to create app to print out options
samvantran 8297693
Add simple app + test for CI
samvantran 93f588a
Cleanup and fix test
samvantran 96ac0a2
Fixes
samvantran 3026e88
Cleanup test cases
samvantran 7c294c7
Address PR comments
samvantran 7b12b34
Fix expected test output
samvantran ff65589
Write confs to tempfile
samvantran 29701c3
Forgot arg in parent function
samvantran 376bf1b
Let's try escaping the quotes
samvantran f7ad435
Alternatively, wrap entire fn in file
samvantran 6b32e56
Add function isSparkApp
samvantran c711c6c
Print out all system.properties in app
samvantran 34dffbc
Run the actual file in test
samvantran d0a230e
Add run perms to tempfile
samvantran 1afd177
Octals are different in python3
samvantran 644c264
Subprocess.run needs shell=True
samvantran f267518
Sleep right after chmod (potentially old Docker bug)
samvantran e7035d0
Holy bejesus it finally works
samvantran 9f86664
Cleanup, move logic to test_spark and revert spark_utils
samvantran 0fef2c3
Simplify test_multi_arg_confs
samvantran 39434da
Address PR comments
samvantran cfadbf1
Cleanup
samvantran 9a69880
Oops, too hasty with the revert
samvantran 36faae7
Merge branch 'master' into DCOS-38138-shell-escape
samvantran fbc86dc
Use spark distro 2.6.5 created from default
samvantran 13bb7f0
Resync test.sh from dcos-commons: use DOCKER_IMAGE envvar
7865b6c
Skip test_jar test
samvantran 0ca555a
Merge branch 'master' into DCOS-38138-shell-escape
samvantran d0aae3c
Remove checking for bool values
samvantran 7b2bb6c
Move app extensions closer to method
samvantran File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,14 +6,16 @@ import ( | |
| "encoding/json" | ||
| "errors" | ||
| "fmt" | ||
| "github.com/mesosphere/dcos-commons/cli/client" | ||
| "github.com/mesosphere/dcos-commons/cli/config" | ||
| "gopkg.in/alecthomas/kingpin.v3-unstable" | ||
| "log" | ||
| "net/url" | ||
| "os" | ||
| "regexp" | ||
| "strings" | ||
|
|
||
| "github.com/mattn/go-shellwords" | ||
| "github.com/mesosphere/dcos-commons/cli/client" | ||
| "github.com/mesosphere/dcos-commons/cli/config" | ||
| "gopkg.in/alecthomas/kingpin.v3-unstable" | ||
| ) | ||
|
|
||
| var keyWhitespaceValPattern = regexp.MustCompile("(.+)\\s+(.+)") | ||
|
|
@@ -146,8 +148,10 @@ Args: | |
| StringVar(&args.mainClass) // note: spark-submit can autodetect, but only for file://local.jar | ||
| submit.Flag("properties-file", "Path to file containing whitespace-separated Spark property defaults."). | ||
| PlaceHolder("PATH").ExistingFileVar(&args.propertiesFile) | ||
| submit.Flag("conf", "Custom Spark configuration properties."). | ||
| PlaceHolder("PROP=VALUE").StringMapVar(&args.properties) | ||
| submit.Flag("conf", "Custom Spark configuration properties. "+ | ||
| "If submitting properties with multiple values, "+ | ||
| "wrap in single quotes e.g. --conf prop='val1 val2'"). | ||
| PlaceHolder("prop=value").StringMapVar(&args.properties) | ||
| submit.Flag("kerberos-principal", "Principal to be used to login to KDC."). | ||
| PlaceHolder("user@REALM").Default("").StringVar(&args.kerberosPrincipal) | ||
| submit.Flag("keytab-secret-path", "Path to Keytab in secret store to be used in the Spark drivers"). | ||
|
|
@@ -280,75 +284,84 @@ func parseApplicationFile(args *sparkArgs) error { | |
| return nil | ||
| } | ||
|
|
||
| func cleanUpSubmitArgs(argsStr string, boolVals []*sparkVal) ([]string, []string) { | ||
|
|
||
| // collapse two or more spaces to one. | ||
| argsCompacted := collapseSpacesPattern.ReplaceAllString(argsStr, " ") | ||
| // we use Kingpin to parse CLI commands and options | ||
| // spark-submit by convention uses '--arg val' while kingpin only supports --arg=val | ||
| // transformSubmitArgs turns the former into the latter | ||
| func transformSubmitArgs(argsStr string, boolVals []*sparkVal) ([]string, []string) { | ||
| // clean up any instances of shell-style escaped newlines: "arg1\\narg2" => "arg1 arg2" | ||
| argsCleaned := strings.TrimSpace(backslashNewlinePattern.ReplaceAllLiteralString(argsCompacted, " ")) | ||
| // HACK: spark-submit uses '--arg val' by convention, while kingpin only supports '--arg=val'. | ||
| // translate the former into the latter for kingpin to parse. | ||
| args := strings.Split(argsCleaned, " ") | ||
| argsEquals := make([]string, 0) | ||
| appFlags := make([]string, 0) | ||
| i := 0 | ||
| ARGLOOP: | ||
| for i < len(args) { | ||
| arg := args[i] | ||
| if !strings.HasPrefix(arg, "-") { | ||
| // looks like we've exited the flags entirely, and are now at the jar and/or args. | ||
| // any arguments without a dash at the front should've been joined to preceding keys. | ||
| // flush the rest and exit. | ||
| for i < len(args) { | ||
| arg = args[i] | ||
| // if we have a --flag going to the application we need to take the arg (flag) and the value ONLY | ||
| // if it's not of the format --flag=val which scopt allows | ||
| if strings.HasPrefix(arg, "-") { | ||
| appFlags = append(appFlags, arg) | ||
| if strings.Contains(arg, "=") || (i+1) >= len(args) { | ||
| i += 1 | ||
| } else { | ||
| // if there's a value with this flag, add it | ||
| if !strings.HasPrefix(args[i+1], "-") { | ||
| appFlags = append(appFlags, args[i+1]) | ||
| i += 1 | ||
| } | ||
| i += 1 | ||
| } | ||
| } else { | ||
| argsEquals = append(argsEquals, arg) | ||
| i += 1 | ||
| } | ||
| argsStr = strings.TrimSpace(backslashNewlinePattern.ReplaceAllLiteralString(argsStr, " ")) | ||
| // collapse two or more spaces to one | ||
| argsStr = collapseSpacesPattern.ReplaceAllString(argsStr, " ") | ||
| // parse argsStr into []string args maintaining shell escaped sequences | ||
| args, err := shellwords.Parse(argsStr) | ||
| if err != nil { | ||
| log.Fatalf("Could not parse string args correctly. Error: %v", err) | ||
| } | ||
| sparkArgs, appArgs := make([]string, 0), make([]string, 0) | ||
| LOOP: | ||
| for i := 0; i < len(args); { | ||
| current := strings.TrimSpace(args[i]) | ||
| switch { | ||
| // The main assumption with --submit-args is that all spark-submit flags come before the spark jar URL | ||
| // if current is a spark jar/app, we've processed all flags | ||
| case isSparkApp(current): | ||
| sparkArgs = append(sparkArgs, args[i]) | ||
| appArgs = append(appArgs, args[i+1:]...) | ||
| break LOOP | ||
| case strings.HasPrefix(current, "--"): | ||
| if isBoolFlag(boolVals, current) { | ||
| sparkArgs = append(sparkArgs, current) | ||
| i++ | ||
| continue LOOP | ||
| } | ||
| break | ||
| } | ||
| // join this arg to the next arg if...: | ||
| // 1. we're not at the last arg in the array | ||
| // 2. we start with "--" | ||
| // 3. we don't already contain "=" (already joined) | ||
| // 4. we aren't a boolean value (no val to join) | ||
| if i < len(args)-1 && strings.HasPrefix(arg, "--") && !strings.Contains(arg, "=") { | ||
| // check for boolean: | ||
| for _, boolVal := range boolVals { | ||
| if boolVal.flagName == arg[2:] { | ||
| argsEquals = append(argsEquals, arg) | ||
| i += 1 | ||
| continue ARGLOOP | ||
| } | ||
| if strings.Contains(current, "=") { | ||
| // already in the form arg=val, no merge required | ||
| sparkArgs = append(sparkArgs, current) | ||
| i++ | ||
| continue LOOP | ||
| } | ||
| // merge this --key against the following val to get --key=val | ||
| argsEquals = append(argsEquals, arg+"="+args[i+1]) | ||
| // otherwise, merge current with next into form arg=val; eg --driver-memory=512m | ||
| next := args[i+1] | ||
| sparkArgs = append(sparkArgs, current+"="+next) | ||
| i += 2 | ||
| } else { | ||
| // already joined or at the end, pass through: | ||
| argsEquals = append(argsEquals, arg) | ||
| i += 1 | ||
| default: | ||
| // if not a flag or jar, current is a continuation of the last arg and should not have been split | ||
| // eg extraJavaOptions="-Dparam1 -Dparam2" was parsed as [extraJavaOptions, -Dparam1, -Dparam2] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, this isn't handled by shellwords? (It sounded like it might based on the README.)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, unfortunately the library splits the string on non-escaped spaces, so this |
||
| combined := sparkArgs[len(sparkArgs)-1] + " " + current | ||
| sparkArgs = append(sparkArgs[:len(sparkArgs)-1], combined) | ||
| i++ | ||
| } | ||
| } | ||
| client.PrintVerbose("Translated spark-submit arguments: '%s'", argsEquals) | ||
| client.PrintVerbose("Translated application arguments: '%s'", appFlags) | ||
| if config.Verbose { | ||
| client.PrintVerbose("Translated spark-submit arguments: '%s'", strings.Join(sparkArgs, ", ")) | ||
| client.PrintVerbose("Translated application arguments: '%s'", strings.Join(appArgs, ", ")) | ||
| } | ||
| return sparkArgs, appArgs | ||
| } | ||
|
|
||
| return argsEquals, appFlags | ||
| var acceptedSparkAppExtensions = []string{ | ||
| ".jar", | ||
| ".py", | ||
| ".R", | ||
| } | ||
|
|
||
| func isSparkApp(str string) bool { | ||
| for _, ext := range acceptedSparkAppExtensions { | ||
| if strings.HasSuffix(str, ext) { | ||
| return true | ||
| } | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| // check if string is a boolean flag (eg --supervise) | ||
| func isBoolFlag(boolVals []*sparkVal, str string) bool { | ||
| for _, boolVal := range boolVals { | ||
| if boolVal.flagName == str[2:] { | ||
| return true | ||
| } | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| func getValsFromPropertiesFile(path string) map[string]string { | ||
|
|
@@ -416,7 +429,7 @@ func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) ( | |
| // then map flags | ||
| submit, args := sparkSubmitArgSetup() // setup | ||
| // convert and get application flags, add them to the args passed to the spark app | ||
| submitArgs, appFlags := cleanUpSubmitArgs(cmd.submitArgs, args.boolVals) | ||
| submitArgs, appFlags := transformSubmitArgs(cmd.submitArgs, args.boolVals) | ||
| args.appArgs = append(args.appArgs, appFlags...) | ||
| _, err := submit.Parse(submitArgs) | ||
|
|
||
|
|
@@ -509,7 +522,7 @@ func buildSubmitJson(cmd *SparkCommand, marathonConfig map[string]interface{}) ( | |
| } else { | ||
| client.PrintMessage("Using image '%s' for the driver and the executors (from %s).", | ||
| args.properties["spark.mesos.executor.docker.image"], imageSource) | ||
| client.PrintMessage("To disable this image on executors, set "+ | ||
| client.PrintMessage("To disable this image on executors, set " + | ||
| "spark.mesos.executor.docker.forcePullImage=false") | ||
| args.properties["spark.mesos.executor.docker.forcePullImage"] = "true" | ||
| } | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we create a ticket to address this? Kingpin supports both
`--arg val` and `--arg=val`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
here you go: https://jira.mesosphere.com/browse/DCOS-41107
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great! Thanks.