Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
31b51de
initial cut of refactored run-tests-jenkins script into python
Jul 13, 2015
f2a1dc6
fixed pep8 issues
Jul 14, 2015
d202c42
fixed list bug when finding log files
Jul 14, 2015
3ae1d49
correctly escape newlines for long strings
Jul 14, 2015
2a3d67d
fixed numerous bugs, updated to use urllib2 over curl, removed 'find'…
Jul 15, 2015
d83c535
fixed typos, style, and linting issues
Jul 15, 2015
3acac90
fixed indenting with list
Jul 15, 2015
395928d
fixed typo
Jul 15, 2015
30f90bc
fixed merge conflicts
Jul 15, 2015
4765dc8
added variables to remove indents, fixed ' : ' => ': ' in maps
Jul 15, 2015
e90fdeb
moved if statement for urllib2 into the try block
Jul 15, 2015
64c13c1
updated token passing for github oauth
Jul 15, 2015
85aaf76
added subprocess_check_call() as backport from python 2.7, moved subp…
Jul 15, 2015
f756098
removed unnecessary escapes
Jul 15, 2015
39f6a1f
changed test process to only return the returncode
Jul 15, 2015
7b11665
fixed serialization error between int and string, created an error re…
Jul 15, 2015
cf110bc
fixed reference error and pep8 bugs
Jul 15, 2015
c9e5c5f
commented out tests and fixed test result output
Jul 16, 2015
5272381
printing of the pr results
Jul 16, 2015
528279a
fixed strip issue with PR results, from strip => rstrip
Jul 16, 2015
6a9ed32
run-tests-jenkins now properly runs the tests, removed license from f…
Jul 16, 2015
22f9be8
reverted license
Jul 16, 2015
5d63bc5
removed mima to fail binary compatibility checks
Jul 16, 2015
33e3143
reverted mima excludes
Jul 16, 2015
d82567a
set accumulator test suite to fail
Jul 16, 2015
2117c64
reverted accumulator test in favor of graphx test to only trigger 'gr…
Jul 16, 2015
831376e
fixed scala style check
Jul 16, 2015
d76f559
fixed scala style checks
Jul 16, 2015
05d9ef4
reverted scala test failure, added pyspark failure
Jul 16, 2015
0ad51b4
updated to add --absolute-names with tar command for archiving logs
Jul 27, 2015
a218baf
reverting python test
Aug 4, 2015
2853b60
fixed merge conflicts
Oct 16, 2015
fe397fb
updated to account for PR #7883
Oct 16, 2015
206930e
removed 'send_archived_logs' function
Oct 16, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixed numerous bugs, updated to use urllib2 over curl, removed 'find'…
… in place of os.walk
  • Loading branch information
Brennon York committed Jul 15, 2015
commit 2a3d67d9c0cea49a25c8da86bbfea49ecfac2335
2 changes: 1 addition & 1 deletion dev/lint-python
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
PATHS_TO_CHECK="./python/pyspark/ ./ec2/spark_ec2.py ./examples/src/main/python/ ./dev/sparktestsupport"
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py"
PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py ./dev/run-tests-jenkins.py"
PYTHON_LINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/python-lint-report.txt"

cd "$SPARK_ROOT_DIR"
Expand Down
167 changes: 85 additions & 82 deletions dev/run-tests-jenkins.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,75 +21,68 @@
import os
import sys
import json
import urllib2
import functools
import subprocess

from sparktestsupport import SPARK_HOME, ERROR_CODES
from sparktestsupport.shellutils import exit_from_command_with_retcode, run_cmd, rm_r
from sparktestsupport.shellutils import run_cmd, rm_r


def print_err(*args):
def print_err(msg):
"""
Given a set of arguments, will print them to the STDERR stream
"""
print(*args, file=sys.stderr)
print(msg, file=sys.stderr)


def post_message(mssg, comments_url):
http_code_header = "HTTP Response Code: "
posted_message = json.dumps({"body": mssg})

def post_message_to_github(msg, ghprb_pull_id):
print("Attempting to post to Github...")

# we don't want to call `run_cmd` here as, in the event of an error, we DO NOT
# want to print the GITHUB_OAUTH_KEY into the public Jenkins logs
curl_proc = subprocess.Popen(['curl',
'--silent',
'--user', 'x-oauth-basic:' + os.environ['GITHUB_OATH_KEY'],
'--request', 'POST',
'--data', posted_message,
'--write-out', http_code_header + '%{http_code}',
'--header', 'Content-Type: application/json',
comments_url],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
curl_stdout, curl_stderr = curl_proc.communicate()
curl_returncode = curl_proc.returncode
# find all lines relevant to the Github API response
api_response = "\n".join([l for l in curl_stdout.split('\n')
if l and not l.startswith(http_code_header)])
# find the line where `http_code_header` exists, split on ':' to get the
# HTTP response code, and cast to an int
http_code = int(curl_stdout[curl_stdout.find(http_code_header):].split(':')[1])

if not curl_returncode == 0:
posted_message = json.dumps({"body": msg})
request = urllib2.Request("https://api.github.com/repos/apache/spark/issues/" +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this request need to use a POST HTTP method instead of GET (the default)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, disregard this comment if data != none implies POST.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formatting nit: do you mind defining some extra variables here so that we can indent slightly less? Take a look at some of the code from the spark-pr-dashboard as an example:

https://github.com/databricks/spark-pr-dashboard/blob/master/sparkprs/github_api.py

ghprb_pull_id + "/comments",
headers={
"Authorization": "x-oauth-basic:" + os.environ['GITHUB_OATH_KEY'],
"Content-Type": "application/json"
},
data=posted_message)
try:
response = urllib2.urlopen(request)
except urllib2.URLError as url_e:
print_err("Failed to post message to GitHub.")
print_err(" > curl_status:", curl_returncode)
print_err(" > curl_output:", curl_stdout)
print_err(" > data:", posted_message)

if http_code and not http_code == 201:
print_err(" > http_code:", http_code)
print_err(" > api_response:", api_response)
print_err(" > data:", posted_message)
print_err(" > urllib2_status: %s" % url_e.reason[1])
print_err(" > data: %s" % posted_message)
except urllib2.HTTPError as http_e:
print_err("Failed to post message to GitHub.")
print_err(" > http_code: %s" % http_e.code)
print_err(" > api_response: %s" % http_e.read())
print_err(" > data: %s" % posted_message)

if curl_returncode == 0 and http_code == 201:
if response.getcode() == 201:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check should be moved inside of the try block.

print(" > Post successful.")


def send_archived_logs():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that we should keep this feature for now, since I don't necessarily trust the stability / configurability of the Jenkins plugin that also did this. Disregard my earlier comments / proposal to remove this. Let's refactor the code a bit, but we should definitely keep this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI we no longer need this feature and are going to remove it. Just commenting here so that it's not accidentally re-added as part of this PR.

print("Archiving unit tests logs...")

log_files = run_cmd(['find', '.',
'-name', 'unit-tests.log',
'-o', '-path', './sql/hive/target/HiveCompatibilitySuite.failed',
'-o', '-path', './sql/hive/target/HiveCompatibilitySuite.hiveFailed',
'-o', '-path', './sql/hive/target/HiveCompatibilitySuite.wrong'],
return_output=True)
# find any files rescursively with the name 'unit-tests.log'
log_files = [os.path.join(path, f)
for path, _, filenames in os.walk(SPARK_HOME)
for f in filesnames if f == 'unit-tests.log']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filesnames is undefined here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it's just a typo.

# ensure we have a default list if no 'unit-tests.log' files were found
log_files = log_files if log_files else list()

# check if any of the three explicit paths exist on the system
log_files += [f for f in ['./sql/hive/target/HiveCompatibilitySuite.failed',
'./sql/hive/target/HiveCompatibilitySuite.hiveFailed',
'./sql/hive/target/HiveCompatibilitySuite.wrong']
if os.path.isfile(f)]

if log_files:
log_archive = "unit-tests-logs.tar.gz"

run_cmd(['tar', 'czf', log-archive] + log_files.strip().split('\n'))
run_cmd(['tar', 'czf', log_archive] + log_files.strip().split('\n'))

jenkins_build_dir = os.environ["JENKINS_HOME"] + "/jobs/" + os.environ["JOB_NAME"] + \
"/builds/" + os.environ["BUILD_NUMBER"]
Expand All @@ -103,8 +96,8 @@ def send_archived_logs():

if not scp_returncode == 0:
print_err("Failed to send archived unit tests logs to Jenkins master.")
print_err(" > scp_status:", scp_returncode)
print_err(" > scp_output:", scp_stdout)
print_err(" > scp_status: %s" % scp_returncode)
print_err(" > scp_output: %s" % scp_stdout)
else:
print(" > Send successful.")
else:
Expand All @@ -113,7 +106,12 @@ def send_archived_logs():
rm_r(log_archive)


def run_pr_tests(pr_tests, ghprb_actual_commit, sha1):
def run_pr_checks(pr_tests, ghprb_actual_commit, sha1):
"""
Executes a set of pull request checks to ease development and report issues with various
components such as style, linting, dependencies, compatibilities, etc.
@return a list of messages to post back to Github
"""
# Ensure we save off the current HEAD to revert to
current_pr_head = run_cmd(['git', 'rev-parse', 'HEAD'], return_output=True).strip()
pr_results = list()
Expand All @@ -127,31 +125,38 @@ def run_pr_tests(pr_tests, ghprb_actual_commit, sha1):
return pr_results


def bind_message_base(build_display_name, build_url, ghprb_pull_id, short_commit_hash, commit_url):
"""
Given base parameters to generate a strong Github message response, binds those
parameters into a closure without the specific message and returns a function
able to generate strong messages for a specific description.
"""
return lambda mssg, post_mssg="": \
'**[Test build ' + build_display_name + ' ' + mssg + '](' + build_url + \
'console)** for PR ' + ghprb_pull_id + ' at commit [\`' + short_commit_hash + '\`](' + \
commit_url + ')' + str(' ' + post_mssg + '.') if post_mssg else '.'


def success_result_note(mssg):
return ' * This patch ' + mssg + '.'


def failure_result_note(mssg):
return ' * This patch **fails ' + mssg + '**.'
def pr_message(build_display_name,
build_url,
ghprb_pull_id,
short_commit_hash,
commit_url,
msg,
post_msg=''):
# align the arguments properly for string formatting
str_args = (build_display_name,
msg,
build_url,
ghprb_pull_id,
short_commit_hash,
commit_url,
str(' ' + post_msg + '.') if post_msg else '.')
return '**[Test build %s %s](%sconsole)** for PR %s at commit [\`%s\`](%s)%s' % str_args


def run_tests(tests_timeout):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return value should be documented.

"""
Runs the `dev/run-tests` script and responds with the correct error message
under the various failure scenarios.
@return a tuple containing the test result code and the result note to post to Github
"""

test_proc = subprocess.Popen(['timeout',
tests_timeout,
os.path.join(SPARK_HOME, 'dev', 'run-tests')]).wait()
test_result = test_proc.returncode
test_result_code = test_proc.returncode
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_proc already holds the return code, I think, so we should either move the wait() to here to rename test_proc to test_proc_return_code:

Traceback (most recent call last):
  File "./dev/run-tests.py", line 486, in <module>
    main()
  File "./dev/run-tests.py", line 404, in main
    os.environ["CURRENT_BLOCK"] = ERROR_CODES["BLOCK_GENERAL"]
  File "/home/anaconda/lib/python2.7/os.py", line 473, in __setitem__
    putenv(key, item)
TypeError: must be string, not int
Traceback (most recent call last):
  File "./dev/run-tests-jenkins.py", line 256, in <module>
    main()
  File "./dev/run-tests-jenkins.py", line 243, in main
    test_result_code, test_result_note = run_tests(tests_timeout)
  File "./dev/run-tests-jenkins.py", line 158, in run_tests
    test_result_code = test_proc.returncode
AttributeError: 'int' object has no attribute 'returncode'


def failure_result_note(msg):
' * This patch **fails ' + msg + '**.'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing a return statement here, although I think we should just inline this after some refactoring; see https://github.com/apache/spark/pull/7401/files#r34702372


failure_note_by_errcode = {
ERROR_CODES["BLOCK_GENERAL"]: failure_result_note('some tests'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was a bit inprecise in my earlier comment, but I was thinking that we could change this so that failure_note_by_errcode holds the fragments, e.g.

ERROR_CODES["BLOCK_GENERAL"]: 'some tests',

and we could then index into this array with the test result error code, e.g.

' * This patch **fails %s **.' % failure_note_by_errcode[test_result]

which I think is slightly clearer.

Expand All @@ -169,12 +174,12 @@ def run_tests(tests_timeout):
}

if test_result == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be test_result_code, not test_result, as the latter is not defined.

test_result_note = success_result_note('passes all tests')
test_result_note = ' * This patch passes all tests.'
else:
test_result_note = failure_note_by_errcode(test_result)
test_result_note = failure_note_by_errcode[test_result]
send_archived_logs()

return test_result_note
return [test_result_code, test_result_note]


def main():
Expand All @@ -198,8 +203,6 @@ def main():
build_display_name = os.environ["BUILD_DISPLAY_NAME"]
build_url = os.environ["BUILD_URL"]

comments_url = "https://api.github.com/repos/apache/spark/issues/" + ghprb_pull_id + "/comments"
pull_request_url = "https://github.com/apache/spark/pull/" + ghprb_pull_id
commit_url = "https://github.com/apache/spark/commit/" + ghprb_actual_commit

# GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :(
Expand All @@ -209,8 +212,8 @@ def main():
# must be less than the timeout configured on Jenkins (currently 180m)
tests_timeout = "175m"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This number has been bumped up recently:

# must be less than the timeout configured on Jenkins (currently 300m)
TESTS_TIMEOUT="250m"


# Array to capture all tests to run on the pull request. These tests are held under the
# dev/tests/ directory.
# Array to capture all test names to run on the pull request. These tests are represented
# by their file equivalents in the dev/tests/ directory.
#
# To write a PR test:
# * the file must reside within the dev/tests directory
Expand All @@ -225,28 +228,28 @@ def main():
]

# `bind_message_base` returns a function to generate messages for Github posting
github_message = bind_message_base(build_display_name,
github_message = functools.partial(pr_message,
build_display_name,
build_url,
ghprb_pull_id,
short_commit_hash,
commit_url)

# post start message
post_message(github_message('has started'))
post_message_to_github(github_message('has started'), ghprb_pull_id)

pr_test_results = run_pr_tests(pr_tests, ghprb_actual_commit, sha1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

run_pr_tests is undefined; we need to change this to run_pr_checks. (This reminds me: now that we have the infrastructure scripts to run PyLint checks in Jenkins we should enable the undefined-variable check so that this type of issue is caught automatically, although that wouldn't help here because bugs in this script would prevent those checks from ever running in the first place).


test_results = run_tests(tests_timeout)
test_result_code, test_result_note = run_tests(tests_timeout)

# post end message
result_message = github_message('has finished')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, it would be nice to have a template that uses string formatting / interpolation.

result_message += '\n' + test_results
for pr_result in pr_test_results:
result_message += pr_result
result_message += '\n' + test_result_note
result_message += " ".join(pr_results)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be pr_check_results, not pr_results.


post_message(result_message)
post_message_to_github(result_message, ghprb_pull_id)

sys.exit(test_result)
sys.exit(test_result_code)


if __name__ == "__main__":
Expand Down
24 changes: 13 additions & 11 deletions dev/sparktestsupport/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@

SPARK_HOME = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../"))
USER_HOME = os.environ.get("HOME")
ERROR_CODES = { "BLOCK_GENERAL" : 10,
"BLOCK_RAT" : 11,
"BLOCK_SCALA_STYLE" : 12,
"BLOCK_PYTHON_STYLE" : 13,
"BLOCK_DOCUMENTATION" : 14,
"BLOCK_BUILD" : 15,
"BLOCK_MIMA" : 16,
"BLOCK_SPARK_UNIT_TESTS" : 17,
"BLOCK_PYSPARK_UNIT_TESTS" : 18,
"BLOCK_SPARKR_UNIT_TESTS" : 19,
"BLOCK_TIMEOUT" : 124 }
ERROR_CODES = {
"BLOCK_GENERAL" : 10,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the space before :?

"BLOCK_RAT" : 11,
"BLOCK_SCALA_STYLE" : 12,
"BLOCK_PYTHON_STYLE" : 13,
"BLOCK_DOCUMENTATION" : 14,
"BLOCK_BUILD" : 15,
"BLOCK_MIMA" : 16,
"BLOCK_SPARK_UNIT_TESTS" : 17,
"BLOCK_PYSPARK_UNIT_TESTS" : 18,
"BLOCK_SPARKR_UNIT_TESTS" : 19,
"BLOCK_TIMEOUT" : 124
}