From 289c834bedac07761f0cd33aee9854c4efb0c5e5 Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 10 May 2021 10:21:37 +0200 Subject: [PATCH 01/70] separate SelectorThread into its own object so it can be used on an existing asyncio loop without replacing the loop itself --- tornado/platform/asyncio.py | 112 +++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 292d9b66a4..b07af3a953 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -51,7 +51,7 @@ def fileno(self) -> int: # Collection of selector thread event loops to shut down on exit. -_selector_loops = set() # type: Set[AddThreadSelectorEventLoop] +_selector_loops = set() # type: Set[SelectorThread] def _atexit_callback() -> None: @@ -401,54 +401,17 @@ def get_event_loop(self) -> asyncio.AbstractEventLoop: return loop -class AddThreadSelectorEventLoop(asyncio.AbstractEventLoop): - """Wrap an event loop to add implementations of the ``add_reader`` method family. +class SelectorThread: + """Define ``add_reader`` methods to be called in a background select thread. Instances of this class start a second thread to run a selector. - This thread is completely hidden from the user; all callbacks are - run on the wrapped event loop's thread. - - This class is used automatically by Tornado; applications should not need - to refer to it directly. - - It is safe to wrap any event loop with this class, although it only makes sense - for event loops that do not implement the ``add_reader`` family of methods - themselves (i.e. ``WindowsProactorEventLoop``) - - Closing the ``AddThreadSelectorEventLoop`` also closes the wrapped event loop. + This thread is completely hidden from the user; + all callbacks are run on the wrapped event loop's thread. + Typically used via ``AddThreadSelectorEventLoop``, + but can be attached to a running asyncio loop. """ - # This class is a __getattribute__-based proxy. All attributes other than those - # in this set are proxied through to the underlying loop. - MY_ATTRIBUTES = { - "_consume_waker", - "_select_cond", - "_select_args", - "_closing_selector", - "_thread", - "_handle_event", - "_readers", - "_real_loop", - "_start_select", - "_run_select", - "_handle_select", - "_wake_selector", - "_waker_r", - "_waker_w", - "_writers", - "add_reader", - "add_writer", - "close", - "remove_reader", - "remove_writer", - } - - def __getattribute__(self, name: str) -> Any: - if name in AddThreadSelectorEventLoop.MY_ATTRIBUTES: - return super().__getattribute__(name) - return getattr(self._real_loop, name) - def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: self._real_loop = real_loop @@ -501,7 +464,6 @@ def close(self) -> None: _selector_loops.discard(self) self._waker_r.close() self._waker_w.close() - self._real_loop.close() def _wake_selector(self) -> None: try: @@ -613,3 +575,63 @@ def remove_reader(self, fd: "_FileDescriptorLike") -> None: def remove_writer(self, fd: "_FileDescriptorLike") -> None: del self._writers[fd] self._wake_selector() + + +class AddThreadSelectorEventLoop(asyncio.AbstractEventLoop): + """Wrap an event loop to add implementations of the ``add_reader`` method family. + + Instances of this class start a second thread to run a selector. + This thread is completely hidden from the user; all callbacks are + run on the wrapped event loop's thread. + + This class is used automatically by Tornado; applications should not need + to refer to it directly. + + It is safe to wrap any event loop with this class, although it only makes sense + for event loops that do not implement the ``add_reader`` family of methods + themselves (i.e. ``WindowsProactorEventLoop``) + + Closing the ``AddThreadSelectorEventLoop`` also closes the wrapped event loop. + + """ + + # This class is a __getattribute__-based proxy. All attributes other than those + # in this set are proxied through to the underlying loop. + MY_ATTRIBUTES = { + "_real_loop", + "_selector", + "add_reader", + "add_writer", + "close", + "remove_reader", + "remove_writer", + } + + def __getattribute__(self, name: str) -> Any: + if name in AddThreadSelectorEventLoop.MY_ATTRIBUTES: + return super().__getattribute__(name) + return getattr(self._real_loop, name) + + def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: + self._real_loop = real_loop + self._selector = SelectorThread(real_loop) + + def close(self) -> None: + self._selector.close() + self._real_loop.close() + + def add_reader( + self, fd: "_FileDescriptorLike", callback: Callable[..., None], *args: Any + ) -> None: + return self._selector.add_reader(fd, callback, *args) + + def add_writer( + self, fd: "_FileDescriptorLike", callback: Callable[..., None], *args: Any + ) -> None: + return self._selector.add_writer(fd, callback, *args) + + def remove_reader(self, fd: "_FileDescriptorLike") -> None: + return self._selector.remove_reader(fd) + + def remove_writer(self, fd: "_FileDescriptorLike") -> None: + return self._selector.remove_writer(fd) From 63ae59dda4bc3d83f7a1265f3c17807d4bc7e78f Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 19 May 2021 09:58:09 +0200 Subject: [PATCH 02/70] Make AddThreadEventLoop.close() idempotent according to spec: https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.close The call to _wake_selector would fail with EBADF when close is called a second time --- tornado/platform/asyncio.py | 5 +++++ tornado/test/asyncio_test.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index b07af3a953..3863e78f2e 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -412,6 +412,8 @@ class SelectorThread: but can be attached to a running asyncio loop. """ + _closed = False + def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: self._real_loop = real_loop @@ -456,6 +458,8 @@ def __del__(self) -> None: self._waker_w.close() def close(self) -> None: + if self._closed: + return with self._select_cond: self._closing_selector = True self._select_cond.notify() @@ -464,6 +468,7 @@ def close(self) -> None: _selector_loops.discard(self) self._waker_r.close() self._waker_w.close() + self._closed = True def _wake_selector(self) -> None: try: diff --git a/tornado/test/asyncio_test.py b/tornado/test/asyncio_test.py index 3f9f3389a2..59825e8f22 100644 --- a/tornado/test/asyncio_test.py +++ b/tornado/test/asyncio_test.py @@ -20,6 +20,7 @@ AsyncIOLoop, to_asyncio_future, AnyThreadEventLoopPolicy, + AddThreadSelectorEventLoop, ) from tornado.testing import AsyncTestCase, gen_test @@ -107,6 +108,11 @@ async def native_coroutine_with_adapter2(): 42, ) + def test_add_thread_close_idempotent(self): + loop = AddThreadSelectorEventLoop(asyncio.get_event_loop()) + loop.close() + loop.close() + class LeakTest(unittest.TestCase): def setUp(self): From 0e43884f44a4aee716bd8705b3059ae8a5046f81 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 1 May 2023 16:18:31 -0400 Subject: [PATCH 03/70] Bump main branch version number to 6.4.dev1 --- tornado/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tornado/__init__.py b/tornado/__init__.py index afbd715053..3df733ad98 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -22,8 +22,8 @@ # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "6.3.1" -version_info = (6, 3, 1, 0) +version = "6.4.dev1" +version_info = (6, 4, 0, -100) import importlib import typing From d78292594ea62fe7ff1f5ec5587aeb525121603c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 1 May 2023 17:10:27 -0400 Subject: [PATCH 04/70] test: Streamline test configurations - LANG tests were no longer having the intended effect because C locales now default to utf8 instead of ascii. There's a new warning we can turn on with an env var instead. (after cleaing up some tests) - The tox install_command issue was reverted in tox 1.9 - Python now guarantees that __file__ is absolute - Remove some obsolete warning manipulations --- tornado/test/autoreload_test.py | 14 ++++++++++---- tornado/test/log_test.py | 18 +++--------------- tornado/test/runtests.py | 31 ------------------------------- tornado/test/web_test.py | 8 ++++---- tox.ini | 23 +++++------------------ 5 files changed, 22 insertions(+), 72 deletions(-) diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index be481e106f..fff602811b 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -43,8 +43,12 @@ def test_reload_module(self): # Create temporary test application os.mkdir(os.path.join(self.path, "testapp")) - open(os.path.join(self.path, "testapp/__init__.py"), "w").close() - with open(os.path.join(self.path, "testapp/__main__.py"), "w") as f: + open( + os.path.join(self.path, "testapp/__init__.py"), "w", encoding="utf-8" + ).close() + with open( + os.path.join(self.path, "testapp/__main__.py"), "w", encoding="utf-8" + ) as f: f.write(main) # Make sure the tornado module under test is available to the test @@ -59,6 +63,7 @@ def test_reload_module(self): cwd=self.path, env=dict(os.environ, PYTHONPATH=pythonpath), universal_newlines=True, + encoding="utf-8", ) out = p.communicate()[0] self.assertEqual(out, "Starting\nStarting\n") @@ -94,9 +99,9 @@ def test_reload_wrapper_preservation(self): # Create temporary test application os.mkdir(os.path.join(self.path, "testapp")) init_file = os.path.join(self.path, "testapp", "__init__.py") - open(init_file, "w").close() + open(init_file, "w", encoding="utf-8").close() main_file = os.path.join(self.path, "testapp", "__main__.py") - with open(main_file, "w") as f: + with open(main_file, "w", encoding="utf-8") as f: f.write(main) # Make sure the tornado module under test is available to the test @@ -111,6 +116,7 @@ def test_reload_wrapper_preservation(self): cwd=self.path, env=dict(os.environ, PYTHONPATH=pythonpath), universal_newlines=True, + encoding="utf-8", ) # This timeout needs to be fairly generous for pypy due to jit diff --git a/tornado/test/log_test.py b/tornado/test/log_test.py index 9130ae7e8d..fec4c389e2 100644 --- a/tornado/test/log_test.py +++ b/tornado/test/log_test.py @@ -66,11 +66,7 @@ def tearDown(self): os.rmdir(self.tempdir) def make_handler(self, filename): - # Base case: default setup without explicit encoding. - # In python 2, supports arbitrary byte strings and unicode objects - # that contain only ascii. In python 3, supports ascii-only unicode - # strings (but byte strings will be repr'd automatically). - return logging.FileHandler(filename) + return logging.FileHandler(filename, encoding="utf-8") def get_output(self): with open(self.filename, "rb") as f: @@ -116,14 +112,6 @@ def test_bytes_exception_logging(self): # The traceback contains newlines, which should not have been escaped. self.assertNotIn(rb"\n", output) - -class UnicodeLogFormatterTest(LogFormatterTest): - def make_handler(self, filename): - # Adding an explicit encoding configuration allows non-ascii unicode - # strings in both python 2 and 3, without changing the behavior - # for byte strings. - return logging.FileHandler(filename, encoding="utf8") - def test_unicode_logging(self): self.logger.error("\u00e9") self.assertEqual(self.get_output(), utf8("\u00e9")) @@ -147,7 +135,7 @@ def test_log_file(self): self.logger.handlers[0].flush() filenames = glob.glob(tmpdir + "/test_log*") self.assertEqual(1, len(filenames)) - with open(filenames[0]) as f: + with open(filenames[0], encoding="utf-8") as f: self.assertRegex(f.read(), r"^\[E [^]]*\] hello$") finally: for handler in self.logger.handlers: @@ -167,7 +155,7 @@ def test_log_file_with_timed_rotating(self): self.logger.handlers[0].flush() filenames = glob.glob(tmpdir + "/test_log*") self.assertEqual(1, len(filenames)) - with open(filenames[0]) as f: + with open(filenames[0], encoding="utf-8") as f: self.assertRegex(f.read(), r"^\[E [^]]*\] hello$") finally: for handler in self.logger.handlers: diff --git a/tornado/test/runtests.py b/tornado/test/runtests.py index 6075b1e2bd..58cecd383e 100644 --- a/tornado/test/runtests.py +++ b/tornado/test/runtests.py @@ -128,37 +128,6 @@ def main(): warnings.filterwarnings( "error", category=PendingDeprecationWarning, module=r"tornado\..*" ) - # The unittest module is aggressive about deprecating redundant methods, - # leaving some without non-deprecated spellings that work on both - # 2.7 and 3.2 - warnings.filterwarnings( - "ignore", category=DeprecationWarning, message="Please use assert.* instead" - ) - warnings.filterwarnings( - "ignore", - category=PendingDeprecationWarning, - message="Please use assert.* instead", - ) - # Twisted 15.0.0 triggers some warnings on py3 with -bb. - warnings.filterwarnings("ignore", category=BytesWarning, module=r"twisted\..*") - if (3,) < sys.version_info < (3, 6): - # Prior to 3.6, async ResourceWarnings were rather noisy - # and even - # `python3.4 -W error -c 'import asyncio; asyncio.get_event_loop()'` - # would generate a warning. - warnings.filterwarnings( - "ignore", category=ResourceWarning, module=r"asyncio\..*" - ) - # This deprecation warning is introduced in Python 3.8 and is - # triggered by pycurl. Unforunately, because it is raised in the C - # layer it can't be filtered by module and we must match the - # message text instead (Tornado's C module uses PY_SSIZE_T_CLEAN - # so it's not at risk of running into this issue). - warnings.filterwarnings( - "ignore", - category=DeprecationWarning, - message="PY_SSIZE_T_CLEAN will be required", - ) logging.getLogger("tornado.access").setLevel(logging.CRITICAL) diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index c2d057c538..56900d9a00 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -1271,7 +1271,7 @@ def test_static_with_range_full_file(self): # to ``Range: bytes=0-`` :( self.assertEqual(response.code, 200) robots_file_path = os.path.join(self.static_dir, "robots.txt") - with open(robots_file_path) as f: + with open(robots_file_path, encoding="utf-8") as f: self.assertEqual(response.body, utf8(f.read())) self.assertEqual(response.headers.get("Content-Length"), "26") self.assertEqual(response.headers.get("Content-Range"), None) @@ -1282,7 +1282,7 @@ def test_static_with_range_full_past_end(self): ) self.assertEqual(response.code, 200) robots_file_path = os.path.join(self.static_dir, "robots.txt") - with open(robots_file_path) as f: + with open(robots_file_path, encoding="utf-8") as f: self.assertEqual(response.body, utf8(f.read())) self.assertEqual(response.headers.get("Content-Length"), "26") self.assertEqual(response.headers.get("Content-Range"), None) @@ -1293,7 +1293,7 @@ def test_static_with_range_partial_past_end(self): ) self.assertEqual(response.code, 206) robots_file_path = os.path.join(self.static_dir, "robots.txt") - with open(robots_file_path) as f: + with open(robots_file_path, encoding="utf-8") as f: self.assertEqual(response.body, utf8(f.read()[1:])) self.assertEqual(response.headers.get("Content-Length"), "25") self.assertEqual(response.headers.get("Content-Range"), "bytes 1-25/26") @@ -1320,7 +1320,7 @@ def test_static_with_range_neg_past_start(self): ) self.assertEqual(response.code, 200) robots_file_path = os.path.join(self.static_dir, "robots.txt") - with open(robots_file_path) as f: + with open(robots_file_path, encoding="utf-8") as f: self.assertEqual(response.body, utf8(f.read())) self.assertEqual(response.headers.get("Content-Length"), "26") self.assertEqual(response.headers.get("Content-Range"), None) diff --git a/tox.ini b/tox.ini index f7b5bd384d..73a14a7fd5 100644 --- a/tox.ini +++ b/tox.ini @@ -47,7 +47,7 @@ deps = setenv = # Treat the extension as mandatory in testing (but not on pypy) - {py3,py38,py39,py310,py311}: TORNADO_EXTENSION=1 + {py3,py38,py39,py310,py311,py312}: TORNADO_EXTENSION=1 # CI workers are often overloaded and can cause our tests to exceed # the default timeout of 5s. ASYNC_TEST_TIMEOUT=25 @@ -60,7 +60,10 @@ setenv = # possible to set environment variables during that phase of # tox). {py3,py38,py39,py310,py311,pypy3}: PYTHONWARNINGS=error:::tornado - + # Warn if we try to open a file with an unspecified encoding. + # (New in python 3.10, becomes obsolete when utf8 becomes the + # default in 3.15) + PYTHONWARNDEFAULTENCODING=1 # Allow shell commands in tests allowlist_externals = sh @@ -77,33 +80,17 @@ commands = # the trap of relying on an assertion's side effects or using # them for things that should be runtime errors. full: python -O -m tornado.test - # In python 3, opening files in text mode uses a - # system-dependent encoding by default. Run the tests with "C" - # (ascii) and "utf-8" locales to ensure we don't have hidden - # dependencies on this setting. - full: sh -c 'LANG=C python -m tornado.test' - full: sh -c 'LANG=en_US.utf-8 python -m tornado.test' # Note that httpclient_test is always run with both client # implementations; this flag controls which client all the # other tests use. full: python -m tornado.test --httpclient=tornado.curl_httpclient.CurlAsyncHTTPClient full: python -m tornado.test --resolver=tornado.platform.caresresolver.CaresResolver - # Run the tests once from the source directory to detect issues - # involving relative __file__ paths; see - # https://github.com/tornadoweb/tornado/issues/1780 - full: sh -c '(cd {toxinidir} && unset TORNADO_EXTENSION && python -m tornado.test)' - # python will import relative to the current working directory by default, # so cd into the tox working directory to avoid picking up the working # copy of the files (especially important for the speedups module). changedir = {toxworkdir} -# tox 1.6 passes --pre to pip by default, which currently has problems -# installing pycurl (https://github.com/pypa/pip/issues/1405). -# Remove it (it's not a part of {opts}) to only install real releases. -install_command = pip install {opts} {packages} - [testenv:docs] changedir = docs # For some reason the extension fails to load in this configuration, From b9eb764264c79cd2ec7b0c87796be3f50f46d32d Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 2 May 2023 12:54:20 -0400 Subject: [PATCH 05/70] ioloop: Deprecate add_callback_from_signal I don't believe this method is currently working as intended, and I'm not sure it ever has since the move to asyncio. I think this is responsible for occasional test failures in CI. Fixes #3225 --- maint/benchmark/benchmark.py | 35 ++++++++--------------------------- tornado/ioloop.py | 12 +++++++----- tornado/platform/asyncio.py | 1 + tornado/process.py | 14 ++++++-------- tornado/test/ioloop_test.py | 6 ++++-- 5 files changed, 26 insertions(+), 42 deletions(-) diff --git a/maint/benchmark/benchmark.py b/maint/benchmark/benchmark.py index d1f32d33ef..845c3ff2e8 100755 --- a/maint/benchmark/benchmark.py +++ b/maint/benchmark/benchmark.py @@ -14,18 +14,11 @@ # % sort time # % stats 20 -from tornado.ioloop import IOLoop from tornado.options import define, options, parse_command_line from tornado.web import RequestHandler, Application +import asyncio import random -import signal -import subprocess - -try: - xrange -except NameError: - xrange = range # choose a random port to avoid colliding with TIME_WAIT sockets left over # from previous runs. @@ -44,8 +37,6 @@ # --n=15000 for its JIT to reach full effectiveness define("num_runs", type=int, default=1) -define("ioloop", type=str, default=None) - class RootHandler(RequestHandler): def get(self): @@ -55,24 +46,16 @@ def _log(self): pass -def handle_sigchld(sig, frame): - IOLoop.current().add_callback_from_signal(IOLoop.current().stop) - - def main(): parse_command_line() - if options.ioloop: - IOLoop.configure(options.ioloop) - for i in xrange(options.num_runs): - run() + for i in range(options.num_runs): + asyncio.run(run()) -def run(): - io_loop = IOLoop(make_current=True) +async def run(): app = Application([("/", RootHandler)]) port = random.randrange(options.min_port, options.max_port) - app.listen(port, address='127.0.0.1') - signal.signal(signal.SIGCHLD, handle_sigchld) + app.listen(port, address="127.0.0.1") args = ["ab"] args.extend(["-n", str(options.n)]) args.extend(["-c", str(options.c)]) @@ -82,11 +65,9 @@ def run(): # just stops the progress messages printed to stderr args.append("-q") args.append("http://127.0.0.1:%d/" % port) - subprocess.Popen(args) - io_loop.start() - io_loop.close() - io_loop.clear_current() + proc = await asyncio.create_subprocess_exec(*args) + await proc.wait() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tornado/ioloop.py b/tornado/ioloop.py index bcdcca097b..6fbe9ee192 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -632,9 +632,6 @@ def add_callback(self, callback: Callable, *args: Any, **kwargs: Any) -> None: other interaction with the `IOLoop` must be done from that `IOLoop`'s thread. `add_callback()` may be used to transfer control from other threads to the `IOLoop`'s thread. - - To add a callback from a signal handler, see - `add_callback_from_signal`. """ raise NotImplementedError() @@ -643,8 +640,13 @@ def add_callback_from_signal( ) -> None: """Calls the given callback on the next I/O loop iteration. - Safe for use from a Python signal handler; should not be used - otherwise. + Intended to be afe for use from a Python signal handler; should not be + used otherwise. + + .. deprecated:: 6.4 + Use ``asyncio.AbstractEventLoop.add_signal_handler`` instead. + This method is suspected to have been broken since Tornado 5.0 and + will be removed in version 7.0. """ raise NotImplementedError() diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index a15a74df4d..5248c1f951 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -239,6 +239,7 @@ def add_callback(self, callback: Callable, *args: Any, **kwargs: Any) -> None: def add_callback_from_signal( self, callback: Callable, *args: Any, **kwargs: Any ) -> None: + warnings.warn("add_callback_from_signal is deprecated", DeprecationWarning) try: self.asyncio_loop.call_soon_threadsafe( self._run_callback, functools.partial(callback, *args, **kwargs) diff --git a/tornado/process.py b/tornado/process.py index 26428feb77..12e3eb648d 100644 --- a/tornado/process.py +++ b/tornado/process.py @@ -17,6 +17,7 @@ the server into multiple processes and managing subprocesses. """ +import asyncio import os import multiprocessing import signal @@ -210,7 +211,6 @@ class Subprocess(object): _initialized = False _waiting = {} # type: ignore - _old_sigchld = None def __init__(self, *args: Any, **kwargs: Any) -> None: self.io_loop = ioloop.IOLoop.current() @@ -322,11 +322,8 @@ def initialize(cls) -> None: """ if cls._initialized: return - io_loop = ioloop.IOLoop.current() - cls._old_sigchld = signal.signal( - signal.SIGCHLD, - lambda sig, frame: io_loop.add_callback_from_signal(cls._cleanup), - ) + loop = asyncio.get_event_loop() + loop.add_signal_handler(signal.SIGCHLD, cls._cleanup) cls._initialized = True @classmethod @@ -334,7 +331,8 @@ def uninitialize(cls) -> None: """Removes the ``SIGCHLD`` handler.""" if not cls._initialized: return - signal.signal(signal.SIGCHLD, cls._old_sigchld) + loop = asyncio.get_event_loop() + loop.remove_signal_handler(signal.SIGCHLD) cls._initialized = False @classmethod @@ -352,7 +350,7 @@ def _try_cleanup_process(cls, pid: int) -> None: return assert ret_pid == pid subproc = cls._waiting.pop(pid) - subproc.io_loop.add_callback_from_signal(subproc._set_returncode, status) + subproc.io_loop.add_callback(subproc._set_returncode, status) def _set_returncode(self, status: int) -> None: if sys.platform == "win32": diff --git a/tornado/test/ioloop_test.py b/tornado/test/ioloop_test.py index 7de392f83e..9485afeaeb 100644 --- a/tornado/test/ioloop_test.py +++ b/tornado/test/ioloop_test.py @@ -127,7 +127,8 @@ def test_remove_without_add(self): def test_add_callback_from_signal(self): # cheat a little bit and just run this normally, since we can't # easily simulate the races that happen with real signal handlers - self.io_loop.add_callback_from_signal(self.stop) + with ignore_deprecation(): + self.io_loop.add_callback_from_signal(self.stop) self.wait() def test_add_callback_from_signal_other_thread(self): @@ -137,7 +138,8 @@ def test_add_callback_from_signal_other_thread(self): other_ioloop = IOLoop() thread = threading.Thread(target=other_ioloop.start) thread.start() - other_ioloop.add_callback_from_signal(other_ioloop.stop) + with ignore_deprecation(): + other_ioloop.add_callback_from_signal(other_ioloop.stop) thread.join() other_ioloop.close() From 012499d33bfe958e5206227680a62aef831a197d Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 2 May 2023 19:12:04 -0400 Subject: [PATCH 06/70] ci: Enable manual dispatch for build workflow Replace the use of a special branch name for testing. --- .github/workflows/build.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdf7784580..73bd458bc4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,12 +7,15 @@ name: Build on: push: branches: - # Run on release branches. + # Run on release branches. This gives us a chance to detect rot in this + # configuration before pushing a tag (which we'd rather not have to undo). - "branch[0-9]*" - # Also run on certain other branches for testing. - - "build-workflow*" tags: + # The main purpose of this workflow is to build wheels for release tags. + # It runs automatically on tags matching this pattern and pushes to pypi. - "v*" + workflow_dispatch: + # Allow this workflow to be run manually (pushing to testpypi instead of pypi) env: python-version: '3.9' @@ -68,7 +71,7 @@ jobs: name: Upload to PyPI (test) needs: [build_wheels, build_sdist] runs-on: ubuntu-22.04 - if: github.repository == 'tornadoweb/tornado' && github.event_name == 'push' && startsWith(github.ref_name, 'build-workflow') + if: github.repository == 'tornadoweb/tornado' && github.event_name == 'workflow_dispatch' steps: - uses: actions/download-artifact@v3 with: From f40f8e75a3927b3d3a2ff0768fb74ca24d721932 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 3 May 2023 12:46:28 -0400 Subject: [PATCH 07/70] setup: Include tox.ini in sdist Also remove the demos directory from sdist. This inclusion was incomplete and even if it were incomplete I don't think the sdist is a great way to distribute these demos. Fixes #3253 --- MANIFEST.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index d99e4bb930..b99a2e2c82 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -recursive-include demos *.py *.yaml *.html *.css *.js *.xml *.sql README recursive-include docs * prune docs/build include tornado/py.typed @@ -19,4 +18,7 @@ include tornado/test/test.crt include tornado/test/test.key include LICENSE include README.rst +include requirements.in +include requirements.txt include runtests.sh +include tox.ini From 321a9cdcd8f42e54aacd3ec5e865c64cd3530179 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sun, 7 May 2023 17:03:33 -0400 Subject: [PATCH 08/70] websocket: Add warning if client connection isn't closed cleanly This gives a warning that is not dependent on GC for the issue in #3257. This new warning covers all websocket client connections, while the previous GC-dependent warning only affected those with ping_interval set. This unfortunately introduces an effective requirement to close all websocket clients explicitly for those who are strict about warnings. --- tornado/test/websocket_test.py | 83 ++++++++++++++++++++++------------ tornado/websocket.py | 10 ++++ 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/tornado/test/websocket_test.py b/tornado/test/websocket_test.py index 0a29ae6460..4d39f37046 100644 --- a/tornado/test/websocket_test.py +++ b/tornado/test/websocket_test.py @@ -1,4 +1,5 @@ import asyncio +import contextlib import functools import socket import traceback @@ -213,11 +214,21 @@ def open(self): class WebSocketBaseTestCase(AsyncHTTPTestCase): + def setUp(self): + super().setUp() + self.conns_to_close = [] + + def tearDown(self): + for conn in self.conns_to_close: + conn.close() + super().tearDown() + @gen.coroutine def ws_connect(self, path, **kwargs): ws = yield websocket_connect( "ws://127.0.0.1:%d%s" % (self.get_http_port(), path), **kwargs ) + self.conns_to_close.append(ws) raise gen.Return(ws) @@ -397,39 +408,49 @@ def test_websocket_network_fail(self): @gen_test def test_websocket_close_buffered_data(self): - ws = yield websocket_connect("ws://127.0.0.1:%d/echo" % self.get_http_port()) - ws.write_message("hello") - ws.write_message("world") - # Close the underlying stream. - ws.stream.close() + with contextlib.closing( + (yield websocket_connect("ws://127.0.0.1:%d/echo" % self.get_http_port())) + ) as ws: + ws.write_message("hello") + ws.write_message("world") + # Close the underlying stream. + ws.stream.close() @gen_test def test_websocket_headers(self): # Ensure that arbitrary headers can be passed through websocket_connect. - ws = yield websocket_connect( - HTTPRequest( - "ws://127.0.0.1:%d/header" % self.get_http_port(), - headers={"X-Test": "hello"}, + with contextlib.closing( + ( + yield websocket_connect( + HTTPRequest( + "ws://127.0.0.1:%d/header" % self.get_http_port(), + headers={"X-Test": "hello"}, + ) + ) ) - ) - response = yield ws.read_message() - self.assertEqual(response, "hello") + ) as ws: + response = yield ws.read_message() + self.assertEqual(response, "hello") @gen_test def test_websocket_header_echo(self): # Ensure that headers can be returned in the response. # Specifically, that arbitrary headers passed through websocket_connect # can be returned. - ws = yield websocket_connect( - HTTPRequest( - "ws://127.0.0.1:%d/header_echo" % self.get_http_port(), - headers={"X-Test-Hello": "hello"}, + with contextlib.closing( + ( + yield websocket_connect( + HTTPRequest( + "ws://127.0.0.1:%d/header_echo" % self.get_http_port(), + headers={"X-Test-Hello": "hello"}, + ) + ) + ) + ) as ws: + self.assertEqual(ws.headers.get("X-Test-Hello"), "hello") + self.assertEqual( + ws.headers.get("X-Extra-Response-Header"), "Extra-Response-Value" ) - ) - self.assertEqual(ws.headers.get("X-Test-Hello"), "hello") - self.assertEqual( - ws.headers.get("X-Extra-Response-Header"), "Extra-Response-Value" - ) @gen_test def test_server_close_reason(self): @@ -495,10 +516,12 @@ def test_check_origin_valid_no_path(self): url = "ws://127.0.0.1:%d/echo" % port headers = {"Origin": "http://127.0.0.1:%d" % port} - ws = yield websocket_connect(HTTPRequest(url, headers=headers)) - ws.write_message("hello") - response = yield ws.read_message() - self.assertEqual(response, "hello") + with contextlib.closing( + (yield websocket_connect(HTTPRequest(url, headers=headers))) + ) as ws: + ws.write_message("hello") + response = yield ws.read_message() + self.assertEqual(response, "hello") @gen_test def test_check_origin_valid_with_path(self): @@ -507,10 +530,12 @@ def test_check_origin_valid_with_path(self): url = "ws://127.0.0.1:%d/echo" % port headers = {"Origin": "http://127.0.0.1:%d/something" % port} - ws = yield websocket_connect(HTTPRequest(url, headers=headers)) - ws.write_message("hello") - response = yield ws.read_message() - self.assertEqual(response, "hello") + with contextlib.closing( + (yield websocket_connect(HTTPRequest(url, headers=headers))) + ) as ws: + ws.write_message("hello") + response = yield ws.read_message() + self.assertEqual(response, "hello") @gen_test def test_check_origin_invalid_partial_url(self): diff --git a/tornado/websocket.py b/tornado/websocket.py index d0abd42595..165cc316d6 100644 --- a/tornado/websocket.py +++ b/tornado/websocket.py @@ -20,6 +20,7 @@ import struct import tornado from urllib.parse import urlparse +import warnings import zlib from tornado.concurrent import Future, future_set_result_unless_cancelled @@ -1410,6 +1411,15 @@ def __init__( 104857600, ) + def __del__(self) -> None: + if self.protocol is not None: + # Unclosed client connections can sometimes log "task was destroyed but + # was pending" warnings if shutdown strikes at the wrong time (such as + # while a ping is being processed due to ping_interval). Log our own + # warning to make it a little more deterministic (although it's still + # dependent on GC timing). + warnings.warn("Unclosed WebSocketClientConnection", ResourceWarning) + def close(self, code: Optional[int] = None, reason: Optional[str] = None) -> None: """Closes the websocket connection. From 8f35b31ab82f5b665460966cec7c1bef323137c1 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 13 May 2023 20:58:52 -0400 Subject: [PATCH 09/70] web: Fix an open redirect in StaticFileHandler Under some configurations the default_filename redirect could be exploited to redirect to an attacker-controlled site. This change refuses to redirect to URLs that could be misinterpreted. A test case for the specific vulnerable configuration will follow after the patch has been available. --- tornado/web.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tornado/web.py b/tornado/web.py index 3b676e3c25..565140493e 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -2879,6 +2879,15 @@ def validate_absolute_path(self, root: str, absolute_path: str) -> Optional[str] # but there is some prefix to the path that was already # trimmed by the routing if not self.request.path.endswith("/"): + if self.request.path.startswith("//"): + # A redirect with two initial slashes is a "protocol-relative" URL. + # This means the next path segment is treated as a hostname instead + # of a part of the path, making this effectively an open redirect. + # Reject paths starting with two slashes to prevent this. + # This is only reachable under certain configurations. + raise HTTPError( + 403, "cannot redirect path with two initial slashes" + ) self.redirect(self.request.path + "/", permanent=True) return None absolute_path = os.path.join(absolute_path, self.default_filename) From f41d78dccb55d7ff632dfdb62024cc9934e6a024 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 13 May 2023 21:31:48 -0400 Subject: [PATCH 10/70] Version 6.3.2 --- docs/releases.rst | 1 + docs/releases/v6.3.2.rst | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 docs/releases/v6.3.2.rst diff --git a/docs/releases.rst b/docs/releases.rst index dd53b12ffe..fc7e41654f 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 2 + releases/v6.3.2 releases/v6.3.1 releases/v6.3.0 releases/v6.2.0 diff --git a/docs/releases/v6.3.2.rst b/docs/releases/v6.3.2.rst new file mode 100644 index 0000000000..250a6e4eb4 --- /dev/null +++ b/docs/releases/v6.3.2.rst @@ -0,0 +1,11 @@ +What's new in Tornado 6.3.2 +=========================== + +May 13, 2023 +------------ + +Security improvements +~~~~~~~~~~~~~~~~~~~~~ + +- Fixed an open redirect vulnerability in StaticFileHandler under certain + configurations. \ No newline at end of file From bffed1aaed56ca29eb1dd261afc2a8bb1380cd7a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sun, 14 May 2023 21:03:52 -0400 Subject: [PATCH 11/70] gen: Hold strong references to all asyncio.Tasks Per the warning in the asyncio documentation, we need to hold a strong reference to all asyncio Tasks to prevent premature GC. Following discussions in cpython (https://github.com/python/cpython/issues/91887), we hold these references on the IOLoop instance to ensure that they are strongly held but do not cause leaks if the event loop itself is discarded. This is expected to fix all of the various "task was destroyed but it is pending" warnings that have been reported. The IOLoop._pending_tasks set is expected to become obsolete if corresponding changes are made to asyncio in Python 3.13. Fixes #3209 Fixes #3047 Fixes #2763 Some issues involve this warning as their most visible symptom, but have an underlying cause that should still be addressed. Updates #2914 Updates #2356 --- tornado/gen.py | 18 +++++++++++------- tornado/ioloop.py | 20 +++++++++++++++++++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tornado/gen.py b/tornado/gen.py index 4819b85715..dab4fd09db 100644 --- a/tornado/gen.py +++ b/tornado/gen.py @@ -840,13 +840,17 @@ def handle_exception( return False -# Convert Awaitables into Futures. -try: - _wrap_awaitable = asyncio.ensure_future -except AttributeError: - # asyncio.ensure_future was introduced in Python 3.4.4, but - # Debian jessie still ships with 3.4.2 so try the old name. - _wrap_awaitable = getattr(asyncio, "async") +def _wrap_awaitable(awaitable: Awaitable) -> Future: + # Convert Awaitables into Futures. + # Note that we use ensure_future, which handles both awaitables + # and coroutines, rather than create_task, which only accepts + # coroutines. (ensure_future calls create_task if given a coroutine) + fut = asyncio.ensure_future(awaitable) + # See comments on IOLoop._pending_tasks. + loop = IOLoop.current() + loop._register_task(fut) + fut.add_done_callback(lambda f: loop._unregister_task(f)) + return fut def convert_yielded(yielded: _Yieldable) -> Future: diff --git a/tornado/ioloop.py b/tornado/ioloop.py index 6fbe9ee192..450fbf9484 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -50,7 +50,7 @@ from typing import Union, Any, Type, Optional, Callable, TypeVar, Tuple, Awaitable if typing.TYPE_CHECKING: - from typing import Dict, List # noqa: F401 + from typing import Dict, List, Set # noqa: F401 from typing_extensions import Protocol else: @@ -159,6 +159,18 @@ async def main(): # In Python 3, _ioloop_for_asyncio maps from asyncio loops to IOLoops. _ioloop_for_asyncio = dict() # type: Dict[asyncio.AbstractEventLoop, IOLoop] + # Maintain a set of all pending tasks to follow the warning in the docs + # of asyncio.create_tasks: + # https://docs.python.org/3.11/library/asyncio-task.html#asyncio.create_task + # This ensures that all pending tasks have a strong reference so they + # will not be garbage collected before they are finished. + # (Thus avoiding "task was destroyed but it is pending" warnings) + # An analogous change has been proposed in cpython for 3.13: + # https://github.com/python/cpython/issues/91887 + # If that change is accepted, this can eventually be removed. + # If it is not, we will consider the rationale and may remove this. + _pending_tasks = set() # type: Set[Future] + @classmethod def configure( cls, impl: "Union[None, str, Type[Configurable]]", **kwargs: Any @@ -805,6 +817,12 @@ def close_fd(self, fd: Union[int, _Selectable]) -> None: except OSError: pass + def _register_task(self, f: Future) -> None: + self._pending_tasks.add(f) + + def _unregister_task(self, f: Future) -> None: + self._pending_tasks.discard(f) + class _Timeout(object): """An IOLoop timeout, a UNIX timestamp and a callback""" From 0fdb39cb3376296a035198eb3e8f724a277229c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 05:57:30 +0000 Subject: [PATCH 12/70] build(deps): bump requests from 2.28.2 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 349db21a4f..4ab4ed529c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -83,7 +83,7 @@ pyproject-hooks==1.0.0 # via build pytz==2022.7.1 # via babel -requests==2.28.2 +requests==2.31.0 # via sphinx snowballstemmer==2.2.0 # via sphinx @@ -108,13 +108,6 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -tomli==2.0.1 - # via - # black - # build - # mypy - # pyproject-api - # tox tox==4.3.5 # via -r requirements.in types-pycurl==7.45.2.0 From 7640b8c37a0e2612808c99367a45e1c1f1d97ab5 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 14 Jun 2023 21:31:32 -0400 Subject: [PATCH 13/70] ci: Disable Python 3.12 in CI Current betas have a bug in GzipFile we can't easily work around. https://github.com/python/cpython/issues/105808 --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff38e66520..94be6fe985 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,8 +51,9 @@ jobs: tox_env: py311-full - python: '3.11.0' tox_env: py311-full - - python: '3.12.0-alpha - 3.12' - tox_env: py312-full + # 3.12 is disabled until https://github.com/python/cpython/issues/105808 is fixed + #- python: '3.12.0-alpha - 3.12' + # tox_env: py312-full - python: 'pypy-3.8' # Pypy is a lot slower due to jit warmup costs, so don't run the # "full" test config there. From 3cca32d593263922030060b7f4781bcb1367ec2a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 16 May 2023 20:57:50 -0400 Subject: [PATCH 14/70] asyncio: Manage the selector thread with an async generator Async generators have a special shutdown protocol which allows us to detect the end of the event loop and stop our thread. This lets us clean up the thread reliably when the event loop is started/stopped via the tornado IOLoop interfaces (which explicitly know about the selector thread), or when the latest asyncio interfaces are used (asyncio.run or manually calling shutdown_asyncgens). The thread is still leaked when older versions of the asyncio interfaces are used (loop.close *without* shutdown_asyncgens), but I've been unable to find a solution that does not print leak warnings even in the event of a clean shutdown. Use of shutdown_asyncgens is now effectively required for apps combining asyncio and tornado. This is unfortunate since leaking a thread is relatively expensive compared to the usual consequences of failing to call shutdown_asyncgens, but it seems to be the best we can do. Fixes #3173 --- tornado/platform/asyncio.py | 77 +++++++++++++++++++++++------------- tornado/test/asyncio_test.py | 51 ++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 28 deletions(-) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 2e95b801a3..8f30b2add1 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -64,11 +64,12 @@ def _atexit_callback() -> None: loop._waker_w.send(b"a") except BlockingIOError: pass - # If we don't join our (daemon) thread here, we may get a deadlock - # during interpreter shutdown. I don't really understand why. This - # deadlock happens every time in CI (both travis and appveyor) but - # I've never been able to reproduce locally. - loop._thread.join() + if loop._thread is not None: + # If we don't join our (daemon) thread here, we may get a deadlock + # during interpreter shutdown. I don't really understand why. This + # deadlock happens every time in CI (both travis and appveyor) but + # I've never been able to reproduce locally. + loop._thread.join() _selector_loops.clear() @@ -443,24 +444,25 @@ class SelectorThread: def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: self._real_loop = real_loop - # Create a thread to run the select system call. We manage this thread - # manually so we can trigger a clean shutdown from an atexit hook. Note - # that due to the order of operations at shutdown, only daemon threads - # can be shut down in this way (non-daemon threads would require the - # introduction of a new hook: https://bugs.python.org/issue41962) self._select_cond = threading.Condition() self._select_args = ( None ) # type: Optional[Tuple[List[_FileDescriptorLike], List[_FileDescriptorLike]]] self._closing_selector = False - self._thread = threading.Thread( - name="Tornado selector", - daemon=True, - target=self._run_select, + self._thread = None # type: Optional[threading.Thread] + self._thread_manager_handle = self._thread_manager() + + async def thread_manager_anext() -> None: + # the anext builtin wasn't added until 3.10. We just need to iterate + # this generator one step. + await self._thread_manager_handle.__anext__() + + # When the loop starts, start the thread. Not too soon because we can't + # clean up if we get to this point but the event loop is closed without + # starting. + self._real_loop.call_soon( + lambda: self._real_loop.create_task(thread_manager_anext()) ) - self._thread.start() - # Start the select loop once the loop is started. - self._real_loop.call_soon(self._start_select) self._readers = {} # type: Dict[_FileDescriptorLike, Callable] self._writers = {} # type: Dict[_FileDescriptorLike, Callable] @@ -473,16 +475,6 @@ def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: _selector_loops.add(self) self.add_reader(self._waker_r, self._consume_waker) - def __del__(self) -> None: - # If the top-level application code uses asyncio interfaces to - # start and stop the event loop, no objects created in Tornado - # can get a clean shutdown notification. If we're just left to - # be GC'd, we must explicitly close our sockets to avoid - # logging warnings. - _selector_loops.discard(self) - self._waker_r.close() - self._waker_w.close() - def close(self) -> None: if self._closed: return @@ -490,13 +482,42 @@ def close(self) -> None: self._closing_selector = True self._select_cond.notify() self._wake_selector() - self._thread.join() + if self._thread is not None: + self._thread.join() _selector_loops.discard(self) + self.remove_reader(self._waker_r) self._waker_r.close() self._waker_w.close() self._closed = True + async def _thread_manager(self) -> typing.AsyncGenerator[None, None]: + # Create a thread to run the select system call. We manage this thread + # manually so we can trigger a clean shutdown from an atexit hook. Note + # that due to the order of operations at shutdown, only daemon threads + # can be shut down in this way (non-daemon threads would require the + # introduction of a new hook: https://bugs.python.org/issue41962) + self._thread = threading.Thread( + name="Tornado selector", + daemon=True, + target=self._run_select, + ) + self._thread.start() + self._start_select() + try: + # The presense of this yield statement means that this coroutine + # is actually an asynchronous generator, which has a special + # shutdown protocol. We wait at this yield point until the + # event loop's shutdown_asyncgens method is called, at which point + # we will get a GeneratorExit exception and can shut down the + # selector thread. + yield + except GeneratorExit: + self.close() + raise + def _wake_selector(self) -> None: + if self._closed: + return try: self._waker_w.send(b"a") except BlockingIOError: diff --git a/tornado/test/asyncio_test.py b/tornado/test/asyncio_test.py index f6b719bb43..e4c603aedf 100644 --- a/tornado/test/asyncio_test.py +++ b/tornado/test/asyncio_test.py @@ -11,6 +11,8 @@ # under the License. import asyncio +import threading +import time import unittest import warnings @@ -157,6 +159,55 @@ def test_asyncio_close_leak(self): self.assertEqual(new_count, 1) +class SelectorThreadLeakTest(unittest.TestCase): + # These tests are only relevant on windows, but they should pass anywhere. + def setUp(self): + # As a precaution, ensure that we've run an event loop at least once + # so if it spins up any singleton threads they're already there. + asyncio.run(self.dummy_tornado_coroutine()) + self.orig_thread_count = threading.active_count() + + def assert_no_thread_leak(self): + # For some reason we see transient failures here, but I haven't been able + # to catch it to identify which thread is causing it. Whatever thread it + # is, it appears to quickly clean up on its own, so just retry a few times. + deadline = time.time() + 1 + while time.time() < deadline: + threads = list(threading.enumerate()) + if len(threads) == self.orig_thread_count: + break + time.sleep(0.1) + self.assertEqual(self.orig_thread_count, len(threads), threads) + + async def dummy_tornado_coroutine(self): + # Just access the IOLoop to initialize the selector thread. + IOLoop.current() + + def test_asyncio_run(self): + for i in range(10): + # asyncio.run calls shutdown_asyncgens for us. + asyncio.run(self.dummy_tornado_coroutine()) + self.assert_no_thread_leak() + + def test_asyncio_manual(self): + for i in range(10): + loop = asyncio.new_event_loop() + loop.run_until_complete(self.dummy_tornado_coroutine()) + # Without this step, we'd leak the thread. + loop.run_until_complete(loop.shutdown_asyncgens()) + loop.close() + self.assert_no_thread_leak() + + def test_tornado(self): + for i in range(10): + # The IOLoop interfaces are aware of the selector thread and + # (synchronously) shut it down. + loop = IOLoop(make_current=False) + loop.run_sync(self.dummy_tornado_coroutine) + loop.close() + self.assert_no_thread_leak() + + class AnyThreadEventLoopPolicyTest(unittest.TestCase): def setUp(self): self.orig_policy = asyncio.get_event_loop_policy() From 6c7d44bde933632506e9f74cbd5a4950bbbe2926 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 19 Jun 2023 15:24:54 -0400 Subject: [PATCH 15/70] asyncio: Modernize type annotations --- tornado/platform/asyncio.py | 68 +++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 8f30b2add1..679068e69b 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -36,23 +36,33 @@ from tornado.gen import convert_yielded from tornado.ioloop import IOLoop, _Selectable -from typing import Any, TypeVar, Awaitable, Callable, Union, Optional, List, Dict - -if typing.TYPE_CHECKING: - from typing import Set, Tuple # noqa: F401 - from typing_extensions import Protocol +from typing import ( + Any, + Awaitable, + Callable, + Dict, + List, + Optional, + Protocol, + Set, + Tuple, + TypeVar, + Union, +) + + +class _HasFileno(Protocol): + def fileno(self) -> int: + pass - class _HasFileno(Protocol): - def fileno(self) -> int: - pass - _FileDescriptorLike = Union[int, _HasFileno] +_FileDescriptorLike = Union[int, _HasFileno] _T = TypeVar("_T") # Collection of selector thread event loops to shut down on exit. -_selector_loops = set() # type: Set[SelectorThread] +_selector_loops: Set["SelectorThread"] = set() def _atexit_callback() -> None: @@ -88,16 +98,16 @@ def initialize( # type: ignore # as windows where the default event loop does not implement these methods. self.selector_loop = asyncio_loop if hasattr(asyncio, "ProactorEventLoop") and isinstance( - asyncio_loop, asyncio.ProactorEventLoop # type: ignore + asyncio_loop, asyncio.ProactorEventLoop ): # Ignore this line for mypy because the abstract method checker # doesn't understand dynamic proxies. self.selector_loop = AddThreadSelectorEventLoop(asyncio_loop) # type: ignore # Maps fd to (fileobj, handler function) pair (as in IOLoop.add_handler) - self.handlers = {} # type: Dict[int, Tuple[Union[int, _Selectable], Callable]] + self.handlers: Dict[int, Tuple[Union[int, _Selectable], Callable]] = {} # Set of fds listening for reads/writes - self.readers = set() # type: Set[int] - self.writers = set() # type: Set[int] + self.readers: Set[int] = set() + self.writers: Set[int] = set() self.closing = False # If an asyncio loop was closed through an asyncio interface # instead of IOLoop.close(), we'd never hear about it and may @@ -419,9 +429,7 @@ def __init__(self) -> None: def get_event_loop(self) -> asyncio.AbstractEventLoop: try: return super().get_event_loop() - except (RuntimeError, AssertionError): - # This was an AssertionError in Python 3.4.2 (which ships with Debian Jessie) - # and changed to a RuntimeError in 3.4.3. + except RuntimeError: # "There is no current event loop in thread %r" loop = self.new_event_loop() self.set_event_loop(loop) @@ -445,11 +453,11 @@ def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None: self._real_loop = real_loop self._select_cond = threading.Condition() - self._select_args = ( - None - ) # type: Optional[Tuple[List[_FileDescriptorLike], List[_FileDescriptorLike]]] + self._select_args: Optional[ + Tuple[List[_FileDescriptorLike], List[_FileDescriptorLike]] + ] = None self._closing_selector = False - self._thread = None # type: Optional[threading.Thread] + self._thread: Optional[threading.Thread] = None self._thread_manager_handle = self._thread_manager() async def thread_manager_anext() -> None: @@ -464,8 +472,8 @@ async def thread_manager_anext() -> None: lambda: self._real_loop.create_task(thread_manager_anext()) ) - self._readers = {} # type: Dict[_FileDescriptorLike, Callable] - self._writers = {} # type: Dict[_FileDescriptorLike, Callable] + self._readers: Dict[_FileDescriptorLike, Callable] = {} + self._writers: Dict[_FileDescriptorLike, Callable] = {} # Writing to _waker_w will wake up the selector thread, which # watches for _waker_r to be readable. @@ -603,7 +611,7 @@ def _run_select(self) -> None: pass def _handle_select( - self, rs: List["_FileDescriptorLike"], ws: List["_FileDescriptorLike"] + self, rs: List[_FileDescriptorLike], ws: List[_FileDescriptorLike] ) -> None: for r in rs: self._handle_event(r, self._readers) @@ -613,8 +621,8 @@ def _handle_select( def _handle_event( self, - fd: "_FileDescriptorLike", - cb_map: Dict["_FileDescriptorLike", Callable], + fd: _FileDescriptorLike, + cb_map: Dict[_FileDescriptorLike, Callable], ) -> None: try: callback = cb_map[fd] @@ -623,18 +631,18 @@ def _handle_event( callback() def add_reader( - self, fd: "_FileDescriptorLike", callback: Callable[..., None], *args: Any + self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any ) -> None: self._readers[fd] = functools.partial(callback, *args) self._wake_selector() def add_writer( - self, fd: "_FileDescriptorLike", callback: Callable[..., None], *args: Any + self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any ) -> None: self._writers[fd] = functools.partial(callback, *args) self._wake_selector() - def remove_reader(self, fd: "_FileDescriptorLike") -> bool: + def remove_reader(self, fd: _FileDescriptorLike) -> bool: try: del self._readers[fd] except KeyError: @@ -642,7 +650,7 @@ def remove_reader(self, fd: "_FileDescriptorLike") -> bool: self._wake_selector() return True - def remove_writer(self, fd: "_FileDescriptorLike") -> bool: + def remove_writer(self, fd: _FileDescriptorLike) -> bool: try: del self._writers[fd] except KeyError: From 78c2acb33539374ebf1d90e0c1a5b15bdba58cf1 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 19 Jun 2023 15:28:45 -0400 Subject: [PATCH 16/70] asyncio_test: Use inequality when checking thread leaks Sometimes we have a net reduction in the thread count because there was an extra thread running at the time captured the starting count, so use inequality instead of exact matches. --- tornado/test/asyncio_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tornado/test/asyncio_test.py b/tornado/test/asyncio_test.py index e4c603aedf..4eda9395c8 100644 --- a/tornado/test/asyncio_test.py +++ b/tornado/test/asyncio_test.py @@ -171,13 +171,15 @@ def assert_no_thread_leak(self): # For some reason we see transient failures here, but I haven't been able # to catch it to identify which thread is causing it. Whatever thread it # is, it appears to quickly clean up on its own, so just retry a few times. + # At least some of the time the errant thread was running at the time we + # captured self.orig_thread_count, so use inequalities. deadline = time.time() + 1 while time.time() < deadline: threads = list(threading.enumerate()) - if len(threads) == self.orig_thread_count: + if len(threads) <= self.orig_thread_count: break time.sleep(0.1) - self.assertEqual(self.orig_thread_count, len(threads), threads) + self.assertLessEqual(len(threads), self.orig_thread_count, threads) async def dummy_tornado_coroutine(self): # Just access the IOLoop to initialize the selector thread. From 2405a21d4d46d05000d15f51d25fa9904eebdfc4 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 19 Jun 2023 15:30:48 -0400 Subject: [PATCH 17/70] asyncio_test: Remove obsolete py3.4 compatibility --- tornado/test/asyncio_test.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tornado/test/asyncio_test.py b/tornado/test/asyncio_test.py index 4eda9395c8..bb6416a549 100644 --- a/tornado/test/asyncio_test.py +++ b/tornado/test/asyncio_test.py @@ -44,15 +44,8 @@ async def add_callback(): @gen_test def test_asyncio_future(self): # Test that we can yield an asyncio future from a tornado coroutine. - # Without 'yield from', we must wrap coroutines in ensure_future, - # which was introduced during Python 3.4, deprecating the prior "async". - if hasattr(asyncio, "ensure_future"): - ensure_future = asyncio.ensure_future - else: - # async is a reserved word in Python 3.7 - ensure_future = getattr(asyncio, "async") - - x = yield ensure_future( + # Without 'yield from', we must wrap coroutines in ensure_future. + x = yield asyncio.ensure_future( asyncio.get_event_loop().run_in_executor(None, lambda: 42) ) self.assertEqual(x, 42) From acde88d5e7c5b60e6fda42a8cc8ad09d1815b95a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 21 Jun 2023 20:53:19 -0400 Subject: [PATCH 18/70] ci: Re-enable python 3.12 Now that python/cpython#105808 is fixed in beta 3. --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 94be6fe985..bd94444230 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,9 +51,8 @@ jobs: tox_env: py311-full - python: '3.11.0' tox_env: py311-full - # 3.12 is disabled until https://github.com/python/cpython/issues/105808 is fixed - #- python: '3.12.0-alpha - 3.12' - # tox_env: py312-full + - python: '3.12.0-beta.3 - 3.12' + tox_env: py312-full - python: 'pypy-3.8' # Pypy is a lot slower due to jit warmup costs, so don't run the # "full" test config there. From 4c6b8cacf56594760f80dc3cf4e2b6115cf8dc87 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 6 Jun 2023 23:25:38 -0400 Subject: [PATCH 19/70] build: Upgrade tox to support python 3.12 --- requirements.txt | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4ab4ed529c..a2d1ebc435 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ black==22.12.0 # via -r requirements.in build==0.10.0 # via pip-tools -cachetools==5.2.1 +cachetools==5.3.1 # via tox certifi==2022.12.7 # via requests @@ -32,7 +32,7 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme -filelock==3.9.0 +filelock==3.12.0 # via # tox # virtualenv @@ -54,7 +54,7 @@ mypy-extensions==0.4.3 # via # black # mypy -packaging==23.0 +packaging==23.1 # via # build # pyproject-api @@ -64,7 +64,7 @@ pathspec==0.10.3 # via black pip-tools==6.12.1 # via -r requirements.in -platformdirs==2.6.2 +platformdirs==3.5.1 # via # black # tox @@ -77,7 +77,7 @@ pyflakes==3.0.1 # via flake8 pygments==2.14.0 # via sphinx -pyproject-api==1.5.0 +pyproject-api==1.5.1 # via tox pyproject-hooks==1.0.0 # via build @@ -108,7 +108,14 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -tox==4.3.5 +tomli==2.0.1 + # via + # black + # build + # mypy + # pyproject-api + # tox +tox==4.6.0 # via -r requirements.in types-pycurl==7.45.2.0 # via -r requirements.in @@ -116,7 +123,7 @@ typing-extensions==4.4.0 # via mypy urllib3==1.26.14 # via requests -virtualenv==20.17.1 +virtualenv==20.23.0 # via tox wheel==0.38.4 # via pip-tools From 4d4d80c1d076dcb4e051c969ae4b66557d3856b8 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 8 Jun 2023 22:52:19 -0400 Subject: [PATCH 20/70] *: Adapt to deprecation of datetime utc methods Python 3.12 deprecates the utcnow and utcfromtimestamp methods and discourages the use of naive datetimes to represent UTC. This was previously the main way that Tornado used datetimes (since it was the only option available in Python 2 before the introduction of datetime.timezone.utc in Python 3.2). - httpclient_test: Test-only change to test that both kinds of datetimes are supported in If-Modified-Since (this just calls httputil.format_timestamp) - httputil: No functional changes, but format_timestamp's support for both naive and aware datetimes is now tested. - locale: format_timestamp now supports aware datetimes (in addition to the existing support for naive datetimes). - web: Cookie expirations internally use aware datetimes. StaticFileHandler.get_modified_time now supports both and the standard implementation returns aware. It feels fragile that "naive" and "aware" datetimes are not distinct types but subject to data-dependent behavior. This change uses "aware" datetimes throughout Tornado, but some operations (comparisons and subtraction) fail with mixed datetime types and if I missed any in this change may cause errors if naive datetimes were used (where previously naive datetimes would have been required). But that's apparently the API we have to work with. --- tornado/httputil.py | 3 +- tornado/locale.py | 12 +++-- tornado/test/httpclient_test.py | 12 ++++- tornado/test/httputil_test.py | 26 +++++++++- tornado/test/locale_test.py | 88 ++++++++++++++++++--------------- tornado/test/web_test.py | 25 +++++----- tornado/web.py | 28 +++++++---- 7 files changed, 126 insertions(+), 68 deletions(-) diff --git a/tornado/httputil.py b/tornado/httputil.py index 9c341d47cc..b21d8046c4 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -856,7 +856,8 @@ def format_timestamp( The argument may be a numeric timestamp as returned by `time.time`, a time tuple as returned by `time.gmtime`, or a `datetime.datetime` - object. + object. Naive `datetime.datetime` objects are assumed to represent + UTC; aware objects are converted to UTC before formatting. >>> format_timestamp(1359312200) 'Sun, 27 Jan 2013 18:43:20 GMT' diff --git a/tornado/locale.py b/tornado/locale.py index 55072af28d..c5526703b1 100644 --- a/tornado/locale.py +++ b/tornado/locale.py @@ -333,7 +333,7 @@ def format_date( shorter: bool = False, full_format: bool = False, ) -> str: - """Formats the given date (which should be GMT). + """Formats the given date. By default, we return a relative time (e.g., "2 minutes ago"). You can return an absolute date string with ``relative=False``. @@ -343,10 +343,16 @@ def format_date( This method is primarily intended for dates in the past. For dates in the future, we fall back to full format. + + .. versionchanged:: 6.4 + Aware `datetime.datetime` objects are now supported (naive + datetimes are still assumed to be UTC). """ if isinstance(date, (int, float)): - date = datetime.datetime.utcfromtimestamp(date) - now = datetime.datetime.utcnow() + date = datetime.datetime.fromtimestamp(date, datetime.timezone.utc) + if date.tzinfo is None: + date = date.replace(tzinfo=datetime.timezone.utc) + now = datetime.datetime.now(datetime.timezone.utc) if date > now: if relative and (date - now).seconds < 60: # Due to click skew, things are some things slightly diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py index a71ec0afb6..a41040e64a 100644 --- a/tornado/test/httpclient_test.py +++ b/tornado/test/httpclient_test.py @@ -28,7 +28,7 @@ from tornado.log import gen_log, app_log from tornado import netutil from tornado.testing import AsyncHTTPTestCase, bind_unused_port, gen_test, ExpectLog -from tornado.test.util import skipOnTravis +from tornado.test.util import skipOnTravis, ignore_deprecation from tornado.web import Application, RequestHandler, url from tornado.httputil import format_timestamp, HTTPHeaders @@ -887,7 +887,15 @@ def test_body_setter(self): self.assertEqual(request.body, utf8("foo")) def test_if_modified_since(self): - http_date = datetime.datetime.utcnow() + http_date = datetime.datetime.now(datetime.timezone.utc) + request = HTTPRequest("http://example.com", if_modified_since=http_date) + self.assertEqual( + request.headers, {"If-Modified-Since": format_timestamp(http_date)} + ) + + def test_if_modified_since_naive_deprecated(self): + with ignore_deprecation(): + http_date = datetime.datetime.utcnow() request = HTTPRequest("http://example.com", if_modified_since=http_date) self.assertEqual( request.headers, {"If-Modified-Since": format_timestamp(http_date)} diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index 8424491d87..aa9b6ee253 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -13,6 +13,7 @@ from tornado.escape import utf8, native_str from tornado.log import gen_log from tornado.testing import ExpectLog +from tornado.test.util import ignore_deprecation import copy import datetime @@ -412,8 +413,29 @@ def test_time_tuple(self): self.assertEqual(9, len(tup)) self.check(tup) - def test_datetime(self): - self.check(datetime.datetime.utcfromtimestamp(self.TIMESTAMP)) + def test_utc_naive_datetime(self): + self.check( + datetime.datetime.fromtimestamp( + self.TIMESTAMP, datetime.timezone.utc + ).replace(tzinfo=None) + ) + + def test_utc_naive_datetime_deprecated(self): + with ignore_deprecation(): + self.check(datetime.datetime.utcfromtimestamp(self.TIMESTAMP)) + + def test_utc_aware_datetime(self): + self.check( + datetime.datetime.fromtimestamp(self.TIMESTAMP, datetime.timezone.utc) + ) + + def test_other_aware_datetime(self): + # Other timezones are ignored; the timezone is always printed as GMT + self.check( + datetime.datetime.fromtimestamp( + self.TIMESTAMP, datetime.timezone(datetime.timedelta(hours=-4)) + ) + ) # HTTPServerRequest is mainly tested incidentally to the server itself, diff --git a/tornado/test/locale_test.py b/tornado/test/locale_test.py index ee74cb05e8..a2e0872b8f 100644 --- a/tornado/test/locale_test.py +++ b/tornado/test/locale_test.py @@ -91,45 +91,55 @@ def test_format_date(self): locale.format_date(date, full_format=True), "April 28, 2013 at 6:35 pm" ) - now = datetime.datetime.utcnow() - - self.assertEqual( - locale.format_date(now - datetime.timedelta(seconds=2), full_format=False), - "2 seconds ago", - ) - self.assertEqual( - locale.format_date(now - datetime.timedelta(minutes=2), full_format=False), - "2 minutes ago", - ) - self.assertEqual( - locale.format_date(now - datetime.timedelta(hours=2), full_format=False), - "2 hours ago", - ) - - self.assertEqual( - locale.format_date( - now - datetime.timedelta(days=1), full_format=False, shorter=True - ), - "yesterday", - ) - - date = now - datetime.timedelta(days=2) - self.assertEqual( - locale.format_date(date, full_format=False, shorter=True), - locale._weekdays[date.weekday()], - ) - - date = now - datetime.timedelta(days=300) - self.assertEqual( - locale.format_date(date, full_format=False, shorter=True), - "%s %d" % (locale._months[date.month - 1], date.day), - ) - - date = now - datetime.timedelta(days=500) - self.assertEqual( - locale.format_date(date, full_format=False, shorter=True), - "%s %d, %d" % (locale._months[date.month - 1], date.day, date.year), - ) + aware_dt = datetime.datetime.now(datetime.timezone.utc) + naive_dt = aware_dt.replace(tzinfo=None) + for name, now in {"aware": aware_dt, "naive": naive_dt}.items(): + with self.subTest(dt=name): + self.assertEqual( + locale.format_date( + now - datetime.timedelta(seconds=2), full_format=False + ), + "2 seconds ago", + ) + self.assertEqual( + locale.format_date( + now - datetime.timedelta(minutes=2), full_format=False + ), + "2 minutes ago", + ) + self.assertEqual( + locale.format_date( + now - datetime.timedelta(hours=2), full_format=False + ), + "2 hours ago", + ) + + self.assertEqual( + locale.format_date( + now - datetime.timedelta(days=1), + full_format=False, + shorter=True, + ), + "yesterday", + ) + + date = now - datetime.timedelta(days=2) + self.assertEqual( + locale.format_date(date, full_format=False, shorter=True), + locale._weekdays[date.weekday()], + ) + + date = now - datetime.timedelta(days=300) + self.assertEqual( + locale.format_date(date, full_format=False, shorter=True), + "%s %d" % (locale._months[date.month - 1], date.day), + ) + + date = now - datetime.timedelta(days=500) + self.assertEqual( + locale.format_date(date, full_format=False, shorter=True), + "%s %d, %d" % (locale._months[date.month - 1], date.day, date.year), + ) def test_friendly_number(self): locale = tornado.locale.get("en_US") diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index 56900d9a00..fb9c3417b9 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -404,10 +404,10 @@ def test_set_cookie_expires_days(self): match = re.match("foo=bar; expires=(?P.+); Path=/", header) assert match is not None - expires = datetime.datetime.utcnow() + datetime.timedelta(days=10) - parsed = email.utils.parsedate(match.groupdict()["expires"]) - assert parsed is not None - header_expires = datetime.datetime(*parsed[:6]) + expires = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=10 + ) + header_expires = email.utils.parsedate_to_datetime(match.groupdict()["expires"]) self.assertTrue(abs((expires - header_expires).total_seconds()) < 10) def test_set_cookie_false_flags(self): @@ -1697,11 +1697,10 @@ def get(self): def test_date_header(self): response = self.fetch("/") - parsed = email.utils.parsedate(response.headers["Date"]) - assert parsed is not None - header_date = datetime.datetime(*parsed[:6]) + header_date = email.utils.parsedate_to_datetime(response.headers["Date"]) self.assertTrue( - header_date - datetime.datetime.utcnow() < datetime.timedelta(seconds=2) + header_date - datetime.datetime.now(datetime.timezone.utc) + < datetime.timedelta(seconds=2) ) @@ -3010,10 +3009,12 @@ def test_xsrf_httponly(self): match = re.match(".*; expires=(?P.+);.*", header) assert match is not None - expires = datetime.datetime.utcnow() + datetime.timedelta(days=2) - parsed = email.utils.parsedate(match.groupdict()["expires"]) - assert parsed is not None - header_expires = datetime.datetime(*parsed[:6]) + expires = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=2 + ) + header_expires = email.utils.parsedate_to_datetime(match.groupdict()["expires"]) + if header_expires.tzinfo is None: + header_expires = header_expires.replace(tzinfo=datetime.timezone.utc) self.assertTrue(abs((expires - header_expires).total_seconds()) < 10) diff --git a/tornado/web.py b/tornado/web.py index 565140493e..439e02c47b 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -647,7 +647,9 @@ def set_cookie( if domain: morsel["domain"] = domain if expires_days is not None and not expires: - expires = datetime.datetime.utcnow() + datetime.timedelta(days=expires_days) + expires = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=expires_days + ) if expires: morsel["expires"] = httputil.format_timestamp(expires) if path: @@ -698,7 +700,9 @@ def clear_cookie(self, name: str, **kwargs: Any) -> None: raise TypeError( f"clear_cookie() got an unexpected keyword argument '{excluded_arg}'" ) - expires = datetime.datetime.utcnow() - datetime.timedelta(days=365) + expires = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + days=365 + ) self.set_cookie(name, value="", expires=expires, **kwargs) def clear_all_cookies(self, **kwargs: Any) -> None: @@ -2812,12 +2816,12 @@ def should_return_304(self) -> bool: # content has not been modified ims_value = self.request.headers.get("If-Modified-Since") if ims_value is not None: - date_tuple = email.utils.parsedate(ims_value) - if date_tuple is not None: - if_since = datetime.datetime(*date_tuple[:6]) - assert self.modified is not None - if if_since >= self.modified: - return True + if_since = email.utils.parsedate_to_datetime(ims_value) + if if_since.tzinfo is None: + if_since = if_since.replace(tzinfo=datetime.timezone.utc) + assert self.modified is not None + if if_since >= self.modified: + return True return False @@ -2981,6 +2985,10 @@ def get_modified_time(self) -> Optional[datetime.datetime]: object or None. .. versionadded:: 3.1 + + .. versionchanged:: 6.4 + Now returns an aware datetime object instead of a naive one. + Subclasses that override this method may return either kind. """ stat_result = self._stat() # NOTE: Historically, this used stat_result[stat.ST_MTIME], @@ -2991,7 +2999,9 @@ def get_modified_time(self) -> Optional[datetime.datetime]: # consistency with the past (and because we have a unit test # that relies on this), we truncate the float here, although # I'm not sure that's the right thing to do. - modified = datetime.datetime.utcfromtimestamp(int(stat_result.st_mtime)) + modified = datetime.datetime.fromtimestamp( + int(stat_result.st_mtime), datetime.timezone.utc + ) return modified def get_content_type(self) -> str: From b56245730eb27e36da2ba9a2df3ad753649f8c2a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 6 Jun 2023 22:48:05 -0400 Subject: [PATCH 21/70] test: Add test for open redirect fixed in 6.3.2 --- tornado/test/web_test.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index fb9c3417b9..7387124d17 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -1437,6 +1437,35 @@ def test_static_default_redirect(self): self.assertTrue(response.headers["Location"].endswith("/static/dir/")) +class StaticDefaultFilenameRootTest(WebTestCase): + def get_app_kwargs(self): + return dict( + static_path=os.path.abspath(relpath("static")), + static_handler_args=dict(default_filename="index.html"), + static_url_prefix="/", + ) + + def get_handlers(self): + return [] + + def get_http_client(self): + # simple_httpclient only: curl doesn't let you send a request starting + # with two slashes. + return SimpleAsyncHTTPClient() + + def test_no_open_redirect(self): + # This test verifies that the open redirect that affected some configurations + # prior to Tornado 6.3.2 is no longer possible. The vulnerability required + # a static_url_prefix of "/" and a default_filename (any value) to be set. + # The absolute server-side path to the static directory must also be known. + with ExpectLog(gen_log, ".*cannot redirect path with two initial slashes"): + response = self.fetch( + f"//evil.com/../{os.path.dirname(__file__)}/static/dir", + follow_redirects=False, + ) + self.assertEqual(response.code, 403) + + class StaticFileWithPathTest(WebTestCase): def get_app_kwargs(self): return dict( @@ -2847,7 +2876,7 @@ def test_xsrf_success_header(self): body=b"", headers=dict( {"X-Xsrftoken": self.xsrf_token}, # type: ignore - **self.cookie_headers() + **self.cookie_headers(), ), ) self.assertEqual(response.code, 200) From 4a57ac87abcb37d852c83e7079e28c19ff98eaf9 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 19 Jun 2023 15:54:01 -0400 Subject: [PATCH 22/70] auth: Deprecate unused client_secret parameter OAuth2Mixin.authorize_redirect has never used this argument and similar methods in this module don't have it. Closes #1122 --- tornado/auth.py | 7 +++++++ tornado/test/auth_test.py | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tornado/auth.py b/tornado/auth.py index 59501f56b7..97cfc93ada 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -63,6 +63,7 @@ async def get(self): import time import urllib.parse import uuid +import warnings from tornado import httpclient from tornado import escape @@ -571,7 +572,13 @@ def authorize_redirect( The ``callback`` argument and returned awaitable were removed; this is now an ordinary synchronous function. + + .. deprecated:: 6.4 + The ``client_secret`` argument (which has never had any effect) + is deprecated and will be removed in Tornado 7.0. """ + if client_secret is not None: + warnings.warn("client_secret argument is deprecated", DeprecationWarning) handler = cast(RequestHandler, self) args = {"response_type": response_type} if redirect_uri is not None: diff --git a/tornado/test/auth_test.py b/tornado/test/auth_test.py index 3cd715f733..5eddb9803c 100644 --- a/tornado/test/auth_test.py +++ b/tornado/test/auth_test.py @@ -550,7 +550,6 @@ def get(self): self.authorize_redirect( redirect_uri=self._OAUTH_REDIRECT_URI, client_id=self.settings["google_oauth"]["key"], - client_secret=self.settings["google_oauth"]["secret"], scope=["profile", "email"], response_type="code", extra_params={"prompt": "select_account"}, From 5ee1f1bbc5775185f13925d790ca414e937cf6aa Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 19 Jun 2023 16:12:24 -0400 Subject: [PATCH 23/70] auth: Deprecate TwitterMixin It's unclear to what extent this class still works given Twitter's recent API changes. Deprecate it since I don't intend to track future changes here. --- demos/README.rst | 1 - demos/twitter/home.html | 12 ---- demos/twitter/twitterdemo.py | 105 ----------------------------------- tornado/auth.py | 6 ++ 4 files changed, 6 insertions(+), 118 deletions(-) delete mode 100644 demos/twitter/home.html delete mode 100755 demos/twitter/twitterdemo.py diff --git a/demos/README.rst b/demos/README.rst index 1021254792..0429761dd9 100644 --- a/demos/README.rst +++ b/demos/README.rst @@ -30,7 +30,6 @@ Feature demos ~~~~~~~~~~~~~ - ``facebook``: Authentication with the Facebook Graph API. -- ``twitter``: Authentication with the Twitter API. - ``file_upload``: Client and server support for streaming HTTP request payloads. - ``tcpecho``: Using the lower-level ``IOStream`` interfaces for non-HTTP diff --git a/demos/twitter/home.html b/demos/twitter/home.html deleted file mode 100644 index a2c159c58b..0000000000 --- a/demos/twitter/home.html +++ /dev/null @@ -1,12 +0,0 @@ - - - Tornado Twitter Demo - - -
    - {% for tweet in timeline %} -
  • {{ tweet['user']['screen_name'] }}: {{ tweet['text'] }}
  • - {% end %} -
- - diff --git a/demos/twitter/twitterdemo.py b/demos/twitter/twitterdemo.py deleted file mode 100755 index f7bc1ebd2b..0000000000 --- a/demos/twitter/twitterdemo.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -"""A simplistic Twitter viewer to demonstrate the use of TwitterMixin. - -To run this app, you must first register an application with Twitter: - 1) Go to https://dev.twitter.com/apps and create an application. - Your application must have a callback URL registered with Twitter. - It doesn't matter what it is, but it has to be there (Twitter won't - let you use localhost in a registered callback URL, but that won't stop - you from running this demo on localhost). - 2) Create a file called "secrets.cfg" and put your consumer key and - secret (which Twitter gives you when you register an app) in it: - twitter_consumer_key = 'asdf1234' - twitter_consumer_secret = 'qwer5678' - (you could also generate a random value for "cookie_secret" and put it - in the same file, although it's not necessary to run this demo) - 3) Run this program and go to http://localhost:8888 (by default) in your - browser. -""" - -import asyncio -import logging - -from tornado.auth import TwitterMixin -from tornado.escape import json_decode, json_encode -from tornado import gen -from tornado.options import define, options, parse_command_line, parse_config_file -from tornado.web import Application, RequestHandler, authenticated - -define("port", default=8888, help="port to listen on") -define( - "config_file", default="secrets.cfg", help="filename for additional configuration" -) - -define( - "debug", - default=False, - group="application", - help="run in debug mode (with automatic reloading)", -) -# The following settings should probably be defined in secrets.cfg -define("twitter_consumer_key", type=str, group="application") -define("twitter_consumer_secret", type=str, group="application") -define( - "cookie_secret", - type=str, - group="application", - default="__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE__", - help="signing key for secure cookies", -) - - -class BaseHandler(RequestHandler): - COOKIE_NAME = "twitterdemo_user" - - def get_current_user(self): - user_json = self.get_signed_cookie(self.COOKIE_NAME) - if not user_json: - return None - return json_decode(user_json) - - -class MainHandler(BaseHandler, TwitterMixin): - @authenticated - @gen.coroutine - def get(self): - timeline = yield self.twitter_request( - "/statuses/home_timeline", access_token=self.current_user["access_token"] - ) - self.render("home.html", timeline=timeline) - - -class LoginHandler(BaseHandler, TwitterMixin): - @gen.coroutine - def get(self): - if self.get_argument("oauth_token", None): - user = yield self.get_authenticated_user() - del user["description"] - self.set_signed_cookie(self.COOKIE_NAME, json_encode(user)) - self.redirect(self.get_argument("next", "/")) - else: - yield self.authorize_redirect(callback_uri=self.request.full_url()) - - -class LogoutHandler(BaseHandler): - def get(self): - self.clear_cookie(self.COOKIE_NAME) - - -async def main(): - parse_command_line(final=False) - parse_config_file(options.config_file) - - app = Application( - [("/", MainHandler), ("/login", LoginHandler), ("/logout", LogoutHandler)], - login_url="/login", - **options.group_dict("application") - ) - app.listen(options.port) - - logging.info("Listening on http://localhost:%d" % options.port) - await asyncio.Event().wait() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/tornado/auth.py b/tornado/auth.py index 97cfc93ada..e9e3c359f3 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -712,6 +712,12 @@ async def get(self): includes the attributes ``username``, ``name``, ``access_token``, and all of the custom Twitter user attributes described at https://dev.twitter.com/docs/api/1.1/get/users/show + + .. deprecated:: 6.3 + This class refers to version 1.1 of the Twitter API, which has been + deprecated by Twitter. Since Twitter has begun to limit access to its + API, this class will no longer be updated and will be removed in the + future. """ _OAUTH_REQUEST_TOKEN_URL = "https://api.twitter.com/oauth/request_token" From a57189bf9d6790a156ccdf16900a12b16174f972 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 19:37:45 -0400 Subject: [PATCH 24/70] demos: Add a demo app for google auth Add some more detail to app registration docs. This was done mainly to verify that we don't need to introduce new parameters as requested in #2140 Closes #2140 --- demos/google_auth/.gitignore | 1 + demos/google_auth/main.py | 113 +++++++++++++++++++++++++++++++++++ tornado/auth.py | 21 +++++-- 3 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 demos/google_auth/.gitignore create mode 100644 demos/google_auth/main.py diff --git a/demos/google_auth/.gitignore b/demos/google_auth/.gitignore new file mode 100644 index 0000000000..5cfc307c04 --- /dev/null +++ b/demos/google_auth/.gitignore @@ -0,0 +1 @@ +main.cfg diff --git a/demos/google_auth/main.py b/demos/google_auth/main.py new file mode 100644 index 0000000000..06dd3b5cdb --- /dev/null +++ b/demos/google_auth/main.py @@ -0,0 +1,113 @@ +"""Demo app for GoogleOAuth2Mixin + +Recommended usage: +- Register an app with Google following the instructions at + https://www.tornadoweb.org/en/stable/auth.html#tornado.auth.GoogleOAuth2Mixin +- Use "http://localhost:8888/auth/google" as the redirect URI. +- Create a file in this directory called main.cfg, containing two lines (python syntax): + google_oauth_key="..." + google_oauth_secret="..." +- Run this file with `python main.py --config_file=main.cfg` +- Visit "http://localhost:8888" in your browser. +""" +import asyncio +import json +import tornado +import urllib.parse + +from tornado.options import define, options +from tornado.web import url + +define("port", default=8888, help="run on the given port", type=int) +define("google_oauth_key", help="Google OAuth Key") +define("google_oauth_secret", help="Google OAuth Secret") +define( + "config_file", + help="tornado config file", + callback=lambda path: tornado.options.parse_config_file(path, final=False), +) + + +class BaseHandler(tornado.web.RequestHandler): + def get_current_user(self): + user_cookie = self.get_signed_cookie("googledemo_user") + if user_cookie: + return json.loads(user_cookie) + return None + + +class IndexHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): + @tornado.web.authenticated + async def get(self): + try: + # This is redundant: we got the userinfo in the login handler. + # But this demonstrates the usage of oauth2_request outside of + # the login flow, and getting anything more than userinfo + # leads to more approval prompts and complexity. + user_info = await self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=self.current_user["access_token"], + ) + except tornado.httpclient.HTTPClientError as e: + print(e.response.body) + raise + self.write(f"Hello {user_info['name']}") + + +class LoginHandler(BaseHandler, tornado.auth.GoogleOAuth2Mixin): + async def get(self): + redirect_uri = urllib.parse.urljoin( + self.application.settings["redirect_base_uri"], + self.reverse_url("google_oauth"), + ) + if self.get_argument("code", False): + access = await self.get_authenticated_user( + redirect_uri=redirect_uri, code=self.get_argument("code") + ) + user = await self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=access["access_token"], + ) + # Save the user and access token. + user_cookie = dict(id=user["id"], access_token=access["access_token"]) + self.set_signed_cookie("googledemo_user", json.dumps(user_cookie)) + self.redirect("/") + else: + self.authorize_redirect( + redirect_uri=redirect_uri, + client_id=self.get_google_oauth_settings()["key"], + scope=["profile", "email"], + response_type="code", + extra_params={"approval_prompt": "auto"}, + ) + + +class LogoutHandler(BaseHandler): + def get(self): + self.clear_cookie("user") + self.redirect("/") + + +async def main(): + tornado.options.parse_command_line() + app = tornado.web.Application( + [ + url(r"/", IndexHandler), + url(r"/auth/google", LoginHandler, name="google_oauth"), + url(r"/logout", LogoutHandler), + ], + redirect_base_uri=f"http://localhost:{options.port}", + google_oauth=dict( + key=options.google_oauth_key, secret=options.google_oauth_secret + ), + debug=True, + cookie_secret="__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__", + login_url="/auth/google", + ) + app.listen(options.port) + shutdown_event = asyncio.Event() + await shutdown_event.wait() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tornado/auth.py b/tornado/auth.py index e9e3c359f3..7f05a531f1 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -852,12 +852,18 @@ class GoogleOAuth2Mixin(OAuth2Mixin): * Go to the Google Dev Console at http://console.developers.google.com * Select a project, or create a new one. + * Depending on permissions required, you may need to set your app to + "testing" mode and add your account as a test user, or go through + a verfication process. You may also need to use the "Enable + APIs and Services" command to enable specific services. * In the sidebar on the left, select Credentials. * Click CREATE CREDENTIALS and click OAuth client ID. * Under Application type, select Web application. * Name OAuth 2.0 client and click Create. * Copy the "Client secret" and "Client ID" to the application settings as ``{"google_oauth": {"key": CLIENT_ID, "secret": CLIENT_SECRET}}`` + * You must register the ``redirect_uri`` you plan to use with this class + on the Credentials page. .. versionadded:: 3.2 """ @@ -907,19 +913,26 @@ async def get_authenticated_user( class GoogleOAuth2LoginHandler(tornado.web.RequestHandler, tornado.auth.GoogleOAuth2Mixin): + # Google requires an exact match for redirect_uri, so it's + # best to get it from your app configuration instead of from + # self.request.full_uri(). + redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], + self.reverse_url('google_oauth')) async def get(self): if self.get_argument('code', False): access = await self.get_authenticated_user( - redirect_uri='http://your.site.com/auth/google', + redirect_uri=redirect_uri, code=self.get_argument('code')) user = await self.oauth2_request( "https://www.googleapis.com/oauth2/v1/userinfo", access_token=access["access_token"]) - # Save the user and access token with - # e.g. set_signed_cookie. + # Save the user and access token. For example: + user_cookie = dict(id=user["id"], access_token=access["access_token"]) + self.set_signed_cookie("user", json.dumps(user_cookie)) + self.redirect("/") else: self.authorize_redirect( - redirect_uri='http://your.site.com/auth/google', + redirect_uri=redirect_uri, client_id=self.get_google_oauth_settings()['key'], scope=['profile', 'email'], response_type='code', From 7bf6b20d1da6789d0b745e2e34514b63ef8c826c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 20:04:27 -0400 Subject: [PATCH 25/70] auth: Update facebook scope The read_stream scope was replaced with user_posts; this change was made to demos/facebook/facebook.py in #1674 but the corresponding comment was not updated. The offline_access scope has also been removed but seems irrelvant to this comment. Fixes #1566 --- tornado/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tornado/auth.py b/tornado/auth.py index 7f05a531f1..1312f29909 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -1008,7 +1008,7 @@ async def get(self): self.authorize_redirect( redirect_uri='/auth/facebookgraph/', client_id=self.settings["facebook_api_key"], - extra_params={"scope": "read_stream,offline_access"}) + extra_params={"scope": "user_posts"}) .. testoutput:: :hide: From f69b8dc40eb44f688d48b2fa89061b9c739cb5e2 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 20:09:36 -0400 Subject: [PATCH 26/70] auth: Use a setting for facebook redirect url Matches a change made to the Google auth mixin in a previous commit. Fixes #756 --- tornado/auth.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tornado/auth.py b/tornado/auth.py index 1312f29909..e223973707 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -997,16 +997,19 @@ async def get_authenticated_user( class FacebookGraphLoginHandler(tornado.web.RequestHandler, tornado.auth.FacebookGraphMixin): async def get(self): + redirect_uri = urllib.parse.urljoin( + self.application.settings['redirect_base_uri'], + self.reverse_url('facebook_oauth')) if self.get_argument("code", False): user = await self.get_authenticated_user( - redirect_uri='/auth/facebookgraph/', + redirect_uri=redirect_uri, client_id=self.settings["facebook_api_key"], client_secret=self.settings["facebook_secret"], code=self.get_argument("code")) # Save the user with e.g. set_signed_cookie else: self.authorize_redirect( - redirect_uri='/auth/facebookgraph/', + redirect_uri=redirect_uri, client_id=self.settings["facebook_api_key"], extra_params={"scope": "user_posts"}) From b62902466bf6c48d4b74d1d342e294e455204556 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 20:31:18 -0400 Subject: [PATCH 27/70] auth: Copy google mixin comment to top-of-file --- tornado/auth.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tornado/auth.py b/tornado/auth.py index e223973707..c857387be7 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -36,17 +36,28 @@ .. testcode:: class GoogleOAuth2LoginHandler(tornado.web.RequestHandler, - tornado.auth.GoogleOAuth2Mixin): + tornado.auth.GoogleOAuth2Mixin): + # Google requires an exact match for redirect_uri, so it's + # best to get it from your app configuration instead of from + # self.request.full_uri(). + redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], + self.reverse_url('google_oauth')) async def get(self): if self.get_argument('code', False): - user = await self.get_authenticated_user( - redirect_uri='http://your.site.com/auth/google', + access = await self.get_authenticated_user( + redirect_uri=redirect_uri, code=self.get_argument('code')) - # Save the user with e.g. set_signed_cookie + user = await self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=access["access_token"]) + # Save the user and access token. For example: + user_cookie = dict(id=user["id"], access_token=access["access_token"]) + self.set_signed_cookie("user", json.dumps(user_cookie)) + self.redirect("/") else: self.authorize_redirect( - redirect_uri='http://your.site.com/auth/google', - client_id=self.settings['google_oauth']['key'], + redirect_uri=redirect_uri, + client_id=self.get_google_oauth_settings()['key'], scope=['profile', 'email'], response_type='code', extra_params={'approval_prompt': 'auto'}) From c98f7cc5c476aa04bcbe4acdd4a1172f7cc90f64 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 20:46:14 -0400 Subject: [PATCH 28/70] auth: Fix doctests --- tornado/auth.py | 132 ++++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/tornado/auth.py b/tornado/auth.py index c857387be7..d1edcc6550 100644 --- a/tornado/auth.py +++ b/tornado/auth.py @@ -33,34 +33,39 @@ Example usage for Google OAuth: +.. testsetup:: + + import urllib + .. testcode:: class GoogleOAuth2LoginHandler(tornado.web.RequestHandler, tornado.auth.GoogleOAuth2Mixin): - # Google requires an exact match for redirect_uri, so it's - # best to get it from your app configuration instead of from - # self.request.full_uri(). - redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], - self.reverse_url('google_oauth')) async def get(self): - if self.get_argument('code', False): - access = await self.get_authenticated_user( - redirect_uri=redirect_uri, - code=self.get_argument('code')) - user = await self.oauth2_request( - "https://www.googleapis.com/oauth2/v1/userinfo", - access_token=access["access_token"]) - # Save the user and access token. For example: - user_cookie = dict(id=user["id"], access_token=access["access_token"]) - self.set_signed_cookie("user", json.dumps(user_cookie)) - self.redirect("/") - else: - self.authorize_redirect( - redirect_uri=redirect_uri, - client_id=self.get_google_oauth_settings()['key'], - scope=['profile', 'email'], - response_type='code', - extra_params={'approval_prompt': 'auto'}) + # Google requires an exact match for redirect_uri, so it's + # best to get it from your app configuration instead of from + # self.request.full_uri(). + redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], + self.reverse_url('google_oauth')) + async def get(self): + if self.get_argument('code', False): + access = await self.get_authenticated_user( + redirect_uri=redirect_uri, + code=self.get_argument('code')) + user = await self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=access["access_token"]) + # Save the user and access token. For example: + user_cookie = dict(id=user["id"], access_token=access["access_token"]) + self.set_signed_cookie("user", json.dumps(user_cookie)) + self.redirect("/") + else: + self.authorize_redirect( + redirect_uri=redirect_uri, + client_id=self.get_google_oauth_settings()['key'], + scope=['profile', 'email'], + response_type='code', + extra_params={'approval_prompt': 'auto'}) .. testoutput:: :hide: @@ -920,34 +925,39 @@ async def get_authenticated_user( Example usage: + .. testsetup:: + + import urllib + .. testcode:: class GoogleOAuth2LoginHandler(tornado.web.RequestHandler, tornado.auth.GoogleOAuth2Mixin): - # Google requires an exact match for redirect_uri, so it's - # best to get it from your app configuration instead of from - # self.request.full_uri(). - redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], - self.reverse_url('google_oauth')) async def get(self): - if self.get_argument('code', False): - access = await self.get_authenticated_user( - redirect_uri=redirect_uri, - code=self.get_argument('code')) - user = await self.oauth2_request( - "https://www.googleapis.com/oauth2/v1/userinfo", - access_token=access["access_token"]) - # Save the user and access token. For example: - user_cookie = dict(id=user["id"], access_token=access["access_token"]) - self.set_signed_cookie("user", json.dumps(user_cookie)) - self.redirect("/") - else: - self.authorize_redirect( - redirect_uri=redirect_uri, - client_id=self.get_google_oauth_settings()['key'], - scope=['profile', 'email'], - response_type='code', - extra_params={'approval_prompt': 'auto'}) + # Google requires an exact match for redirect_uri, so it's + # best to get it from your app configuration instead of from + # self.request.full_uri(). + redirect_uri = urllib.parse.urljoin(self.application.settings['redirect_base_uri'], + self.reverse_url('google_oauth')) + async def get(self): + if self.get_argument('code', False): + access = await self.get_authenticated_user( + redirect_uri=redirect_uri, + code=self.get_argument('code')) + user = await self.oauth2_request( + "https://www.googleapis.com/oauth2/v1/userinfo", + access_token=access["access_token"]) + # Save the user and access token. For example: + user_cookie = dict(id=user["id"], access_token=access["access_token"]) + self.set_signed_cookie("user", json.dumps(user_cookie)) + self.redirect("/") + else: + self.authorize_redirect( + redirect_uri=redirect_uri, + client_id=self.get_google_oauth_settings()['key'], + scope=['profile', 'email'], + response_type='code', + extra_params={'approval_prompt': 'auto'}) .. testoutput:: :hide: @@ -1008,21 +1018,21 @@ async def get_authenticated_user( class FacebookGraphLoginHandler(tornado.web.RequestHandler, tornado.auth.FacebookGraphMixin): async def get(self): - redirect_uri = urllib.parse.urljoin( - self.application.settings['redirect_base_uri'], - self.reverse_url('facebook_oauth')) - if self.get_argument("code", False): - user = await self.get_authenticated_user( - redirect_uri=redirect_uri, - client_id=self.settings["facebook_api_key"], - client_secret=self.settings["facebook_secret"], - code=self.get_argument("code")) - # Save the user with e.g. set_signed_cookie - else: - self.authorize_redirect( - redirect_uri=redirect_uri, - client_id=self.settings["facebook_api_key"], - extra_params={"scope": "user_posts"}) + redirect_uri = urllib.parse.urljoin( + self.application.settings['redirect_base_uri'], + self.reverse_url('facebook_oauth')) + if self.get_argument("code", False): + user = await self.get_authenticated_user( + redirect_uri=redirect_uri, + client_id=self.settings["facebook_api_key"], + client_secret=self.settings["facebook_secret"], + code=self.get_argument("code")) + # Save the user with e.g. set_signed_cookie + else: + self.authorize_redirect( + redirect_uri=redirect_uri, + client_id=self.settings["facebook_api_key"], + extra_params={"scope": "user_posts"}) .. testoutput:: :hide: From 62363740c1cc0e137ff4344c3afc3d52e070f200 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 7 Jul 2023 21:19:18 -0400 Subject: [PATCH 29/70] asyncio: Remove atexit hook This hook was added because of an only-in-CI issue, but we have since improved our cleanup of the selector thread. As long as this passes CI, I think we can remove the atexit hook. Fixes #3291 --- tornado/platform/asyncio.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 679068e69b..32fe860a66 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -23,7 +23,6 @@ """ import asyncio -import atexit import concurrent.futures import errno import functools @@ -61,31 +60,6 @@ def fileno(self) -> int: _T = TypeVar("_T") -# Collection of selector thread event loops to shut down on exit. -_selector_loops: Set["SelectorThread"] = set() - - -def _atexit_callback() -> None: - for loop in _selector_loops: - with loop._select_cond: - loop._closing_selector = True - loop._select_cond.notify() - try: - loop._waker_w.send(b"a") - except BlockingIOError: - pass - if loop._thread is not None: - # If we don't join our (daemon) thread here, we may get a deadlock - # during interpreter shutdown. I don't really understand why. This - # deadlock happens every time in CI (both travis and appveyor) but - # I've never been able to reproduce locally. - loop._thread.join() - _selector_loops.clear() - - -atexit.register(_atexit_callback) - - class BaseAsyncIOLoop(IOLoop): def initialize( # type: ignore self, asyncio_loop: asyncio.AbstractEventLoop, **kwargs: Any @@ -480,7 +454,6 @@ async def thread_manager_anext() -> None: self._waker_r, self._waker_w = socket.socketpair() self._waker_r.setblocking(False) self._waker_w.setblocking(False) - _selector_loops.add(self) self.add_reader(self._waker_r, self._consume_waker) def close(self) -> None: @@ -492,7 +465,6 @@ def close(self) -> None: self._wake_selector() if self._thread is not None: self._thread.join() - _selector_loops.discard(self) self.remove_reader(self._waker_r) self._waker_r.close() self._waker_w.close() From 7b4dfb2f5765d0e4c1e2d15c2648a6e0cd43192b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 20 Jul 2023 12:05:51 +0000 Subject: [PATCH 30/70] build(deps): bump pygments from 2.14.0 to 2.15.0 Bumps [pygments](https://github.com/pygments/pygments) from 2.14.0 to 2.15.0. - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.14.0...2.15.0) --- updated-dependencies: - dependency-name: pygments dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index a2d1ebc435..dbafa32a56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -75,7 +75,7 @@ pycodestyle==2.10.0 # via flake8 pyflakes==3.0.1 # via flake8 -pygments==2.14.0 +pygments==2.15.0 # via sphinx pyproject-api==1.5.1 # via tox @@ -108,13 +108,6 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -tomli==2.0.1 - # via - # black - # build - # mypy - # pyproject-api - # tox tox==4.6.0 # via -r requirements.in types-pycurl==7.45.2.0 From d7621eebba60199f21ae8398de1b57273149b04b Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 13 Jul 2023 20:57:11 -0400 Subject: [PATCH 31/70] autoreload: Support the ability to run a directory instead of a module Running a directory has some but not all of the behavior of running a module, including setting __spec__, so we must be careful not to break things by assuming that __spec__ means module mode. Fixes #2855 --- tornado/autoreload.py | 12 ++++- tornado/test/autoreload_test.py | 80 +++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 0ac4496651..07b9112a66 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -225,7 +225,15 @@ def _reload() -> None: else: spec = getattr(sys.modules["__main__"], "__spec__", None) argv = sys.argv - if spec: + if spec and spec.name != "__main__": + # __spec__ is set in two cases: when running a module, and when running a directory. (when + # running a file, there is no spec). In the former case, we must pass -m to maintain the + # module-style behavior (setting sys.path), even though python stripped -m from its argv at + # startup. If sys.path is exactly __main__, we're running a directory and should fall + # through to the non-module behavior. + # + # Some of this, including the use of exactly __main__ as a spec for directory mode, + # is documented at https://docs.python.org/3/library/runpy.html#runpy.run_path argv = ["-m", spec.name] + argv[1:] else: path_prefix = "." + os.pathsep @@ -331,7 +339,7 @@ def main() -> None: # never made it into sys.modules and so we won't know to watch it. # Just to make sure we've covered everything, walk the stack trace # from the exception and watch every file. - for (filename, lineno, name, line) in traceback.extract_tb(sys.exc_info()[2]): + for filename, lineno, name, line in traceback.extract_tb(sys.exc_info()[2]): watch(filename) if isinstance(e, SyntaxError): # SyntaxErrors are special: their innermost stack frame is fake diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index fff602811b..feee94f3be 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -24,17 +24,23 @@ def tearDown(self): time.sleep(1) shutil.rmtree(self.path) - def test_reload_module(self): + def test_reload(self): main = """\ import os import sys from tornado import autoreload -# This import will fail if path is not set up correctly -import testapp +# In module mode, the path is set to the parent directory and we can import testapp. +try: + import testapp +except ImportError: + print("import testapp failed") +else: + print("import testapp succeeded") -print('Starting') +spec = getattr(sys.modules[__name__], '__spec__', None) +print(f"Starting {__name__=}, __spec__.name={getattr(spec, 'name', None)}") if 'TESTAPP_STARTED' not in os.environ: os.environ['TESTAPP_STARTED'] = '1' sys.stdout.flush() @@ -57,16 +63,62 @@ def test_reload_module(self): if "PYTHONPATH" in os.environ: pythonpath += os.pathsep + os.environ["PYTHONPATH"] - p = Popen( - [sys.executable, "-m", "testapp"], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", - ) - out = p.communicate()[0] - self.assertEqual(out, "Starting\nStarting\n") + with self.subTest(mode="module"): + # In module mode, the path is set to the parent directory and we can import testapp. + # Also, the __spec__.name is set to the fully qualified module name. + p = Popen( + [sys.executable, "-m", "testapp"], + stdout=subprocess.PIPE, + cwd=self.path, + env=dict(os.environ, PYTHONPATH=pythonpath), + universal_newlines=True, + encoding="utf-8", + ) + out = p.communicate()[0] + self.assertEqual( + out, + ( + "import testapp succeeded\n" + + "Starting __name__='__main__', __spec__.name=testapp.__main__\n" + ) + * 2, + ) + + with self.subTest(mode="file"): + # When the __main__.py file is run directly, there is no qualified module spec and we + # cannot import testapp. + p = Popen( + [sys.executable, "testapp/__main__.py"], + stdout=subprocess.PIPE, + cwd=self.path, + env=dict(os.environ, PYTHONPATH=pythonpath), + universal_newlines=True, + encoding="utf-8", + ) + out = p.communicate()[0] + self.assertEqual( + out, + "import testapp failed\nStarting __name__='__main__', __spec__.name=None\n" + * 2, + ) + + with self.subTest(mode="directory"): + # Running as a directory finds __main__.py like a module. It does not manipulate + # sys.path but it does set a spec with a name of exactly __main__. + p = Popen( + [sys.executable, "testapp"], + stdout=subprocess.PIPE, + cwd=self.path, + env=dict(os.environ, PYTHONPATH=pythonpath), + universal_newlines=True, + encoding="utf-8", + ) + out = p.communicate()[0] + self.assertEqual( + out, + "import testapp failed\nStarting __name__='__main__', __spec__.name=__main__\n" + * 2, + ) def test_reload_wrapper_preservation(self): # This test verifies that when `python -m tornado.autoreload` From bf3b76b3faa852f3b90cf483db6f374f19444f9a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 22 Jul 2023 21:09:36 -0400 Subject: [PATCH 32/70] autoreload: Switch to a real option parser This will make it easier to add other options (for #2398) --- tornado/autoreload.py | 53 ++++++++++++++++++++------------- tornado/test/autoreload_test.py | 44 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 20 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 07b9112a66..262a305902 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -268,8 +268,7 @@ def _reload() -> None: os._exit(0) -_USAGE = """\ -Usage: +_USAGE = """ python -m tornado.autoreload -m module.to.run [args...] python -m tornado.autoreload path/to/script.py [args...] """ @@ -291,6 +290,12 @@ def main() -> None: # Remember that we were launched with autoreload as main. # The main module can be tricky; set the variables both in our globals # (which may be __main__) and the real importable version. + # + # We use optparse instead of the newer argparse because we want to + # mimic the python command-line interface which requires stopping + # parsing at the first positional argument. optparse supports + # this but as far as I can tell argparse does not. + import optparse import tornado.autoreload global _autoreload_is_main @@ -300,32 +305,39 @@ def main() -> None: tornado.autoreload._original_argv = _original_argv = original_argv original_spec = getattr(sys.modules["__main__"], "__spec__", None) tornado.autoreload._original_spec = _original_spec = original_spec - sys.argv = sys.argv[:] - if len(sys.argv) >= 3 and sys.argv[1] == "-m": - mode = "module" - module = sys.argv[2] - del sys.argv[1:3] - elif len(sys.argv) >= 2: - mode = "script" - script = sys.argv[1] - sys.argv = sys.argv[1:] + + parser = optparse.OptionParser( + prog="python -m tornado.autoreload", + usage=_USAGE, + epilog="Either -m or a path must be specified, but not both", + ) + parser.disable_interspersed_args() + parser.add_option("-m", dest="module", metavar="module", help="module to run") + opts, rest = parser.parse_args() + if opts.module is None: + if not rest: + print("Either -m or a path must be specified", file=sys.stderr) + sys.exit(1) + path = rest[0] + sys.argv = rest[:] else: - print(_USAGE, file=sys.stderr) - sys.exit(1) + path = None + sys.argv = [sys.argv[0]] + rest try: - if mode == "module": + if opts.module is not None: import runpy - runpy.run_module(module, run_name="__main__", alter_sys=True) - elif mode == "script": - with open(script) as f: + runpy.run_module(opts.module, run_name="__main__", alter_sys=True) + else: + assert path is not None + with open(path) as f: # Execute the script in our namespace instead of creating # a new one so that something that tries to import __main__ # (e.g. the unittest module) will see names defined in the # script instead of just those defined in this module. global __file__ - __file__ = script + __file__ = path # If __package__ is defined, imports may be incorrectly # interpreted as relative to this module. global __package__ @@ -352,10 +364,11 @@ def main() -> None: # restore sys.argv so subsequent executions will include autoreload sys.argv = original_argv - if mode == "module": + if opts.module is not None: + assert opts.module is not None # runpy did a fake import of the module as __main__, but now it's # no longer in sys.modules. Figure out where it is and watch it. - loader = pkgutil.get_loader(module) + loader = pkgutil.get_loader(opts.module) if loader is not None: watch(loader.get_filename()) # type: ignore diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index feee94f3be..c08c5be368 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -183,3 +183,47 @@ def test_reload_wrapper_preservation(self): out = autoreload_proc.communicate()[0] self.assertEqual(out, "Starting\n" * 2) + + def test_reload_wrapper_args(self): + main = """\ +import os +import sys + +print(os.path.basename(sys.argv[0])) +print(f'argv={sys.argv[1:]}') +sys.stdout.flush() +os._exit(0) +""" + # Create temporary test application + main_file = os.path.join(self.path, "main.py") + with open(main_file, "w", encoding="utf-8") as f: + f.write(main) + + # Make sure the tornado module under test is available to the test + # application + pythonpath = os.getcwd() + if "PYTHONPATH" in os.environ: + pythonpath += os.pathsep + os.environ["PYTHONPATH"] + + autoreload_proc = Popen( + [ + sys.executable, + "-m", + "tornado.autoreload", + "main.py", + "arg1", + "--arg2", + "-m", + "arg3", + ], + stdout=subprocess.PIPE, + cwd=self.path, + env=dict(os.environ, PYTHONPATH=pythonpath), + universal_newlines=True, + encoding="utf-8", + ) + + out, err = autoreload_proc.communicate() + if err: + print("subprocess stderr:", err) + self.assertEqual(out, "main.py\nargv=['arg1', '--arg2', '-m', 'arg3']\n") From 04f832bae166bb5482552156973a6e65b3866ef8 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 22 Jul 2023 21:42:22 -0400 Subject: [PATCH 33/70] autoreload_test: Reduce repetition --- tornado/test/autoreload_test.py | 212 +++++++++++++++----------------- 1 file changed, 100 insertions(+), 112 deletions(-) diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index c08c5be368..ff934520dd 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -4,6 +4,7 @@ from subprocess import Popen import sys from tempfile import mkdtemp +import textwrap import time import unittest @@ -12,6 +13,32 @@ class AutoreloadTest(unittest.TestCase): def setUp(self): self.path = mkdtemp() + # Each test app runs itself twice via autoreload. The first time it manually triggers + # a reload (could also do this by touching a file but this is faster since filesystem + # timestamps are not necessarily high resolution). The second time it exits directly + # so that the autoreload wrapper (if it is used) doesn't catch it. + # + # The last line of each test's "main" program should be + # exec(open("run_twice_magic.py").read()) + self.write_files( + { + "run_twice_magic.py": """ + import os + import sys + + import tornado.autoreload + + sys.stdout.flush() + + if "TESTAPP_STARTED" not in os.environ: + os.environ["TESTAPP_STARTED"] = "1" + tornado.autoreload._reload() + else: + os._exit(0) + """ + } + ) + def tearDown(self): try: shutil.rmtree(self.path) @@ -24,13 +51,56 @@ def tearDown(self): time.sleep(1) shutil.rmtree(self.path) + def write_files(self, tree, base_path=None): + """Write a directory tree to self.path. + + tree is a dictionary mapping file names to contents, or + sub-dictionaries representing subdirectories. + """ + if base_path is None: + base_path = self.path + for name, contents in tree.items(): + if isinstance(contents, dict): + os.mkdir(os.path.join(base_path, name)) + self.write_files(contents, os.path.join(base_path, name)) + else: + with open(os.path.join(base_path, name), "w", encoding="utf-8") as f: + f.write(textwrap.dedent(contents)) + + def run_subprocess(self, args): + # Make sure the tornado module under test is available to the test + # application + pythonpath = os.getcwd() + if "PYTHONPATH" in os.environ: + pythonpath += os.pathsep + os.environ["PYTHONPATH"] + + p = Popen( + args, + stdout=subprocess.PIPE, + env=dict(os.environ, PYTHONPATH=pythonpath), + cwd=self.path, + universal_newlines=True, + encoding="utf-8", + ) + + # This timeout needs to be fairly generous for pypy due to jit + # warmup costs. + for i in range(40): + if p.poll() is not None: + break + time.sleep(0.1) + else: + p.kill() + raise Exception("subprocess failed to terminate") + + out = p.communicate()[0] + self.assertEqual(p.returncode, 0) + return out + def test_reload(self): main = """\ -import os import sys -from tornado import autoreload - # In module mode, the path is set to the parent directory and we can import testapp. try: import testapp @@ -41,40 +111,23 @@ def test_reload(self): spec = getattr(sys.modules[__name__], '__spec__', None) print(f"Starting {__name__=}, __spec__.name={getattr(spec, 'name', None)}") -if 'TESTAPP_STARTED' not in os.environ: - os.environ['TESTAPP_STARTED'] = '1' - sys.stdout.flush() - autoreload._reload() +exec(open("run_twice_magic.py").read()) """ # Create temporary test application - os.mkdir(os.path.join(self.path, "testapp")) - open( - os.path.join(self.path, "testapp/__init__.py"), "w", encoding="utf-8" - ).close() - with open( - os.path.join(self.path, "testapp/__main__.py"), "w", encoding="utf-8" - ) as f: - f.write(main) - - # Make sure the tornado module under test is available to the test - # application - pythonpath = os.getcwd() - if "PYTHONPATH" in os.environ: - pythonpath += os.pathsep + os.environ["PYTHONPATH"] + self.write_files( + { + "testapp": { + "__init__.py": "", + "__main__.py": main, + }, + } + ) with self.subTest(mode="module"): # In module mode, the path is set to the parent directory and we can import testapp. # Also, the __spec__.name is set to the fully qualified module name. - p = Popen( - [sys.executable, "-m", "testapp"], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", - ) - out = p.communicate()[0] + out = self.run_subprocess([sys.executable, "-m", "testapp"]) self.assertEqual( out, ( @@ -87,15 +140,7 @@ def test_reload(self): with self.subTest(mode="file"): # When the __main__.py file is run directly, there is no qualified module spec and we # cannot import testapp. - p = Popen( - [sys.executable, "testapp/__main__.py"], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", - ) - out = p.communicate()[0] + out = self.run_subprocess([sys.executable, "testapp/__main__.py"]) self.assertEqual( out, "import testapp failed\nStarting __name__='__main__', __spec__.name=None\n" @@ -105,15 +150,7 @@ def test_reload(self): with self.subTest(mode="directory"): # Running as a directory finds __main__.py like a module. It does not manipulate # sys.path but it does set a spec with a name of exactly __main__. - p = Popen( - [sys.executable, "testapp"], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", - ) - out = p.communicate()[0] + out = self.run_subprocess([sys.executable, "testapp"]) self.assertEqual( out, "import testapp failed\nStarting __name__='__main__', __spec__.name=__main__\n" @@ -125,7 +162,6 @@ def test_reload_wrapper_preservation(self): # is used on an application that also has an internal # autoreload, the reload wrapper is preserved on restart. main = """\ -import os import sys # This import will fail if path is not set up correctly @@ -134,78 +170,38 @@ def test_reload_wrapper_preservation(self): if 'tornado.autoreload' not in sys.modules: raise Exception('started without autoreload wrapper') -import tornado.autoreload - print('Starting') -sys.stdout.flush() -if 'TESTAPP_STARTED' not in os.environ: - os.environ['TESTAPP_STARTED'] = '1' - # Simulate an internal autoreload (one not caused - # by the wrapper). - tornado.autoreload._reload() -else: - # Exit directly so autoreload doesn't catch it. - os._exit(0) +exec(open("run_twice_magic.py").read()) """ - # Create temporary test application - os.mkdir(os.path.join(self.path, "testapp")) - init_file = os.path.join(self.path, "testapp", "__init__.py") - open(init_file, "w", encoding="utf-8").close() - main_file = os.path.join(self.path, "testapp", "__main__.py") - with open(main_file, "w", encoding="utf-8") as f: - f.write(main) - - # Make sure the tornado module under test is available to the test - # application - pythonpath = os.getcwd() - if "PYTHONPATH" in os.environ: - pythonpath += os.pathsep + os.environ["PYTHONPATH"] - - autoreload_proc = Popen( - [sys.executable, "-m", "tornado.autoreload", "-m", "testapp"], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", + self.write_files( + { + "testapp": { + "__init__.py": "", + "__main__.py": main, + }, + } ) - # This timeout needs to be fairly generous for pypy due to jit - # warmup costs. - for i in range(40): - if autoreload_proc.poll() is not None: - break - time.sleep(0.1) - else: - autoreload_proc.kill() - raise Exception("subprocess failed to terminate") - - out = autoreload_proc.communicate()[0] + out = self.run_subprocess( + [sys.executable, "-m", "tornado.autoreload", "-m", "testapp"] + ) self.assertEqual(out, "Starting\n" * 2) def test_reload_wrapper_args(self): main = """\ -import os import sys print(os.path.basename(sys.argv[0])) print(f'argv={sys.argv[1:]}') -sys.stdout.flush() -os._exit(0) +exec(open("run_twice_magic.py").read()) """ # Create temporary test application - main_file = os.path.join(self.path, "main.py") - with open(main_file, "w", encoding="utf-8") as f: - f.write(main) + self.write_files({"main.py": main}) # Make sure the tornado module under test is available to the test # application - pythonpath = os.getcwd() - if "PYTHONPATH" in os.environ: - pythonpath += os.pathsep + os.environ["PYTHONPATH"] - - autoreload_proc = Popen( + out = self.run_subprocess( [ sys.executable, "-m", @@ -216,14 +212,6 @@ def test_reload_wrapper_args(self): "-m", "arg3", ], - stdout=subprocess.PIPE, - cwd=self.path, - env=dict(os.environ, PYTHONPATH=pythonpath), - universal_newlines=True, - encoding="utf-8", ) - out, err = autoreload_proc.communicate() - if err: - print("subprocess stderr:", err) - self.assertEqual(out, "main.py\nargv=['arg1', '--arg2', '-m', 'arg3']\n") + self.assertEqual(out, "main.py\nargv=['arg1', '--arg2', '-m', 'arg3']\n" * 2) From bc68b8add400cd246281b6b65e05425641c15633 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 22 Jul 2023 22:10:18 -0400 Subject: [PATCH 34/70] autoreload: Support directories in CLI wrapper A previous commit added support for using autoreload within programs that were started as directories; this commit supports them when run with the -m tornado.autoreload wrapper. This change may have side effects for file mode since we now use runpy.run_path instead of executing the file by hand (I don't think the run_path function existed when this code was originally written). --- tornado/autoreload.py | 17 ++----- tornado/test/autoreload_test.py | 89 +++++++++++++++++++++------------ 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 262a305902..376b346e3d 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -325,24 +325,13 @@ def main() -> None: sys.argv = [sys.argv[0]] + rest try: - if opts.module is not None: - import runpy + import runpy + if opts.module is not None: runpy.run_module(opts.module, run_name="__main__", alter_sys=True) else: assert path is not None - with open(path) as f: - # Execute the script in our namespace instead of creating - # a new one so that something that tries to import __main__ - # (e.g. the unittest module) will see names defined in the - # script instead of just those defined in this module. - global __file__ - __file__ = path - # If __package__ is defined, imports may be incorrectly - # interpreted as relative to this module. - global __package__ - del __package__ - exec_in(f.read(), globals(), globals()) + runpy.run_path(path, run_name="__main__") except SystemExit as e: gen_log.info("Script exited with status %s", e.code) except Exception as e: diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index ff934520dd..025b6ed5df 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -11,6 +11,9 @@ class AutoreloadTest(unittest.TestCase): def setUp(self): + # When these tests fail the output sometimes exceeds the default maxDiff. + self.maxDiff = 1024 + self.path = mkdtemp() # Each test app runs itself twice via autoreload. The first time it manually triggers @@ -124,38 +127,59 @@ def test_reload(self): } ) - with self.subTest(mode="module"): - # In module mode, the path is set to the parent directory and we can import testapp. - # Also, the __spec__.name is set to the fully qualified module name. - out = self.run_subprocess([sys.executable, "-m", "testapp"]) - self.assertEqual( - out, - ( - "import testapp succeeded\n" - + "Starting __name__='__main__', __spec__.name=testapp.__main__\n" - ) - * 2, - ) - - with self.subTest(mode="file"): - # When the __main__.py file is run directly, there is no qualified module spec and we - # cannot import testapp. - out = self.run_subprocess([sys.executable, "testapp/__main__.py"]) - self.assertEqual( - out, - "import testapp failed\nStarting __name__='__main__', __spec__.name=None\n" - * 2, - ) - - with self.subTest(mode="directory"): - # Running as a directory finds __main__.py like a module. It does not manipulate - # sys.path but it does set a spec with a name of exactly __main__. - out = self.run_subprocess([sys.executable, "testapp"]) - self.assertEqual( - out, - "import testapp failed\nStarting __name__='__main__', __spec__.name=__main__\n" - * 2, - ) + # The autoreload wrapper should support all the same modes as the python interpreter. + # The wrapper itself should have no effect on this test so we try all modes with and + # without it. + for wrapper in [False, True]: + with self.subTest(wrapper=wrapper): + with self.subTest(mode="module"): + if wrapper: + base_args = [sys.executable, "-m", "tornado.autoreload"] + else: + base_args = [sys.executable] + # In module mode, the path is set to the parent directory and we can import + # testapp. Also, the __spec__.name is set to the fully qualified module name. + out = self.run_subprocess(base_args + ["-m", "testapp"]) + self.assertEqual( + out, + ( + "import testapp succeeded\n" + + "Starting __name__='__main__', __spec__.name=testapp.__main__\n" + ) + * 2, + ) + + with self.subTest(mode="file"): + out = self.run_subprocess(base_args + ["testapp/__main__.py"]) + # In file mode, we do not expect the path to be set so we can import testapp, + # but when the wrapper is used the -m argument to the python interpreter + # does this for us. + expect_import = ( + "import testapp succeeded" + if wrapper + else "import testapp failed" + ) + # In file mode there is no qualified module spec. + self.assertEqual( + out, + f"{expect_import}\nStarting __name__='__main__', __spec__.name=None\n" + * 2, + ) + + with self.subTest(mode="directory"): + # Running as a directory finds __main__.py like a module. It does not manipulate + # sys.path but it does set a spec with a name of exactly __main__. + out = self.run_subprocess(base_args + ["testapp"]) + expect_import = ( + "import testapp succeeded" + if wrapper + else "import testapp failed" + ) + self.assertEqual( + out, + f"{expect_import}\nStarting __name__='__main__', __spec__.name=__main__\n" + * 2, + ) def test_reload_wrapper_preservation(self): # This test verifies that when `python -m tornado.autoreload` @@ -190,6 +214,7 @@ def test_reload_wrapper_preservation(self): def test_reload_wrapper_args(self): main = """\ +import os import sys print(os.path.basename(sys.argv[0])) From 0c973fbb6038381dd14e9fb54d5874e883d68916 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 21:07:47 +0000 Subject: [PATCH 35/70] build(deps): bump certifi from 2022.12.7 to 2023.7.22 Bumps [certifi](https://github.com/certifi/python-certifi) from 2022.12.7 to 2023.7.22. - [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2023.07.22) --- updated-dependencies: - dependency-name: certifi dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dbafa32a56..c01c92bfd7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ build==0.10.0 # via pip-tools cachetools==5.3.1 # via tox -certifi==2022.12.7 +certifi==2023.7.22 # via requests chardet==5.1.0 # via tox From 8f71301ee5a6b2cad1fef5eabe890b992f08b3ce Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 20:15:12 -0400 Subject: [PATCH 36/70] autoreload: Add --until-success flag This flag terminates the autoreload loop after the first successful run. This makes it possible to cleanly shut down a process that is using "python -m tornado.autoreload" without printing a traceback. Fixes #2398 --- tornado/autoreload.py | 10 ++++++++++ tornado/test/autoreload_test.py | 26 ++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 376b346e3d..3277982d76 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -313,6 +313,11 @@ def main() -> None: ) parser.disable_interspersed_args() parser.add_option("-m", dest="module", metavar="module", help="module to run") + parser.add_option( + "--until-success", + action="store_true", + help="stop reloading after the program exist successfully (status code 0)", + ) opts, rest = parser.parse_args() if opts.module is None: if not rest: @@ -324,6 +329,7 @@ def main() -> None: path = None sys.argv = [sys.argv[0]] + rest + exit_status = 1 try: import runpy @@ -333,6 +339,7 @@ def main() -> None: assert path is not None runpy.run_path(path, run_name="__main__") except SystemExit as e: + exit_status = e.code gen_log.info("Script exited with status %s", e.code) except Exception as e: gen_log.warning("Script exited with uncaught exception", exc_info=True) @@ -349,6 +356,7 @@ def main() -> None: if e.filename is not None: watch(e.filename) else: + exit_status = 0 gen_log.info("Script exited normally") # restore sys.argv so subsequent executions will include autoreload sys.argv = original_argv @@ -361,6 +369,8 @@ def main() -> None: if loader is not None: watch(loader.get_filename()) # type: ignore + if opts.until_success and exit_status == 0: + return wait() diff --git a/tornado/test/autoreload_test.py b/tornado/test/autoreload_test.py index 025b6ed5df..5675faa2a5 100644 --- a/tornado/test/autoreload_test.py +++ b/tornado/test/autoreload_test.py @@ -16,12 +16,12 @@ def setUp(self): self.path = mkdtemp() - # Each test app runs itself twice via autoreload. The first time it manually triggers + # Most test apps run themselves twice via autoreload. The first time it manually triggers # a reload (could also do this by touching a file but this is faster since filesystem # timestamps are not necessarily high resolution). The second time it exits directly # so that the autoreload wrapper (if it is used) doesn't catch it. # - # The last line of each test's "main" program should be + # The last line of each such test's "main" program should be # exec(open("run_twice_magic.py").read()) self.write_files( { @@ -240,3 +240,25 @@ def test_reload_wrapper_args(self): ) self.assertEqual(out, "main.py\nargv=['arg1', '--arg2', '-m', 'arg3']\n" * 2) + + def test_reload_wrapper_until_success(self): + main = """\ +import os +import sys + +if "TESTAPP_STARTED" in os.environ: + print("exiting cleanly") + sys.exit(0) +else: + print("reloading") + exec(open("run_twice_magic.py").read()) +""" + + # Create temporary test application + self.write_files({"main.py": main}) + + out = self.run_subprocess( + [sys.executable, "-m", "tornado.autoreload", "--until-success", "main.py"] + ) + + self.assertEqual(out, "reloading\nexiting cleanly\n") From 1b2c7881270d3ee848f77d8523467cd95e54bdd2 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:01:35 -0400 Subject: [PATCH 37/70] autoreload: Remove some code for compatibility with py3.3 --- tornado/autoreload.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 3277982d76..9bf7bb81cf 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -60,8 +60,7 @@ # may become relative in spite of the future import. # # We address the former problem by reconstructing the original command -# line (Python >= 3.4) or by setting the $PYTHONPATH environment -# variable (Python < 3.4) before re-execution so the new process will +# line before re-execution so the new process will # see the correct path. We attempt to address the latter problem when # tornado.autoreload is run as __main__. @@ -214,10 +213,7 @@ def _reload() -> None: # sys.path fixes: see comments at top of file. If __main__.__spec__ # exists, we were invoked with -m and the effective path is about to # change on re-exec. Reconstruct the original command line to - # ensure that the new process sees the same path we did. If - # __spec__ is not available (Python < 3.4), check instead if - # sys.path[0] is an empty string and add the current directory to - # $PYTHONPATH. + # ensure that the new process sees the same path we did. if _autoreload_is_main: assert _original_argv is not None spec = _original_spec @@ -235,12 +231,7 @@ def _reload() -> None: # Some of this, including the use of exactly __main__ as a spec for directory mode, # is documented at https://docs.python.org/3/library/runpy.html#runpy.run_path argv = ["-m", spec.name] + argv[1:] - else: - path_prefix = "." + os.pathsep - if sys.path[0] == "" and not os.environ.get("PYTHONPATH", "").startswith( - path_prefix - ): - os.environ["PYTHONPATH"] = path_prefix + os.environ.get("PYTHONPATH", "") + if not _has_execv: subprocess.Popen([sys.executable] + argv) os._exit(0) From 0c38f5f2250991109c0a76ff8cb0969887681db7 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:02:12 -0400 Subject: [PATCH 38/70] autoreload: Remove some code for MacOS 10.5 compatibility --- tornado/autoreload.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 9bf7bb81cf..8191cba257 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -236,27 +236,7 @@ def _reload() -> None: subprocess.Popen([sys.executable] + argv) os._exit(0) else: - try: - os.execv(sys.executable, [sys.executable] + argv) - except OSError: - # Mac OS X versions prior to 10.6 do not support execv in - # a process that contains multiple threads. Instead of - # re-executing in the current process, start a new one - # and cause the current process to exit. This isn't - # ideal since the new process is detached from the parent - # terminal and thus cannot easily be killed with ctrl-C, - # but it's better than not being able to autoreload at - # all. - # Unfortunately the errno returned in this case does not - # appear to be consistent, so we can't easily check for - # this error specifically. - os.spawnv( - os.P_NOWAIT, sys.executable, [sys.executable] + argv # type: ignore - ) - # At this point the IOLoop has been closed and finally - # blocks will experience errors if we allow the stack to - # unwind, so just exit uncleanly. - os._exit(0) + os.execv(sys.executable, [sys.executable] + argv) _USAGE = """ From 5b4007bb20ea45417593e284697a0de2b4e72d76 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:17:34 -0400 Subject: [PATCH 39/70] autoreload: Modernize type annotations --- tornado/autoreload.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tornado/autoreload.py b/tornado/autoreload.py index 8191cba257..a4522ecbc6 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -75,8 +75,9 @@ del sys.path[0] import functools +import importlib.abc import os -import pkgutil # type: ignore +import pkgutil import sys import traceback import types @@ -86,18 +87,13 @@ from tornado import ioloop from tornado.log import gen_log from tornado import process -from tornado.util import exec_in try: import signal except ImportError: signal = None # type: ignore -import typing -from typing import Callable, Dict - -if typing.TYPE_CHECKING: - from typing import List, Optional, Union # noqa: F401 +from typing import Callable, Dict, Optional, List, Union # os.execv is broken on Windows and can't properly parse command line # arguments and executable name if they contain whitespaces. subprocess @@ -107,9 +103,11 @@ _watched_files = set() _reload_hooks = [] _reload_attempted = False -_io_loops = weakref.WeakKeyDictionary() # type: ignore +_io_loops: "weakref.WeakKeyDictionary[ioloop.IOLoop, bool]" = ( + weakref.WeakKeyDictionary() +) _autoreload_is_main = False -_original_argv = None # type: Optional[List[str]] +_original_argv: Optional[List[str]] = None _original_spec = None @@ -125,7 +123,7 @@ def start(check_time: int = 500) -> None: _io_loops[io_loop] = True if len(_io_loops) > 1: gen_log.warning("tornado.autoreload started more than once in the same process") - modify_times = {} # type: Dict[str, float] + modify_times: Dict[str, float] = {} callback = functools.partial(_reload_on_update, modify_times) scheduler = ioloop.PeriodicCallback(callback, check_time) scheduler.start() @@ -300,7 +298,9 @@ def main() -> None: path = None sys.argv = [sys.argv[0]] + rest - exit_status = 1 + # SystemExit.code is typed funny: https://github.com/python/typeshed/issues/8513 + # All we care about is truthiness + exit_status: Union[int, str, None] = 1 try: import runpy @@ -337,10 +337,11 @@ def main() -> None: # runpy did a fake import of the module as __main__, but now it's # no longer in sys.modules. Figure out where it is and watch it. loader = pkgutil.get_loader(opts.module) - if loader is not None: + if loader is not None and isinstance(loader, importlib.abc.FileLoader): + # TODO: fix when we update typeshed watch(loader.get_filename()) # type: ignore - if opts.until_success and exit_status == 0: + if opts.until_success and not exit_status: return wait() From 4a4c870f05573d0419560cad92ec45a390cd42ef Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:48:03 -0400 Subject: [PATCH 40/70] build: Move linters to python 3.11 --- .github/workflows/test.yml | 6 +++--- tox.ini | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bd94444230..e88a3e9051 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,12 +22,12 @@ jobs: name: Install Python with: # Lint python version must be synced with tox.ini - python-version: '3.8' + python-version: '3.11' - name: Install tox run: python -m pip install tox -c requirements.txt - name: Run test suite - run: python -m tox -e py38,lint + run: python -m tox -e py311,lint test_tox: name: Run full tests @@ -57,7 +57,7 @@ jobs: # Pypy is a lot slower due to jit warmup costs, so don't run the # "full" test config there. tox_env: pypy3 - - python: '3.8' + - python: '3.11' # Docs python version must be synced with tox.ini tox_env: docs diff --git a/tox.ini b/tox.ini index 73a14a7fd5..13b3bad076 100644 --- a/tox.ini +++ b/tox.ini @@ -35,8 +35,9 @@ basepython = # the outputs of the tools (especially where exactly the # linter warning-suppression comments go), so we specify a # python version for these builds. - docs: python3.8 - lint: python3.8 + # These versions must be synced with the versions in .github/workflows/test.yml + docs: python3.11 + lint: python3.11 deps = full: pycurl From e6854eb7e01a46df4d7e2fb04fef35f13e108b71 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:51:56 -0400 Subject: [PATCH 41/70] build: Upgrade pip-tools Fixes a conflict between pip-tools and pip. --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index c01c92bfd7..43a9de7ba0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile requirements.in +# pip-compile # alabaster==0.7.13 # via sphinx @@ -62,7 +62,7 @@ packaging==23.1 # tox pathspec==0.10.3 # via black -pip-tools==6.12.1 +pip-tools==7.1.0 # via -r requirements.in platformdirs==3.5.1 # via From 8bfa66ac9572680004bda9321a78d097ab144a9f Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 21:55:12 -0400 Subject: [PATCH 42/70] lint: Update mypy --- requirements.txt | 2 +- tornado/autoreload.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 43a9de7ba0..7543b10f99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,7 +48,7 @@ markupsafe==2.1.2 # via jinja2 mccabe==0.7.0 # via flake8 -mypy==0.991 +mypy==1.0.1 # via -r requirements.in mypy-extensions==0.4.3 # via diff --git a/tornado/autoreload.py b/tornado/autoreload.py index a4522ecbc6..c6a6e82da0 100644 --- a/tornado/autoreload.py +++ b/tornado/autoreload.py @@ -338,9 +338,7 @@ def main() -> None: # no longer in sys.modules. Figure out where it is and watch it. loader = pkgutil.get_loader(opts.module) if loader is not None and isinstance(loader, importlib.abc.FileLoader): - # TODO: fix when we update typeshed - watch(loader.get_filename()) # type: ignore - + watch(loader.get_filename()) if opts.until_success and not exit_status: return wait() From 7075748eb341c661bd2f4efeb88963442ce3f85e Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 26 Jul 2023 22:00:20 -0400 Subject: [PATCH 43/70] ioloop: Annotate run_in_executor returning Future, not Awaitable This required a recent update to typeshed/mypy. Fixes #3093 --- tornado/ioloop.py | 2 +- tornado/platform/asyncio.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tornado/ioloop.py b/tornado/ioloop.py index 450fbf9484..cdfb4a630f 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -711,7 +711,7 @@ def run_in_executor( executor: Optional[concurrent.futures.Executor], func: Callable[..., _T], *args: Any - ) -> Awaitable[_T]: + ) -> "Future[_T]": """Runs a function in a ``concurrent.futures.Executor``. If ``executor`` is ``None``, the IO loop's default executor will be used. diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 32fe860a66..8d16aa6d15 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -37,7 +37,6 @@ from typing import ( Any, - Awaitable, Callable, Dict, List, @@ -237,7 +236,7 @@ def run_in_executor( executor: Optional[concurrent.futures.Executor], func: Callable[..., _T], *args: Any, - ) -> Awaitable[_T]: + ) -> "asyncio.Future[_T]": return self.asyncio_loop.run_in_executor(executor, func, *args) def set_default_executor(self, executor: concurrent.futures.Executor) -> None: From f23d4a60523dab280ac1ec68d47cfa9fe5b58b54 Mon Sep 17 00:00:00 2001 From: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:40:02 +0900 Subject: [PATCH 44/70] Fix syntax error in docstring --- tornado/web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tornado/web.py b/tornado/web.py index 439e02c47b..333f736808 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -3135,7 +3135,7 @@ class FallbackHandler(RequestHandler): django.core.handlers.wsgi.WSGIHandler()) application = tornado.web.Application([ (r"/foo", FooHandler), - (r".*", FallbackHandler, dict(fallback=wsgi_app), + (r".*", FallbackHandler, dict(fallback=wsgi_app)), ]) """ From 418f63adb1b99a89b8ec2e9875c11a891068fcb6 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 8 Aug 2023 20:23:19 -0400 Subject: [PATCH 45/70] web_test: Fix open redirect test on windows Drive letters in windows absolute paths mess up this test, so remove them and use a path relative to the drive root instead. --- tornado/test/web_test.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index 7387124d17..c8dce68c80 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -1457,10 +1457,18 @@ def test_no_open_redirect(self): # This test verifies that the open redirect that affected some configurations # prior to Tornado 6.3.2 is no longer possible. The vulnerability required # a static_url_prefix of "/" and a default_filename (any value) to be set. - # The absolute server-side path to the static directory must also be known. + # The absolute* server-side path to the static directory must also be known. + # + # * Almost absolute: On windows, the drive letter is stripped from the path. + test_dir = os.path.dirname(__file__) + drive, tail = os.path.splitdrive(test_dir) + if os.name == "posix": + self.assertEqual(tail, test_dir) + else: + test_dir = tail with ExpectLog(gen_log, ".*cannot redirect path with two initial slashes"): response = self.fetch( - f"//evil.com/../{os.path.dirname(__file__)}/static/dir", + f"//evil.com/../{test_dir}/static/dir", follow_redirects=False, ) self.assertEqual(response.code, 403) From bf90f3a9ad150c45d9d17f189c3368e9a3b8e80f Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 8 Aug 2023 21:55:02 -0400 Subject: [PATCH 46/70] http1connection: Make content-length parsing more strict Content-length and chunk size parsing now strictly matches the RFCs. We previously used the python int() function which accepted leading plus signs and internal underscores, which are not allowed by the HTTP RFCs (it also accepts minus signs, but these are less problematic in this context since they'd result in errors elsewhere) It is important to fix this because when combined with certain proxies, the lax parsing could result in a request smuggling vulnerability (if both Tornado and the proxy accepted an invalid content-length but interpreted it differently). This is known to occur with old versions of haproxy, although the current version of haproxy is unaffected. --- tornado/http1connection.py | 27 +++++++- tornado/test/httpserver_test.py | 105 +++++++++++++++++++++++++++----- 2 files changed, 115 insertions(+), 17 deletions(-) diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 5ca9168887..ca50e8ff55 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -442,7 +442,7 @@ def write_headers( ): self._expected_content_remaining = 0 elif "Content-Length" in headers: - self._expected_content_remaining = int(headers["Content-Length"]) + self._expected_content_remaining = parse_int(headers["Content-Length"]) else: self._expected_content_remaining = None # TODO: headers are supposed to be of type str, but we still have some @@ -618,7 +618,7 @@ def _read_body( headers["Content-Length"] = pieces[0] try: - content_length = int(headers["Content-Length"]) # type: Optional[int] + content_length: Optional[int] = parse_int(headers["Content-Length"]) except ValueError: # Handles non-integer Content-Length value. raise httputil.HTTPInputError( @@ -668,7 +668,10 @@ async def _read_chunked_body(self, delegate: httputil.HTTPMessageDelegate) -> No total_size = 0 while True: chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64) - chunk_len = int(chunk_len_str.strip(), 16) + try: + chunk_len = parse_hex_int(native_str(chunk_len_str[:-2])) + except ValueError: + raise httputil.HTTPInputError("invalid chunk size") if chunk_len == 0: crlf = await self.stream.read_bytes(2) if crlf != b"\r\n": @@ -842,3 +845,21 @@ async def _server_request_loop( await asyncio.sleep(0) finally: delegate.on_close(self) + + +DIGITS = re.compile(r"[0-9]+") +HEXDIGITS = re.compile(r"[0-9a-fA-F]+") + + +def parse_int(s: str) -> int: + """Parse a non-negative integer from a string.""" + if DIGITS.fullmatch(s) is None: + raise ValueError("not an integer: %r" % s) + return int(s) + + +def parse_hex_int(s: str) -> int: + """Parse a non-negative hexadecimal integer from a string.""" + if HEXDIGITS.fullmatch(s) is None: + raise ValueError("not a hexadecimal integer: %r" % s) + return int(s, 16) diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index cd0a0e1004..db91d62daa 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -18,7 +18,7 @@ ) from tornado.iostream import IOStream from tornado.locks import Event -from tornado.log import gen_log +from tornado.log import gen_log, app_log from tornado.netutil import ssl_options_to_context from tornado.simple_httpclient import SimpleAsyncHTTPClient from tornado.testing import ( @@ -41,6 +41,7 @@ import ssl import sys import tempfile +import textwrap import unittest import urllib.parse from io import BytesIO @@ -118,7 +119,7 @@ class SSLTestMixin(object): def get_ssl_options(self): return dict( ssl_version=self.get_ssl_version(), - **AsyncHTTPSTestCase.default_ssl_options() + **AsyncHTTPSTestCase.default_ssl_options(), ) def get_ssl_version(self): @@ -558,23 +559,59 @@ def test_chunked_request_uppercase(self): ) self.assertEqual(json_decode(response), {"foo": ["bar"]}) - @gen_test - def test_invalid_content_length(self): - with ExpectLog( - gen_log, ".*Only integer Content-Length is allowed", level=logging.INFO - ): - self.stream.write( - b"""\ + def test_chunked_request_body_invalid_size(self): + # Only hex digits are allowed in chunk sizes. Python's int() function + # also accepts underscores, so make sure we reject them here. + self.stream.write( + b"""\ POST /echo HTTP/1.1 -Content-Length: foo +Transfer-Encoding: chunked -bar +1_a +1234567890abcdef1234567890 +0 """.replace( - b"\n", b"\r\n" - ) + b"\n", b"\r\n" ) - yield self.stream.read_until_close() + ) + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + + @gen_test + def test_invalid_content_length(self): + # HTTP only allows decimal digits in content-length. Make sure we don't + # accept anything else, with special attention to things accepted by the + # python int() function (leading plus signs and internal underscores). + test_cases = [ + ("alphabetic", "foo"), + ("leading plus", "+10"), + ("internal underscore", "1_0"), + ] + for name, value in test_cases: + with self.subTest(name=name), closing(IOStream(socket.socket())) as stream: + with ExpectLog( + gen_log, + ".*Only integer Content-Length is allowed", + level=logging.INFO, + ): + yield stream.connect(("127.0.0.1", self.get_http_port())) + stream.write( + utf8( + textwrap.dedent( + f"""\ + POST /echo HTTP/1.1 + Content-Length: {value} + Connection: close + + 1234567890 + """ + ).replace("\n", "\r\n") + ) + ) + yield stream.read_until_close() class XHeaderTest(HandlerBaseTestCase): @@ -1123,6 +1160,46 @@ def body_producer(write): ) +class InvalidOutputContentLengthTest(AsyncHTTPTestCase): + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def headers_received(self, start_line, headers): + content_lengths = { + "normal": "10", + "alphabetic": "foo", + "leading plus": "+10", + "underscore": "1_0", + } + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), + HTTPHeaders({"Content-Length": content_lengths[headers["x-test"]]}), + ) + self.connection.write(b"1234567890") + self.connection.finish() + + def get_app(self): + class App(HTTPServerConnectionDelegate): + def start_request(self, server_conn, request_conn): + return InvalidOutputContentLengthTest.MessageDelegate(request_conn) + + return App() + + def test_invalid_output_content_length(self): + with self.subTest("normal"): + response = self.fetch("/", method="GET", headers={"x-test": "normal"}) + response.rethrow() + self.assertEqual(response.body, b"1234567890") + for test in ["alphabetic", "leading plus", "underscore"]: + with self.subTest(test): + # This log matching could be tighter but I think I'm already + # over-testing here. + with ExpectLog(app_log, "Uncaught exception"): + with self.assertRaises(HTTPError): + self.fetch("/", method="GET", headers={"x-test": test}) + + class MaxHeaderSizeTest(AsyncHTTPTestCase): def get_app(self): return Application([("/", HelloWorldRequestHandler)]) From 4aaa6d4b92d84a7eccdaba14f5b1b03cd891a5cf Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 10 Aug 2023 21:41:40 -0400 Subject: [PATCH 47/70] httpserver_test: Add ExpectLog to fix CI The github security advisory feature lets you make private PRs but it apparently doesn't support CI so this log failure wasn't caught until after the PR was merged. --- tornado/test/httpserver_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index db91d62daa..1faf63fabf 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -575,9 +575,10 @@ def test_chunked_request_body_invalid_size(self): b"\n", b"\r\n" ) ) - start_line, headers, response = self.io_loop.run_sync( - lambda: read_stream_body(self.stream) - ) + with ExpectLog(gen_log, ".*invalid chunk size", level=logging.INFO): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) self.assertEqual(400, start_line.code) @gen_test From d8502489b5343aa435fcd13ea418ab09d1f04326 Mon Sep 17 00:00:00 2001 From: Chengzzzi <36990436+TnTomato@users.noreply.github.com> Date: Fri, 11 Aug 2023 17:32:59 +0800 Subject: [PATCH 48/70] Fix WebSocketClientConnection parameter --- tornado/websocket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tornado/websocket.py b/tornado/websocket.py index 165cc316d6..fbfd700887 100644 --- a/tornado/websocket.py +++ b/tornado/websocket.py @@ -1357,7 +1357,7 @@ def __init__( ping_interval: Optional[float] = None, ping_timeout: Optional[float] = None, max_message_size: int = _default_max_message_size, - subprotocols: Optional[List[str]] = [], + subprotocols: Optional[List[str]] = None, resolver: Optional[Resolver] = None, ) -> None: self.connect_future = Future() # type: Future[WebSocketClientConnection] From a303ab449a0b63421b41cf351915777fa3468a83 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 10 Aug 2023 22:38:19 -0400 Subject: [PATCH 49/70] Release notes for 6.3.3 --- docs/releases.rst | 1 + docs/releases/v6.3.3.rst | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 docs/releases/v6.3.3.rst diff --git a/docs/releases.rst b/docs/releases.rst index fc7e41654f..076ac86331 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 2 + releases/v6.3.3 releases/v6.3.2 releases/v6.3.1 releases/v6.3.0 diff --git a/docs/releases/v6.3.3.rst b/docs/releases/v6.3.3.rst new file mode 100644 index 0000000000..7fe0110fda --- /dev/null +++ b/docs/releases/v6.3.3.rst @@ -0,0 +1,12 @@ +What's new in Tornado 6.3.3 +=========================== + +Aug 11, 2023 +------------ + +Security improvements +~~~~~~~~~~~~~~~~~~~~~ + +- The ``Content-Length`` header and ``chunked`` ``Transfer-Encoding`` sizes are now parsed + more strictly (according to the relevant RFCs) to avoid potential request-smuggling + vulnerabilities when deployed behind certain proxies. From b91476242b3169623dfa953e86bde9404afe8ae2 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 12 Aug 2023 20:42:15 -0400 Subject: [PATCH 50/70] httpclient_test: Increase test_destructor_log timeout This test has recently become flaky on windows CI, and before investigating further, see if it's just because the CI machines are overloaded and subprocesses are slower on windows. --- tornado/test/httpclient_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py index a41040e64a..31a1916199 100644 --- a/tornado/test/httpclient_test.py +++ b/tornado/test/httpclient_test.py @@ -853,7 +853,7 @@ def test_destructor_log(self): stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True, - timeout=5, + timeout=15, ) if proc.stdout: print("STDOUT:") From ddb7e88afd4501e34c5c3bcebd10265f89aa0cf9 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 21 Aug 2023 22:20:34 -0400 Subject: [PATCH 51/70] test: Refactor circlerefs script into a test This script was only ever run irregularly on its own; bring it in to the test suite so it can be run automatically. --- maint/circlerefs/circlerefs.py | 107 ------------------- tornado/test/circlerefs_test.py | 183 ++++++++++++++++++++++++++++++++ tornado/test/runtests.py | 1 + 3 files changed, 184 insertions(+), 107 deletions(-) delete mode 100755 maint/circlerefs/circlerefs.py create mode 100644 tornado/test/circlerefs_test.py diff --git a/maint/circlerefs/circlerefs.py b/maint/circlerefs/circlerefs.py deleted file mode 100755 index bd8214aa82..0000000000 --- a/maint/circlerefs/circlerefs.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -"""Test script to find circular references. - -Circular references are not leaks per se, because they will eventually -be GC'd. However, on CPython, they prevent the reference-counting fast -path from being used and instead rely on the slower full GC. This -increases memory footprint and CPU overhead, so we try to eliminate -circular references created by normal operation. -""" - -import gc -import traceback -import types -from tornado import web, ioloop, gen, httpclient - - -def find_circular_references(garbage=None): - def inner(level): - for item in level: - item_id = id(item) - if item_id not in garbage_ids: - continue - if item_id in visited_ids: - continue - if item_id in stack_ids: - candidate = stack[stack.index(item):] - candidate.append(item) - found.append(candidate) - continue - - stack.append(item) - stack_ids.add(item_id) - inner(gc.get_referents(item)) - stack.pop() - stack_ids.remove(item_id) - visited_ids.add(item_id) - - garbage = garbage or gc.garbage - found = [] - stack = [] - stack_ids = set() - garbage_ids = set(map(id, garbage)) - visited_ids = set() - - inner(garbage) - inner = None - return found - - -class CollectHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write("Collected: {}\n".format(gc.collect())) - self.write("Garbage: {}\n".format(len(gc.garbage))) - for circular in find_circular_references(): - print('\n==========\n Circular \n==========') - for item in circular: - print(' ', repr(item)) - for item in circular: - if isinstance(item, types.FrameType): - print('\nLocals:', item.f_locals) - print('\nTraceback:', repr(item)) - traceback.print_stack(item) - - -class DummyHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write('ok\n') - - -class DummyAsyncHandler(web.RequestHandler): - @gen.coroutine - def get(self): - raise web.Finish('ok\n') - - -application = web.Application([ - (r'/dummy/', DummyHandler), - (r'/dummyasync/', DummyAsyncHandler), - (r'/collect/', CollectHandler), -], debug=True) - - -@gen.coroutine -def main(): - gc.disable() - gc.collect() - gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) - print('GC disabled') - - print("Start on 8888") - application.listen(8888, '127.0.0.1') - - # Do a little work. Alternately, could leave this script running and - # poke at it with a browser. - client = httpclient.AsyncHTTPClient() - yield client.fetch('http://127.0.0.1:8888/dummy/') - yield client.fetch('http://127.0.0.1:8888/dummyasync/', raise_error=False) - - # Now report on the results. - resp = yield client.fetch('http://127.0.0.1:8888/collect/') - print(resp.body) - - -if __name__ == "__main__": - ioloop.IOLoop.current().run_sync(main) diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py new file mode 100644 index 0000000000..2502c5fbac --- /dev/null +++ b/tornado/test/circlerefs_test.py @@ -0,0 +1,183 @@ +"""Test script to find circular references. + +Circular references are not leaks per se, because they will eventually +be GC'd. However, on CPython, they prevent the reference-counting fast +path from being used and instead rely on the slower full GC. This +increases memory footprint and CPU overhead, so we try to eliminate +circular references created by normal operation. +""" + +import asyncio +import contextlib +import gc +import io +import sys +import traceback +import types +import typing +import unittest + +import tornado +from tornado import web, gen, httpclient + + +def find_circular_references(garbage): + """Find circular references in a list of objects. + + The garbage list contains objects that participate in a cycle, + but also the larger set of objects kept alive by that cycle. + This function finds subsets of those objects that make up + the cycle(s). + """ + + def inner(level): + for item in level: + item_id = id(item) + if item_id not in garbage_ids: + continue + if item_id in visited_ids: + continue + if item_id in stack_ids: + candidate = stack[stack.index(item) :] + candidate.append(item) + found.append(candidate) + continue + + stack.append(item) + stack_ids.add(item_id) + inner(gc.get_referents(item)) + stack.pop() + stack_ids.remove(item_id) + visited_ids.add(item_id) + + found: typing.List[object] = [] + stack = [] + stack_ids = set() + garbage_ids = set(map(id, garbage)) + visited_ids = set() + + inner(garbage) + return found + + +@contextlib.contextmanager +def assert_no_cycle_garbage(): + """Raise AssertionError if the wrapped code creates garbage with cycles.""" + gc.disable() + gc.collect() + gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) + yield + try: + # We have DEBUG_STATS on which causes gc.collect to write to stderr. + # Capture the output instead of spamming the logs on passing runs. + f = io.StringIO() + old_stderr = sys.stderr + sys.stderr = f + try: + gc.collect() + finally: + sys.stderr = old_stderr + garbage = gc.garbage[:] + # Must clear gc.garbage (the same object, not just replacing it with a + # new list) to avoid warnings at shutdown. + gc.garbage[:] = [] + if len(garbage) == 0: + return + for circular in find_circular_references(garbage): + f.write("\n==========\n Circular \n==========") + for item in circular: + f.write(f"\n {repr(item)}") + for item in circular: + if isinstance(item, types.FrameType): + f.write(f"\nLocals: {item.f_locals}") + f.write(f"\nTraceback: {repr(item)}") + traceback.print_stack(item) + del garbage + raise AssertionError(f.getvalue()) + finally: + gc.set_debug(0) + gc.enable() + + +class CircleRefsTest(unittest.TestCase): + def test_known_leak(self): + # Construct a known leak scenario to make sure the test harness works. + class C(object): + def __init__(self, name): + self.name = name + self.a: typing.Optional[C] = None + self.b: typing.Optional[C] = None + self.c: typing.Optional[C] = None + + def __repr__(self): + return f"name={self.name}" + + with self.assertRaises(AssertionError) as cm: + with assert_no_cycle_garbage(): + # a and b form a reference cycle. c is not part of the cycle, + # but it cannot be GC'd while a and b are alive. + a = C("a") + b = C("b") + c = C("c") + a.b = b + a.c = c + b.a = a + b.c = c + del a, b + self.assertIn("Circular", str(cm.exception)) + self.assertIn("name=a", str(cm.exception)) + self.assertIn("name=b", str(cm.exception)) + self.assertNotIn("name=c", str(cm.exception)) + + async def run_handler(self, handler_class): + app = web.Application( + [ + (r"/", handler_class), + ] + ) + socket, port = tornado.testing.bind_unused_port() + server = tornado.httpserver.HTTPServer(app) + server.add_socket(socket) + + client = httpclient.AsyncHTTPClient() + with assert_no_cycle_garbage(): + # Only the fetch (and the corresponding server-side handler) + # are being tested for cycles. In particular, the Application + # object has internal cycles (as of this writing) which we don't + # care to fix since in real world usage the Application object + # is effectively a global singleton. + await client.fetch(f"http://127.0.0.1:{port}/") + client.close() + server.stop() + socket.close() + + def test_sync_handler(self): + class Handler(web.RequestHandler): + def get(self): + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_finish_exception_handler(self): + class Handler(web.RequestHandler): + def get(self): + raise web.Finish("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_coro_handler(self): + class Handler(web.RequestHandler): + @gen.coroutine + def get(self): + yield asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_async_handler(self): + class Handler(web.RequestHandler): + async def get(self): + await asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) diff --git a/tornado/test/runtests.py b/tornado/test/runtests.py index 58cecd383e..f35b372545 100644 --- a/tornado/test/runtests.py +++ b/tornado/test/runtests.py @@ -22,6 +22,7 @@ "tornado.test.asyncio_test", "tornado.test.auth_test", "tornado.test.autoreload_test", + "tornado.test.circlerefs_test", "tornado.test.concurrent_test", "tornado.test.curl_httpclient_test", "tornado.test.escape_test", From 1d5fc98018bd1d742b1608634bfb57f6e3d990f0 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 21 Aug 2023 23:03:39 -0400 Subject: [PATCH 52/70] ioloop,concurrent: Fix reference cycles In a few places we were referring to a future via a closure instead of using the reference passed as an argument to the callback. This sometimes causes a reference cycle that can slow GC. This commit adds a test which covers two of the cases (chain_future and the concurrent.future branch of add_future) while the third was found by inspecting other calls to add_done_callback for obvious instances of this pattern. Fixes #2620 --- tornado/concurrent.py | 3 +-- tornado/ioloop.py | 6 ++---- tornado/test/circlerefs_test.py | 26 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/tornado/concurrent.py b/tornado/concurrent.py index 6e05346b56..f0bbf62317 100644 --- a/tornado/concurrent.py +++ b/tornado/concurrent.py @@ -150,8 +150,7 @@ def chain_future(a: "Future[_T]", b: "Future[_T]") -> None: """ - def copy(future: "Future[_T]") -> None: - assert future is a + def copy(a: "Future[_T]") -> None: if b.done(): return if hasattr(a, "exc_info") and a.exc_info() is not None: # type: ignore diff --git a/tornado/ioloop.py b/tornado/ioloop.py index cdfb4a630f..3fb1359aae 100644 --- a/tornado/ioloop.py +++ b/tornado/ioloop.py @@ -696,15 +696,13 @@ def add_future( # the error logging (i.e. it goes to tornado.log.app_log # instead of asyncio's log). future.add_done_callback( - lambda f: self._run_callback(functools.partial(callback, future)) + lambda f: self._run_callback(functools.partial(callback, f)) ) else: assert is_future(future) # For concurrent futures, we use self.add_callback, so # it's fine if future_add_done_callback inlines that call. - future_add_done_callback( - future, lambda f: self.add_callback(callback, future) - ) + future_add_done_callback(future, lambda f: self.add_callback(callback, f)) def run_in_executor( self, diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py index 2502c5fbac..3908588c59 100644 --- a/tornado/test/circlerefs_test.py +++ b/tornado/test/circlerefs_test.py @@ -181,3 +181,29 @@ async def get(self): self.write("ok\n") asyncio.run(self.run_handler(Handler)) + + def test_run_on_executor(self): + # From https://github.com/tornadoweb/tornado/issues/2620 + # + # When this test was introduced it found cycles in IOLoop.add_future + # and tornado.concurrent.chain_future. + import concurrent.futures + + thread_pool = concurrent.futures.ThreadPoolExecutor(1) + + class Factory(object): + executor = thread_pool + + @tornado.concurrent.run_on_executor + def run(self): + return None + + factory = Factory() + + async def main(): + # The cycle is not reported on the first call. It's not clear why. + for i in range(2): + await factory.run() + + with assert_no_cycle_garbage(): + asyncio.run(main()) From e80e9441b8777b2dbe78de5dd783bb6fcd3d00d5 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 21 Aug 2023 23:14:49 -0400 Subject: [PATCH 53/70] test: Fix circlerefs test on python 3.10 and before Local/attribute dicts are reported a bit differently here. --- tornado/test/circlerefs_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py index 3908588c59..49b53f96f6 100644 --- a/tornado/test/circlerefs_test.py +++ b/tornado/test/circlerefs_test.py @@ -125,9 +125,14 @@ def __repr__(self): b.c = c del a, b self.assertIn("Circular", str(cm.exception)) - self.assertIn("name=a", str(cm.exception)) - self.assertIn("name=b", str(cm.exception)) - self.assertNotIn("name=c", str(cm.exception)) + # Leading spaces ensure we only catch these at the beginning of a line, meaning they are a + # cycle participant and not simply the contents of a locals dict or similar container. (This + # depends on the formatting above which isn't ideal but this test evolved from a + # command-line script) Note that the behavior here changed in python 3.11; in newer pythons + # locals are handled a bit differently and the test passes without the spaces. + self.assertIn(" name=a", str(cm.exception)) + self.assertIn(" name=b", str(cm.exception)) + self.assertNotIn(" name=c", str(cm.exception)) async def run_handler(self, handler_class): app = web.Application( From 6f8921191b4b214a752739465cabfff8165c474a Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 22 Aug 2023 09:28:13 -0400 Subject: [PATCH 54/70] test: Skip circlerefs test on pypy Pypy doesn't have the same refcount fast-path as cpython so the gc behavior is different and this test is irrelevant. --- tornado/test/circlerefs_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py index 49b53f96f6..5c71858ff7 100644 --- a/tornado/test/circlerefs_test.py +++ b/tornado/test/circlerefs_test.py @@ -19,6 +19,7 @@ import tornado from tornado import web, gen, httpclient +from tornado.test.util import skipNotCPython def find_circular_references(garbage): @@ -99,6 +100,8 @@ def assert_no_cycle_garbage(): gc.enable() +# GC behavior is cpython-specific +@skipNotCPython class CircleRefsTest(unittest.TestCase): def test_known_leak(self): # Construct a known leak scenario to make sure the test harness works. From 37770e259192070758d25992bb3daa0de4ec5a88 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 22 Aug 2023 20:01:34 -0400 Subject: [PATCH 55/70] docs: Fix build after readthedocs/readthedocs.org#10638 The old magic for sphinx_rtd_theme has been removed; now we should handle the theme in the same way we do for local builds. --- docs/conf.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index bff33661fe..424d844ee6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,6 +2,8 @@ import sphinx.errors import sys +import sphinx_rtd_theme + # Ensure we get the local copy of tornado instead of what's on the standard path sys.path.insert(0, os.path.abspath("..")) import tornado @@ -84,16 +86,8 @@ intersphinx_mapping = {"python": ("https://docs.python.org/3/", None)} -on_rtd = os.environ.get("READTHEDOCS", None) == "True" - -# On RTD we can't import sphinx_rtd_theme, but it will be applied by -# default anyway. This block will use the same theme when building locally -# as on RTD. -if not on_rtd: - import sphinx_rtd_theme - - html_theme = "sphinx_rtd_theme" - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Suppress warnings about "class reference target not found" for these types. # In most cases these types come from type annotations and are for mypy's use. From 129d11e071dfb1d6f7cf76b1a169b5ba842cc25c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 22 Aug 2023 21:27:05 -0400 Subject: [PATCH 56/70] escape: Use the standard library where possible Many of these functions were necessary in Python 2, but are now redundant. We can simply use the standard library in many cases. The only major change is in xhtml_unescape, where we now reject invalid character references such as surrogates and control characters. Update docs throughout to be more specific about differences from the standard library. Also be more complete about the ``plus`` option to the url escaping functions. Fixes #3186 --- tornado/escape.py | 113 ++++++++++++++++++------------------ tornado/test/escape_test.py | 2 +- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/tornado/escape.py b/tornado/escape.py index 55354c30f4..af2eb59b49 100644 --- a/tornado/escape.py +++ b/tornado/escape.py @@ -17,9 +17,15 @@ Also includes a few other miscellaneous string manipulation functions that have crept in over time. + +Many functions in this module have near-equivalents in the standard library +(the differences mainly relate to handling of bytes and unicode strings, +and were more relevant in Python 2). In new code, the standard library +functions are encouraged instead of this module where applicable. See the +docstrings on each function for details. """ -import html.entities +import html import json import re import urllib.parse @@ -30,16 +36,6 @@ from typing import Union, Any, Optional, Dict, List, Callable -_XHTML_ESCAPE_RE = re.compile("[&<>\"']") -_XHTML_ESCAPE_DICT = { - "&": "&", - "<": "<", - ">": ">", - '"': """, - "'": "'", -} - - def xhtml_escape(value: Union[str, bytes]) -> str: """Escapes a string so it is valid within HTML or XML. @@ -47,25 +43,50 @@ def xhtml_escape(value: Union[str, bytes]) -> str: When used in attribute values the escaped strings must be enclosed in quotes. + Equivalent to `html.escape` except that this function always returns + type `str` while `html.escape` returns `bytes` if its input is `bytes`. + .. versionchanged:: 3.2 Added the single quote to the list of escaped characters. + + .. versionchanged:: 6.4 + + Now simply wraps `html.escape`. This is equivalent to the old behavior + except that single quotes are now escaped as ``'`` instead of + ``'`` and performance may be different. """ - return _XHTML_ESCAPE_RE.sub( - lambda match: _XHTML_ESCAPE_DICT[match.group(0)], to_basestring(value) - ) + return html.escape(to_unicode(value)) def xhtml_unescape(value: Union[str, bytes]) -> str: - """Un-escapes an XML-escaped string.""" - return re.sub(r"&(#?)(\w+?);", _convert_entity, _unicode(value)) + """Un-escapes an XML-escaped string. + + Equivalent to `html.unescape` except that this function always returns + type `str` while `html.unescape` returns `bytes` if its input is `bytes`. + + .. versionchanged:: 6.4 + + Now simply wraps `html.unescape`. This changes behavior for some inputs + as required by the HTML 5 specification + https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state + + Some invalid inputs such as surrogates now raise an error, and numeric + references to certain ISO-8859-1 characters are now handled correctly. + """ + return html.unescape(to_unicode(value)) # The fact that json_encode wraps json.dumps is an implementation detail. # Please see https://github.com/tornadoweb/tornado/pull/706 # before sending a pull request that adds **kwargs to this function. def json_encode(value: Any) -> str: - """JSON-encodes the given Python object.""" + """JSON-encodes the given Python object. + + Equivalent to `json.dumps` with the additional guarantee that the output + will never contain the character sequence ```` tag. + """ # JSON permits but does not require forward slashes to be escaped. # This is useful when json data is emitted in a tags from prematurely terminating @@ -78,9 +99,9 @@ def json_encode(value: Any) -> str: def json_decode(value: Union[str, bytes]) -> Any: """Returns Python objects for the given JSON string. - Supports both `str` and `bytes` inputs. + Supports both `str` and `bytes` inputs. Equvalent to `json.loads`. """ - return json.loads(to_basestring(value)) + return json.loads(value) def squeeze(value: str) -> str: @@ -91,16 +112,20 @@ def squeeze(value: str) -> str: def url_escape(value: Union[str, bytes], plus: bool = True) -> str: """Returns a URL-encoded version of the given value. - If ``plus`` is true (the default), spaces will be represented - as "+" instead of "%20". This is appropriate for query strings - but not for the path component of a URL. Note that this default - is the reverse of Python's urllib module. + Equivalent to either `urllib.parse.quote_plus` or `urllib.parse.quote` depending on the ``plus`` + argument. + + If ``plus`` is true (the default), spaces will be represented as ``+`` and slashes will be + represented as ``%2F``. This is appropriate for query strings. If ``plus`` is false, spaces + will be represented as ``%20`` and slashes are left as-is. This is appropriate for the path + component of a URL. Note that the default of ``plus=True`` is effectively the + reverse of Python's urllib module. .. versionadded:: 3.1 The ``plus`` argument """ quote = urllib.parse.quote_plus if plus else urllib.parse.quote - return quote(utf8(value)) + return quote(value) @typing.overload @@ -122,14 +147,15 @@ def url_unescape( # noqa: F811 The argument may be either a byte or unicode string. - If encoding is None, the result will be a byte string. Otherwise, - the result is a unicode string in the specified encoding. + If encoding is None, the result will be a byte string and this function is equivalent to + `urllib.parse.unquote_to_bytes` if ``plus=False``. Otherwise, the result is a unicode string in + the specified encoding and this function is equivalent to either `urllib.parse.unquote_plus` or + `urllib.parse.unquote` except that this function also accepts `bytes` as input. - If ``plus`` is true (the default), plus signs will be interpreted - as spaces (literal plus signs must be represented as "%2B"). This - is appropriate for query strings and form-encoded values but not - for the path component of a URL. Note that this default is the - reverse of Python's urllib module. + If ``plus`` is true (the default), plus signs will be interpreted as spaces (literal plus signs + must be represented as "%2B"). This is appropriate for query strings and form-encoded values + but not for the path component of a URL. Note that this default is the reverse of Python's + urllib module. .. versionadded:: 3.1 The ``plus`` argument @@ -375,28 +401,3 @@ def make_link(m: typing.Match) -> str: # that we won't pick up ", etc. text = _unicode(xhtml_escape(text)) return _URL_RE.sub(make_link, text) - - -def _convert_entity(m: typing.Match) -> str: - if m.group(1) == "#": - try: - if m.group(2)[:1].lower() == "x": - return chr(int(m.group(2)[1:], 16)) - else: - return chr(int(m.group(2))) - except ValueError: - return "&#%s;" % m.group(2) - try: - return _HTML_UNICODE_MAP[m.group(2)] - except KeyError: - return "&%s;" % m.group(2) - - -def _build_unicode_map() -> Dict[str, str]: - unicode_map = {} - for name, value in html.entities.name2codepoint.items(): - unicode_map[name] = chr(value) - return unicode_map - - -_HTML_UNICODE_MAP = _build_unicode_map() diff --git a/tornado/test/escape_test.py b/tornado/test/escape_test.py index a90d11d663..6bd2ae79e4 100644 --- a/tornado/test/escape_test.py +++ b/tornado/test/escape_test.py @@ -220,7 +220,7 @@ def test_xhtml_escape(self): ("", "<foo>"), ("", "<foo>"), (b"", b"<foo>"), - ("<>&\"'", "<>&"'"), + ("<>&\"'", "<>&"'"), ("&", "&amp;"), ("<\u00e9>", "<\u00e9>"), (b"<\xc3\xa9>", b"<\xc3\xa9>"), From 5f1cc0e97ddc9ed3662581e744c0e44b92ff0326 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 22 Aug 2023 21:33:44 -0400 Subject: [PATCH 57/70] escape: Remove noqa: F811 tags from overloads Newer versions of pyflakes no longer require this. --- tornado/escape.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tornado/escape.py b/tornado/escape.py index af2eb59b49..84abfca604 100644 --- a/tornado/escape.py +++ b/tornado/escape.py @@ -61,7 +61,7 @@ def xhtml_escape(value: Union[str, bytes]) -> str: def xhtml_unescape(value: Union[str, bytes]) -> str: """Un-escapes an XML-escaped string. - + Equivalent to `html.unescape` except that this function always returns type `str` while `html.unescape` returns `bytes` if its input is `bytes`. @@ -82,7 +82,7 @@ def xhtml_unescape(value: Union[str, bytes]) -> str: # before sending a pull request that adds **kwargs to this function. def json_encode(value: Any) -> str: """JSON-encodes the given Python object. - + Equivalent to `json.dumps` with the additional guarantee that the output will never contain the character sequence ```` tag. @@ -113,8 +113,8 @@ def url_escape(value: Union[str, bytes], plus: bool = True) -> str: """Returns a URL-encoded version of the given value. Equivalent to either `urllib.parse.quote_plus` or `urllib.parse.quote` depending on the ``plus`` - argument. - + argument. + If ``plus`` is true (the default), spaces will be represented as ``+`` and slashes will be represented as ``%2F``. This is appropriate for query strings. If ``plus`` is false, spaces will be represented as ``%20`` and slashes are left as-is. This is appropriate for the path @@ -133,14 +133,14 @@ def url_unescape(value: Union[str, bytes], encoding: None, plus: bool = True) -> pass -@typing.overload # noqa: F811 +@typing.overload def url_unescape( value: Union[str, bytes], encoding: str = "utf-8", plus: bool = True ) -> str: pass -def url_unescape( # noqa: F811 +def url_unescape( value: Union[str, bytes], encoding: Optional[str] = "utf-8", plus: bool = True ) -> Union[str, bytes]: """Decodes the given value from a URL. @@ -201,17 +201,17 @@ def utf8(value: bytes) -> bytes: pass -@typing.overload # noqa: F811 +@typing.overload def utf8(value: str) -> bytes: pass -@typing.overload # noqa: F811 +@typing.overload def utf8(value: None) -> None: pass -def utf8(value: Union[None, str, bytes]) -> Optional[bytes]: # noqa: F811 +def utf8(value: Union[None, str, bytes]) -> Optional[bytes]: """Converts a string argument to a byte string. If the argument is already a byte string or None, it is returned unchanged. @@ -232,17 +232,17 @@ def to_unicode(value: str) -> str: pass -@typing.overload # noqa: F811 +@typing.overload def to_unicode(value: bytes) -> str: pass -@typing.overload # noqa: F811 +@typing.overload def to_unicode(value: None) -> None: pass -def to_unicode(value: Union[None, str, bytes]) -> Optional[str]: # noqa: F811 +def to_unicode(value: Union[None, str, bytes]) -> Optional[str]: """Converts a string argument to a unicode string. If the argument is already a unicode string or None, it is returned From 01e7066c32ecefa0741e03503431f52e4967ff75 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 26 Sep 2023 20:16:00 -0400 Subject: [PATCH 58/70] docs: Release notes for 6.4.0 --- docs/releases.rst | 1 + docs/releases/v6.4.0.rst | 91 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 docs/releases/v6.4.0.rst diff --git a/docs/releases.rst b/docs/releases.rst index 076ac86331..da8dd597a8 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 2 + releases/v6.4.0 releases/v6.3.3 releases/v6.3.2 releases/v6.3.1 diff --git a/docs/releases/v6.4.0.rst b/docs/releases/v6.4.0.rst new file mode 100644 index 0000000000..62cd4ac7e4 --- /dev/null +++ b/docs/releases/v6.4.0.rst @@ -0,0 +1,91 @@ +What's new in Tornado 6.4.0 +=========================== + +In Progress +----------- + +General Changes +~~~~~~~~~~~~~~~ + +- Python 3.12 is now supported. Older versions of Tornado will work on Python 3.12 but may log + deprecation warnings. + +Deprecation Notices +~~~~~~~~~~~~~~~~~~~ + +- `.IOLoop.add_callback_from_signal` is suspected to have been broken since Tornado 5.0 and will be + removed in version 7.0. Use `asyncio.loop.add_signal_handler` instead. +- The ``client_secret`` argument to `.OAuth2Mixin.authorize_redirect` is deprecated and will be + removed in Tornado 7.0. This argument has never been used and other similar methods in this module + don't have it. +- `.TwitterMixin` is deprecated and will be removed in the future. + +``tornado.auth`` +~~~~~~~~~~~~~~~~ + +- The ``client_secret`` argument to `.OAuth2Mixin.authorize_redirect` is deprecated and will be + removed in Tornado 7.0. This argument has never been used and other similar methods in this module + don't have it. +- `.TwitterMixin` is deprecated and will be removed in the future. + +``tornado.autoreload`` +~~~~~~~~~~~~~~~~~~~~~~ + +- Autoreload can now be used when the program is run as a directory rather than a file or module. +- New CLI flag ``--until-success`` re-runs the program on any failure but stops after the first + successful run. + +``tornado.concurrent`` +~~~~~~~~~~~~~~~~~~~~~~ + +- Fixed reference cycles that could lead to increased memory usage. + +``tornado.escape`` +~~~~~~~~~~~~~~~~~~ + +- Several methods in this module now simply pass through to their equivalents in the standard + library. + +``tornado.gen`` +~~~~~~~~~~~~~~~ + +- This module now holds a strong reference to all running `asyncio.Task` objects it creates. This + prevents premature garbage collection which could cause warnings like "Task was destroyed but it + is pending!". + +``tornado.ioloop`` +~~~~~~~~~~~~~~~~~~ + +- `.IOLoop.add_callback_from_signal` is suspected to have been broken since Tornado 5.0 and will be + removed in version 7.0. Use `asyncio.loop.add_signal_handler` instead. +- The type annotation for `.IOLoop.run_in_executor` has been updated to match the updated signature + of `asyncio.loop.run_in_executor`. +- Fixed reference cycles that could lead to increased memory usage. + +``tornado.locale`` +~~~~~~~~~~~~~~~~~~ + +- `.format_timestamp` now supports "aware" datetime objects. + +``tornado.platform.asyncio`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The shutdown protocol for `.AddThreadSelectorEventLoop` now requires the use of `asyncio.run` or + `asyncio.loop.shutdown_asyncgens` to avoid leaking the thread. +- Introduced `.SelectorThread` class containing the core functionality of + `.AddThreadSelectorEventLoop`. +- The ``close()`` method of `.AddThreadSelectorEventLoop` is now idempotent. + +``tornado.web`` +~~~~~~~~~~~~~~~ + +- `.StaticFileHandler.get_modified_time` now supports "aware" datetime objects and the default + implementation now returns aware objects. + +``tornado.websocket`` +~~~~~~~~~~~~~~~~~~~~~ + +- Unclosed client connections now reliably log a warning. Previously the warning was dependent on + garbage collection and whether the ``ping_interval`` option was used. +- The ``subprotocols`` argument to `.WebSocketClientConnection` now defaults to None instead of an + empty list (which was mutable and reused) From 0cc2b92bd61d9ac608ac4148c22b2f41d30bf803 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 26 Sep 2023 20:52:14 -0400 Subject: [PATCH 59/70] Set version number to 6.4b1 --- tornado/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tornado/__init__.py b/tornado/__init__.py index 3df733ad98..ae39bb59b2 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -22,8 +22,8 @@ # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "6.4.dev1" -version_info = (6, 4, 0, -100) +version = "6.4b1" +version_info = (6, 4, 0, -99) import importlib import typing From f48682c90cb6fea8d050ead3368c04b9b9c1699c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sat, 30 Sep 2023 15:39:53 -0400 Subject: [PATCH 60/70] ci: Add windows to the main test config I've caused enough broken builds that aren't caught until the final release pipeline; time to add this to the main test config. --- .github/workflows/test.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e88a3e9051..b26d4ef902 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,4 +74,22 @@ jobs: - name: Run test suite run: python -m tox -e ${{ matrix.tox_env }} - + + test_win: + # Windows tests are fairly slow, so only run one configuration here. + # We test on windows but not mac because even though mac is a more + # fully-supported platform, it's similar enough to linux that we + # don't generally need to test it separately. Windows is different + # enough that we'll break it if we don't test it in CI. + name: Run windows tests + needs: test_quick + runs-on: windows-2022 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + name: Install Python + with: + python-version: '3.11' + - name: Run test suite + # TODO: figure out what's up with these log messages + run: py -m tornado.test --fail-if-logs=false From e55f400288ed263410133fd4e710b500d1b1e24c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 01:47:22 +0000 Subject: [PATCH 61/70] build(deps): bump urllib3 from 1.26.14 to 1.26.17 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.14 to 1.26.17. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.14...1.26.17) --- updated-dependencies: - dependency-name: urllib3 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7543b10f99..4934683dfc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -114,7 +114,7 @@ types-pycurl==7.45.2.0 # via -r requirements.in typing-extensions==4.4.0 # via mypy -urllib3==1.26.14 +urllib3==1.26.17 # via requests virtualenv==20.23.0 # via tox From 1a60c488cd90210f251b1ad3cf4e0c0597886846 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 13 Oct 2023 21:27:20 -0400 Subject: [PATCH 62/70] docs: Update intersphinx references for python 3.12 Intersphinx links are currently an unpinned dependency, so when a new version of python is released it's possible (although relatively rare) for it to break our links. 3.12 removed a few members of the ssl module. --- docs/releases/v3.0.1.rst | 2 +- docs/releases/v5.0.0.rst | 2 +- tornado/httpserver.py | 2 +- tornado/iostream.py | 6 +++--- tornado/netutil.py | 6 ++---- tornado/tcpserver.py | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/releases/v3.0.1.rst b/docs/releases/v3.0.1.rst index 4511838c85..4d289f5004 100644 --- a/docs/releases/v3.0.1.rst +++ b/docs/releases/v3.0.1.rst @@ -11,7 +11,7 @@ Apr 8, 2013 * The `tornado.testing.gen_test` decorator will no longer be recognized as a (broken) test by ``nose``. * Work around a bug in Ubuntu 13.04 betas involving an incomplete backport - of the `ssl.match_hostname` function. + of the ``ssl.match_hostname`` function. * `tornado.websocket.websocket_connect` now fails cleanly when it attempts to connect to a non-websocket url. * ``tornado.testing.LogTrapTestCase`` once again works with byte strings diff --git a/docs/releases/v5.0.0.rst b/docs/releases/v5.0.0.rst index dd0bd02439..950b2e1739 100644 --- a/docs/releases/v5.0.0.rst +++ b/docs/releases/v5.0.0.rst @@ -27,7 +27,7 @@ Backwards-compatibility notes longer supported. (The `ssl` module was updated in version 2.7.9, although in some distributions the updates are present in builds with a lower version number. Tornado requires `ssl.SSLContext`, - `ssl.create_default_context`, and `ssl.match_hostname`) + `ssl.create_default_context`, and ``ssl.match_hostname``) - Versions of Python 3.5 prior to 3.5.2 are no longer supported due to a change in the async iterator protocol in that version. - The ``trollius`` project (`asyncio` backported to Python 2) is no diff --git a/tornado/httpserver.py b/tornado/httpserver.py index 77dc541e9d..757f711b24 100644 --- a/tornado/httpserver.py +++ b/tornado/httpserver.py @@ -74,7 +74,7 @@ class HTTPServer(TCPServer, Configurable, httputil.HTTPServerConnectionDelegate) To make this server serve SSL traffic, send the ``ssl_options`` keyword argument with an `ssl.SSLContext` object. For compatibility with older versions of Python ``ssl_options`` may also be a dictionary of keyword - arguments for the `ssl.wrap_socket` method.:: + arguments for the `ssl.SSLContext.wrap_socket` method.:: ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) ssl_ctx.load_cert_chain(os.path.join(data_dir, "mydomain.crt"), diff --git a/tornado/iostream.py b/tornado/iostream.py index a408be59cd..0305370a8a 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -1217,7 +1217,7 @@ def start_tls( The ``ssl_options`` argument may be either an `ssl.SSLContext` object or a dictionary of keyword arguments for the - `ssl.wrap_socket` function. The ``server_hostname`` argument + `ssl.SSLContext.wrap_socket` function. The ``server_hostname`` argument will be used for certificate validation unless disabled in the ``ssl_options``. @@ -1322,7 +1322,7 @@ class SSLIOStream(IOStream): If the socket passed to the constructor is already connected, it should be wrapped with:: - ssl.wrap_socket(sock, do_handshake_on_connect=False, **kwargs) + ssl.SSLContext(...).wrap_socket(sock, do_handshake_on_connect=False, **kwargs) before constructing the `SSLIOStream`. Unconnected sockets will be wrapped when `IOStream.connect` is finished. @@ -1333,7 +1333,7 @@ class SSLIOStream(IOStream): def __init__(self, *args: Any, **kwargs: Any) -> None: """The ``ssl_options`` keyword argument may either be an `ssl.SSLContext` object or a dictionary of keywords arguments - for `ssl.wrap_socket` + for `ssl.SSLContext.wrap_socket` """ self._ssl_options = kwargs.pop("ssl_options", _client_ssl_defaults) super().__init__(*args, **kwargs) diff --git a/tornado/netutil.py b/tornado/netutil.py index 04db085abf..be7b55373a 100644 --- a/tornado/netutil.py +++ b/tornado/netutil.py @@ -594,7 +594,7 @@ def ssl_options_to_context( `~ssl.SSLContext` object. The ``ssl_options`` dictionary contains keywords to be passed to - `ssl.wrap_socket`. In Python 2.7.9+, `ssl.SSLContext` objects can + ``ssl.SSLContext.wrap_socket``. In Python 2.7.9+, `ssl.SSLContext` objects can be used instead. This function converts the dict form to its `~ssl.SSLContext` equivalent, and may be used when a component which accepts both forms needs to upgrade to the `~ssl.SSLContext` version @@ -652,9 +652,7 @@ def ssl_wrap_socket( ``ssl_options`` may be either an `ssl.SSLContext` object or a dictionary (as accepted by `ssl_options_to_context`). Additional - keyword arguments are passed to ``wrap_socket`` (either the - `~ssl.SSLContext` method or the `ssl` module function as - appropriate). + keyword arguments are passed to `ssl.SSLContext.wrap_socket`. .. versionchanged:: 6.2 diff --git a/tornado/tcpserver.py b/tornado/tcpserver.py index deab8f2ad9..02c0ca0cca 100644 --- a/tornado/tcpserver.py +++ b/tornado/tcpserver.py @@ -61,7 +61,7 @@ async def handle_stream(self, stream, address): To make this server serve SSL traffic, send the ``ssl_options`` keyword argument with an `ssl.SSLContext` object. For compatibility with older versions of Python ``ssl_options`` may also be a dictionary of keyword - arguments for the `ssl.wrap_socket` method.:: + arguments for the `ssl.SSLContext.wrap_socket` method.:: ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) ssl_ctx.load_cert_chain(os.path.join(data_dir, "mydomain.crt"), From 724f5d3a587ce56734dd88c701da9c2d68b87e4c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Fri, 13 Oct 2023 22:39:41 -0400 Subject: [PATCH 63/70] *: Lint on the newest version of python too. We previously only typechecked on the oldest version of python we supported, incorrectly assuming nothing we depended on would be removed. Now we typecheck on the latest version of python. Assume support for modern version of ssl and remove some pre-SNI code paths which rely on functions that are now removed. --- tornado/concurrent.py | 13 ++++++++++--- tornado/iostream.py | 33 +++------------------------------ tornado/netutil.py | 18 +++++++----------- tornado/testing.py | 5 +---- tox.ini | 6 ++++++ 5 files changed, 27 insertions(+), 48 deletions(-) diff --git a/tornado/concurrent.py b/tornado/concurrent.py index f0bbf62317..86bbd703c1 100644 --- a/tornado/concurrent.py +++ b/tornado/concurrent.py @@ -54,7 +54,7 @@ def is_future(x: Any) -> bool: class DummyExecutor(futures.Executor): - def submit( + def submit( # type: ignore[override] self, fn: Callable[..., _T], *args: Any, **kwargs: Any ) -> "futures.Future[_T]": future = futures.Future() # type: futures.Future[_T] @@ -64,8 +64,15 @@ def submit( future_set_exc_info(future, sys.exc_info()) return future - def shutdown(self, wait: bool = True) -> None: - pass + if sys.version_info >= (3, 9): + + def shutdown(self, wait: bool = True, cancel_futures: bool = False) -> None: + pass + + else: + + def shutdown(self, wait: bool = True) -> None: + pass dummy_executor = DummyExecutor() diff --git a/tornado/iostream.py b/tornado/iostream.py index 0305370a8a..bd001aeeb1 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -1411,9 +1411,9 @@ def _do_ssl_handshake(self) -> None: return self.close(exc_info=err) else: self._ssl_accepting = False - if not self._verify_cert(self.socket.getpeercert()): - self.close() - return + # Prior to the introduction of SNI, this is where we would check + # the server's claimed hostname. + assert ssl.HAS_SNI self._finish_ssl_connect() def _finish_ssl_connect(self) -> None: @@ -1422,33 +1422,6 @@ def _finish_ssl_connect(self) -> None: self._ssl_connect_future = None future_set_result_unless_cancelled(future, self) - def _verify_cert(self, peercert: Any) -> bool: - """Returns ``True`` if peercert is valid according to the configured - validation mode and hostname. - - The ssl handshake already tested the certificate for a valid - CA signature; the only thing that remains is to check - the hostname. - """ - if isinstance(self._ssl_options, dict): - verify_mode = self._ssl_options.get("cert_reqs", ssl.CERT_NONE) - elif isinstance(self._ssl_options, ssl.SSLContext): - verify_mode = self._ssl_options.verify_mode - assert verify_mode in (ssl.CERT_NONE, ssl.CERT_REQUIRED, ssl.CERT_OPTIONAL) - if verify_mode == ssl.CERT_NONE or self._server_hostname is None: - return True - cert = self.socket.getpeercert() - if cert is None and verify_mode == ssl.CERT_REQUIRED: - gen_log.warning("No SSL certificate given") - return False - try: - ssl.match_hostname(peercert, self._server_hostname) - except ssl.CertificateError as e: - gen_log.warning("Invalid SSL certificate: %s" % e) - return False - else: - return True - def _handle_read(self) -> None: if self._ssl_accepting: self._do_ssl_handshake() diff --git a/tornado/netutil.py b/tornado/netutil.py index be7b55373a..18c91e6743 100644 --- a/tornado/netutil.py +++ b/tornado/netutil.py @@ -662,14 +662,10 @@ def ssl_wrap_socket( context = ssl_options_to_context(ssl_options, server_side=server_side) if server_side is None: server_side = False - if ssl.HAS_SNI: - # In python 3.4, wrap_socket only accepts the server_hostname - # argument if HAS_SNI is true. - # TODO: add a unittest (python added server-side SNI support in 3.4) - # In the meantime it can be manually tested with - # python3 -m tornado.httpclient https://sni.velox.ch - return context.wrap_socket( - socket, server_hostname=server_hostname, server_side=server_side, **kwargs - ) - else: - return context.wrap_socket(socket, server_side=server_side, **kwargs) + assert ssl.HAS_SNI + # TODO: add a unittest for hostname validation (python added server-side SNI support in 3.4) + # In the meantime it can be manually tested with + # python3 -m tornado.httpclient https://sni.velox.ch + return context.wrap_socket( + socket, server_hostname=server_hostname, server_side=server_side, **kwargs + ) diff --git a/tornado/testing.py b/tornado/testing.py index 9bfadf45ed..da33335733 100644 --- a/tornado/testing.py +++ b/tornado/testing.py @@ -206,10 +206,7 @@ def tearDown(self) -> None: # this always happens in tests, so cancel any tasks that are # still pending by the time we get here. asyncio_loop = self.io_loop.asyncio_loop # type: ignore - if hasattr(asyncio, "all_tasks"): # py37 - tasks = asyncio.all_tasks(asyncio_loop) # type: ignore - else: - tasks = asyncio.Task.all_tasks(asyncio_loop) + tasks = asyncio.all_tasks(asyncio_loop) # Tasks that are done may still appear here and may contain # non-cancellation exceptions, so filter them out. tasks = [t for t in tasks if not t.done()] # type: ignore diff --git a/tox.ini b/tox.ini index 13b3bad076..7eaa5bd838 100644 --- a/tox.ini +++ b/tox.ini @@ -112,4 +112,10 @@ commands = # so we have to typecheck both. mypy --platform linux {posargs:tornado} mypy --platform windows {posargs:tornado} + # We mainly lint on the oldest version of Python we support, since + # we're more likely to catch problems of accidentally depending on + # something new than of depending on something old and deprecated. + # But sometimes something we depend on gets removed so we should also + # test the newest version. + mypy --platform linux --python-version 3.12 {posargs:tornado} changedir = {toxinidir} From dbbd42ebe4d554bdf2c36fad5790efe94188b5f2 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 2 Oct 2023 21:39:39 -0400 Subject: [PATCH 64/70] Revert "asyncio: Remove atexit hook" This reverts commit 62363740c1cc0e137ff4344c3afc3d52e070f200. We are again seeing hangs at shutdown in SyncHTTPClientTest.test_destructor_log. Maybe putting this back will help. --- tornado/platform/asyncio.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tornado/platform/asyncio.py b/tornado/platform/asyncio.py index 8d16aa6d15..79e60848b4 100644 --- a/tornado/platform/asyncio.py +++ b/tornado/platform/asyncio.py @@ -23,6 +23,7 @@ """ import asyncio +import atexit import concurrent.futures import errno import functools @@ -59,6 +60,31 @@ def fileno(self) -> int: _T = TypeVar("_T") +# Collection of selector thread event loops to shut down on exit. +_selector_loops: Set["SelectorThread"] = set() + + +def _atexit_callback() -> None: + for loop in _selector_loops: + with loop._select_cond: + loop._closing_selector = True + loop._select_cond.notify() + try: + loop._waker_w.send(b"a") + except BlockingIOError: + pass + if loop._thread is not None: + # If we don't join our (daemon) thread here, we may get a deadlock + # during interpreter shutdown. I don't really understand why. This + # deadlock happens every time in CI (both travis and appveyor) but + # I've never been able to reproduce locally. + loop._thread.join() + _selector_loops.clear() + + +atexit.register(_atexit_callback) + + class BaseAsyncIOLoop(IOLoop): def initialize( # type: ignore self, asyncio_loop: asyncio.AbstractEventLoop, **kwargs: Any @@ -453,6 +479,7 @@ async def thread_manager_anext() -> None: self._waker_r, self._waker_w = socket.socketpair() self._waker_r.setblocking(False) self._waker_w.setblocking(False) + _selector_loops.add(self) self.add_reader(self._waker_r, self._consume_waker) def close(self) -> None: @@ -464,6 +491,7 @@ def close(self) -> None: self._wake_selector() if self._thread is not None: self._thread.join() + _selector_loops.discard(self) self.remove_reader(self._waker_r) self._waker_r.close() self._waker_w.close() From 3340c3971f7663fb8a1a04c676f4a7cf734d1f0f Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 10 Oct 2023 20:39:25 -0400 Subject: [PATCH 65/70] test: Close the thread pool in run_on_executor test If this executor was left around it would be GC'd at an unpredictable time and would often be reported as a failure in other circlerefs tests. (For unknown reasons this would occur most often in i686 (i.e. 32-bit) linux builds). --- tornado/test/circlerefs_test.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py index 5c71858ff7..5c25adffd8 100644 --- a/tornado/test/circlerefs_test.py +++ b/tornado/test/circlerefs_test.py @@ -197,21 +197,21 @@ def test_run_on_executor(self): # and tornado.concurrent.chain_future. import concurrent.futures - thread_pool = concurrent.futures.ThreadPoolExecutor(1) + with concurrent.futures.ThreadPoolExecutor(1) as thread_pool: - class Factory(object): - executor = thread_pool + class Factory(object): + executor = thread_pool - @tornado.concurrent.run_on_executor - def run(self): - return None + @tornado.concurrent.run_on_executor + def run(self): + return None - factory = Factory() + factory = Factory() - async def main(): - # The cycle is not reported on the first call. It's not clear why. - for i in range(2): - await factory.run() + async def main(): + # The cycle is not reported on the first call. It's not clear why. + for i in range(2): + await factory.run() - with assert_no_cycle_garbage(): - asyncio.run(main()) + with assert_no_cycle_garbage(): + asyncio.run(main()) From dcc6e59c3e92d54cb6366187f6d69d2700f25688 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Oct 2023 01:28:06 +0000 Subject: [PATCH 66/70] build(deps): bump urllib3 from 1.26.17 to 1.26.18 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.17 to 1.26.18. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.17...1.26.18) --- updated-dependencies: - dependency-name: urllib3 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4934683dfc..ce09865efd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -114,7 +114,7 @@ types-pycurl==7.45.2.0 # via -r requirements.in typing-extensions==4.4.0 # via mypy -urllib3==1.26.17 +urllib3==1.26.18 # via requests virtualenv==20.23.0 # via tox From c60d80cbfc992b3cbaf303ae2e5f4f4c49fe028d Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 1 Nov 2023 21:40:54 -0400 Subject: [PATCH 67/70] web,demos: Remove more uses of deprecated datetime utc methods Add a simple test case to give us some basic coverage of this code path. Closes #3335 --- demos/blog/templates/feed.xml | 2 +- demos/s3server/s3server.py | 4 +++- tornado/test/web_test.py | 9 +++++++++ tornado/web.py | 3 ++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/demos/blog/templates/feed.xml b/demos/blog/templates/feed.xml index a98826c8d3..c63ef306a9 100644 --- a/demos/blog/templates/feed.xml +++ b/demos/blog/templates/feed.xml @@ -5,7 +5,7 @@ {% if len(entries) > 0 %} {{ max(e.updated for e in entries).strftime(date_format) }} {% else %} - {{ datetime.datetime.utcnow().strftime(date_format) }} + {{ datetime.datetime.now(datetime.timezone.utc).strftime(date_format) }} {% end %} http://{{ request.host }}/ diff --git a/demos/s3server/s3server.py b/demos/s3server/s3server.py index 5c5e6af2ba..b798c6b64b 100644 --- a/demos/s3server/s3server.py +++ b/demos/s3server/s3server.py @@ -138,7 +138,9 @@ def get(self): buckets.append( { "Name": name, - "CreationDate": datetime.datetime.utcfromtimestamp(info.st_ctime), + "CreationDate": datetime.datetime.fromtimestamp( + info.st_ctime, datetime.timezone.utc + ), } ) self.render_xml({"ListAllMyBucketsResult": {"Buckets": {"Bucket": buckets}}}) diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index c8dce68c80..fec66f39ac 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -1128,6 +1128,15 @@ def test_static_files(self): self.assertTrue(b"Disallow: /" in response.body) self.assertEqual(response.headers.get("Content-Type"), "text/plain") + def test_static_files_cacheable(self): + # Test that the version parameter triggers cache-control headers. This + # test is pretty weak but it gives us coverage of the code path which + # was important for detecting the deprecation of datetime.utcnow. + response = self.fetch("/robots.txt?v=12345") + self.assertTrue(b"Disallow: /" in response.body) + self.assertIn("Cache-Control", response.headers) + self.assertIn("Expires", response.headers) + def test_static_compressed_files(self): response = self.fetch("/static/sample.xml.gz") self.assertEqual(response.headers.get("Content-Type"), "application/gzip") diff --git a/tornado/web.py b/tornado/web.py index 333f736808..039396470f 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -2797,7 +2797,8 @@ def set_headers(self) -> None: if cache_time > 0: self.set_header( "Expires", - datetime.datetime.utcnow() + datetime.timedelta(seconds=cache_time), + datetime.datetime.now(datetime.timezone.utc) + + datetime.timedelta(seconds=cache_time), ) self.set_header("Cache-Control", "max-age=" + str(cache_time)) From 06e1a651823934710f23a138f4bbfb344ca002b0 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 13 Nov 2023 22:02:32 -0500 Subject: [PATCH 68/70] iostream_test: Test check_hostname functionality. In #3337, the removal of ssl.match_hostname revealed that we did not have any test coverage of hostname checking in tornado.iostream. Since we were forced to remove the manual check that we had in place for old versions of Python, we need a test to make sure that we didn't inadvertently break hostname checking. --- tornado/test/iostream_test.py | 75 +++++++++++++++++++++++++++++++++++ tornado/test/test.crt | 34 ++++++++-------- tornado/test/test.key | 52 ++++++++++++------------ tornado/testing.py | 4 +- 4 files changed, 120 insertions(+), 45 deletions(-) diff --git a/tornado/test/iostream_test.py b/tornado/test/iostream_test.py index e22e83e60f..8a28518001 100644 --- a/tornado/test/iostream_test.py +++ b/tornado/test/iostream_test.py @@ -1149,6 +1149,81 @@ def handle_stream(self, stream, address): yield handshake_future +class TestIOStreamCheckHostname(AsyncTestCase): + # This test ensures that hostname checks are working correctly after + # #3337 revealed that we have no test coverage in this area, and we + # removed a manual hostname check that was needed only for very old + # versions of python. + def setUp(self): + super().setUp() + self.listener, self.port = bind_unused_port() + + def accept_callback(connection, address): + ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + ssl_ctx.load_cert_chain( + os.path.join(os.path.dirname(__file__), "test.crt"), + os.path.join(os.path.dirname(__file__), "test.key"), + ) + connection = ssl_ctx.wrap_socket( + connection, + server_side=True, + do_handshake_on_connect=False, + ) + SSLIOStream(connection) + + netutil.add_accept_handler(self.listener, accept_callback) + + # Our self-signed cert is its own CA. We have to pass the CA check before + # the hostname check will be performed. + self.client_ssl_ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + self.client_ssl_ctx.load_verify_locations( + os.path.join(os.path.dirname(__file__), "test.crt") + ) + + def tearDown(self): + self.io_loop.remove_handler(self.listener.fileno()) + self.listener.close() + super().tearDown() + + @gen_test + async def test_match(self): + stream = SSLIOStream(socket.socket(), ssl_options=self.client_ssl_ctx) + await stream.connect( + ("127.0.0.1", self.port), + server_hostname="foo.example.com", + ) + stream.close() + + @gen_test + async def test_no_match(self): + stream = SSLIOStream(socket.socket(), ssl_options=self.client_ssl_ctx) + with ExpectLog(gen_log, ".*alert bad certificate", level=logging.WARNING): + with self.assertRaises(ssl.SSLCertVerificationError): + with ExpectLog( + gen_log, + ".*(certificate verify failed: Hostname mismatch)", + level=logging.WARNING, + ): + await stream.connect( + ("127.0.0.1", self.port), + server_hostname="bar.example.com", + ) + # The server logs a warning while cleaning up the failed connection. + # Unfortunately there's no good hook to wait for this logging. + await asyncio.sleep(0.1) + + @gen_test + async def test_check_disabled(self): + # check_hostname can be set to false and the connection will succeed even though it doesn't + # have the right hostname. + self.client_ssl_ctx.check_hostname = False + stream = SSLIOStream(socket.socket(), ssl_options=self.client_ssl_ctx) + await stream.connect( + ("127.0.0.1", self.port), + server_hostname="bar.example.com", + ) + + @skipIfNonUnix class TestPipeIOStream(TestReadWriteMixin, AsyncTestCase): @gen.coroutine diff --git a/tornado/test/test.crt b/tornado/test/test.crt index ffc49b06c1..c7f19e3e62 100644 --- a/tornado/test/test.crt +++ b/tornado/test/test.crt @@ -1,20 +1,18 @@ -----BEGIN CERTIFICATE----- -MIIDWzCCAkOgAwIBAgIUV4spou0CenmvKqa7Hml/MC+JKiAwDQYJKoZIhvcNAQEL -BQAwPTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExGTAXBgNVBAoM -EFRvcm5hZG8gV2ViIFRlc3QwHhcNMTgwOTI5MTM1NjQ1WhcNMjgwOTI2MTM1NjQ1 -WjA9MQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEZMBcGA1UECgwQ -VG9ybmFkbyBXZWIgVGVzdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AKT0LdyI8tW5uwP3ahE8BFSz+j3SsKBDv/0cKvqxVVE6sLEST2s3HjArZvIIG5sb -iBkWDrqnZ6UKDvB4jlobLGAkepxDbrxHWxK53n0C28XXGLqJQ01TlTZ5rpjttMeg -5SKNjHbxpOvpUwwQS4br4WjZKKyTGiXpFkFUty+tYVU35/U2yyvreWHmzpHx/25t -H7O2RBARVwJYKOGPtlH62lQjpIWfVfklY4Ip8Hjl3B6rBxPyBULmVQw0qgoZn648 -oa4oLjs0wnYBz01gVjNMDHej52SsB/ieH7W1TxFMzqOlcvHh41uFbQJPgcXsruSS -9Z4twzSWkUp2vk/C//4Sz38CAwEAAaNTMFEwHQYDVR0OBBYEFLf8fQ5+u8sDWAd3 -r5ZjZ5MmDWJeMB8GA1UdIwQYMBaAFLf8fQ5+u8sDWAd3r5ZjZ5MmDWJeMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBADkkm3pIb9IeqVNmQ2uhQOgw -UwyToTYUHNTb/Nm5lzBTBqC8gbXAS24RQ30AB/7G115Uxeo+YMKfITxm/CgR+vhF -F59/YrzwXj+G8bdbuVl/UbB6f9RSp+Zo93rUZAtPWr77gxLUrcwSRzzDwxFjC2nC -6eigbkvt1OQY775RwnFAt7HKPclE0Out+cGJIboJuO1f3r57ZdyFH0GzbZEff/7K -atGXohijWJjYvU4mk0KFHORZrcBpsv9cfkFbmgVmiRwxRJ1tLauHM3Ne+VfqYE5M -4rTStSyz3ASqVKJ2iFMQueNR/tUOuDlfRt+0nhJMuYSSkW+KTgnwyOGU9cv+mxA= +MIIC1TCCAb2gAwIBAgIJAOV36k+idrqDMA0GCSqGSIb3DQEBCwUAMBoxGDAWBgNV +BAMMD2Zvby5leGFtcGxlLmNvbTAeFw0yMzExMTIwMTQ3MzhaFw0zMzExMDkwMTQ3 +MzhaMBoxGDAWBgNVBAMMD2Zvby5leGFtcGxlLmNvbTCCASIwDQYJKoZIhvcNAQEB +BQADggEPADCCAQoCggEBAKjfAL8hQ1G5yoR29D0NwqhL3EE9RShYLvzKvSNhOceR +e390XJLAi8PN8Xv8LkmoMITaLdRDtBwXcdw+kfHjcfXZ0cORJkxJFdk/38SsiBKV +ZzMO+1PVULfnQa92tHtahNsTGI5367WEALn9UNJLmP+jpX+3zohatUTbhlnRSruH +O/Mo5mLs1XJhQpdvp8BQNksJhiTQ7FsbcjGq6gZ75SnbfUR0PyohY0LTsrql00Tz +hCAEvm2TNiQ5s+PT5fFOg6Jh2ZGj1lYLQY3dDeqt9sdabvj7LANqfygbt2cf9yYn +a25UTRcAN7CNdWwTEfvnOVMITzCE8F2FmKDvJR+TX30CAwEAAaMeMBwwGgYDVR0R +BBMwEYIPZm9vLmV4YW1wbGUuY29tMA0GCSqGSIb3DQEBCwUAA4IBAQBjKz4gM4Bz +JO7Ny1fwbBtraHCGYnDG8gBID3+/sQlMMFeuquJK+oc+1DOpr9wFlmgih67OszdM +X2Xl/HjtHPKwNqaDHXu5bQPFT5fXzAZ8HHEOXSV9IpHaNyS7TC7bYmD/ClCZeqXU +h7MBe5yPXfCCIqWyjZMZDQfT1v6J+WX3+lO9josMJCfNR5DzvJiPmSTUxrLD5SkT ++7iKxhM6eI83D+I188sGc2IMinkFp8jSRTlaH8WYiOd5QQ2r8GSYNM9M3z1sK7zv +0Bw3hWEQgpFbEaSH0OB72KYkMUZBqK9UoeSZWBrMXHFBNaY23tEKInEwlBGBELGc +acSinK6OBC0z -----END CERTIFICATE----- diff --git a/tornado/test/test.key b/tornado/test/test.key index 7cb7d8d23c..8eea05db5f 100644 --- a/tornado/test/test.key +++ b/tornado/test/test.key @@ -1,28 +1,28 @@ -----BEGIN PRIVATE KEY----- -MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCk9C3ciPLVubsD -92oRPARUs/o90rCgQ7/9HCr6sVVROrCxEk9rNx4wK2byCBubG4gZFg66p2elCg7w -eI5aGyxgJHqcQ268R1sSud59AtvF1xi6iUNNU5U2ea6Y7bTHoOUijYx28aTr6VMM -EEuG6+Fo2Siskxol6RZBVLcvrWFVN+f1Nssr63lh5s6R8f9ubR+ztkQQEVcCWCjh -j7ZR+tpUI6SFn1X5JWOCKfB45dweqwcT8gVC5lUMNKoKGZ+uPKGuKC47NMJ2Ac9N -YFYzTAx3o+dkrAf4nh+1tU8RTM6jpXLx4eNbhW0CT4HF7K7kkvWeLcM0lpFKdr5P -wv/+Es9/AgMBAAECggEABi6AaXtYXloPgB6NgwfUwbfc8OQsalUfpMShd7OdluW0 -KW6eO05de0ClIvzay/1EJGyHMMeFQtIVrT1XWFkcWJ4FWkXMqJGkABenFtg8lDVz -X8o1E3jGZrw4ptKBq9mDvL/BO9PiclTUH+ecbPn6AIvi0lTQ7grGIryiAM9mjmLy -jpCwoutF2LD4RPNg8vqWe/Z1rQw5lp8FOHhRwPooHHeoq1bSrp8dqvVAwAam7Mmf -uFgI8jrNycPgr2cwEEtbq2TQ625MhVnCpwT+kErmAStfbXXuqv1X1ZZgiNxf+61C -OL0bhPRVIHmmjiK/5qHRuN4Q5u9/Yp2SJ4W5xadSQQKBgQDR7dnOlYYQiaoPJeD/ -7jcLVJbWwbr7bE19O/QpYAtkA/FtGlKr+hQxPhK6OYp+in8eHf+ga/NSAjCWRBoh -MNAVCJtiirHo2tFsLFOmlJpGL9n3sX8UnkJN90oHfWrzJ8BZnXaSw2eOuyw8LLj+ -Q+ISl6Go8/xfsuy3EDv4AP1wCwKBgQDJJ4vEV3Kr+bc6N/xeu+G0oHvRAWwuQpcx -9D+XpnqbJbFDnWKNE7oGsDCs8Qjr0CdFUN1pm1ppITDZ5N1cWuDg/47ZAXqEK6D1 -z13S7O0oQPlnsPL7mHs2Vl73muAaBPAojFvceHHfccr7Z94BXqKsiyfaWz6kclT/ -Nl4JTdsC3QKBgQCeYgozL2J/da2lUhnIXcyPstk+29kbueFYu/QBh2HwqnzqqLJ4 -5+t2H3P3plQUFp/DdDSZrvhcBiTsKiNgqThEtkKtfSCvIvBf4a2W/4TJsW6MzxCm -2KQDuK/UqM4Y+APKWN/N6Lln2VWNbNyBkWuuRVKFatccyJyJnSjxeqW7cwKBgGyN -idCYPIrwROAHLItXKvOWE5t0ABRq3TsZC2RkdA/b5HCPs4pclexcEriRjvXrK/Yt -MH94Ve8b+UftSUQ4ytjBMS6MrLg87y0YDhLwxv8NKUq65DXAUOW+8JsAmmWQOqY3 -MK+m1BT4TMklgVoN3w3sPsKIsSJ/jLz5cv/kYweFAoGAG4iWU1378tI2Ts/Fngsv -7eoWhoda77Y9D0Yoy20aN9VdMHzIYCBOubtRPEuwgaReNwbUBWap01J63yY/fF3K -8PTz6covjoOJqxQJOvM7nM0CsJawG9ccw3YXyd9KgRIdSt6ooEhb7N8W2EXYoKl3 -g1i2t41Q/SC3HUGC5mJjpO8= +MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCo3wC/IUNRucqE +dvQ9DcKoS9xBPUUoWC78yr0jYTnHkXt/dFySwIvDzfF7/C5JqDCE2i3UQ7QcF3Hc +PpHx43H12dHDkSZMSRXZP9/ErIgSlWczDvtT1VC350GvdrR7WoTbExiOd+u1hAC5 +/VDSS5j/o6V/t86IWrVE24ZZ0Uq7hzvzKOZi7NVyYUKXb6fAUDZLCYYk0OxbG3Ix +quoGe+Up231EdD8qIWNC07K6pdNE84QgBL5tkzYkObPj0+XxToOiYdmRo9ZWC0GN +3Q3qrfbHWm74+ywDan8oG7dnH/cmJ2tuVE0XADewjXVsExH75zlTCE8whPBdhZig +7yUfk199AgMBAAECggEBAIGFmXL/Nj0GvVfgTPBPD5A5rxOyxMpu6IsnjO4H8mMp +KInXW/GLESf7W053W6FPCPe8yA3YZ9pr+P6uVw4qHwwsJwFS4Qb9v25D2YNluXBX +ezHkOcxQ/novO2gzKba69M961Ajh3b35Iv2EV2sUZKMehx9wgU6AFCxeG6vkJOez +UCX0WG467cdo4alfe/oQZLioU3t+GGCb23m13B9xaN2tqONNh2E2yp73MVJ1Q74R +HVBkQxciHd3iJee5/4AGUJl9TLv8wAT1cf3OhcGlvOlcfSYtuNUY32TPWit1Or1y +i9fPkjo8SBw52TN5RRmjIlpNMxeK+G4+XtO1Y47TlZkCgYEA3Y+NK8mz9kTnorkf +R7CSOAaiste8c3CJbbbaTk7oTxK4MISL1RX+sFK65cnV54Wo75hCZxsjV2oEQ+4r +UOGw1JxcV16V6iP/AaQS41xsxZca/xnC//YojBN6kP+OV+/ByF4HNs5eDN6uHg0y +OOfBWi6oc449CFFMxVnrQ0SymaMCgYEAwx7M9xQ1eth902Wq2VNszFIgdgptGYhj +XbWsAFYKII+aUXC92sVL5sO33vNyhBbyMN1aSRXYe8B5EnwsP31o5csrHQw6i/Dp +jUx1AUBYkNPgL9ctqlTQf1nb0LenGlCUBD6jrSrJVHeOF4y+HIZHXNZ++otH7+eu +b3dbHgV/9F8CgYBTopO0utAvH3WdDGqNYk7fzUlvX1ao8Qs/mi2wL8MrzjIvRmmO +h137607X3Sfc3KyXvQ8b4relkMSJbAd34aohp+CHrpHCr9HcKbZjkwkQUWkEcRIW +EzLdJaE3yPBPq5an7y6j9qS0EP8DIxIZPwrS4xf9fuz1DdOAD+BqJS2SJwKBgQCt +zZzTpduxbnA+Qrx503cBVVJ28viVmsiwK2hn8DwbHu9eBegHnGDs0H/Th9UE1g+r ++TA4E85/BUaTcapUb5hlwKDJwh/QkaroYyeCEtgRQbnbw3d41w3Vsqw78atWpFoE +oetYD9nAdLJMReD+NZoRlzsKX9CXYS8fORkf19RPTwKBgQCQdvDMicrtnJ4U2MOA +y+59K7V77VRfHLecjAMGblBGmrtoQSBvQiFznXm0DUOSy3eZRITwe01/t+RUDhx9 +MVLlyNzwRCVHzPe7kUI10GM4W5ZKAf8f/t0KjBrQBeYtRUOEI3QVzsVzc1hY8Fv8 +YHOhmI8Tdd2biF+lqXKC6vGlvQ== -----END PRIVATE KEY----- diff --git a/tornado/testing.py b/tornado/testing.py index da33335733..bdbff87bc3 100644 --- a/tornado/testing.py +++ b/tornado/testing.py @@ -517,7 +517,9 @@ def get_ssl_options(self) -> Dict[str, Any]: def default_ssl_options() -> Dict[str, Any]: # Testing keys were generated with: # openssl req -new -keyout tornado/test/test.key \ - # -out tornado/test/test.crt -nodes -days 3650 -x509 + # -out tornado/test/test.crt \ + # -nodes -days 3650 -x509 \ + # -subj "/CN=foo.example.com" -addext "subjectAltName = DNS:foo.example.com" module_dir = os.path.dirname(__file__) return dict( certfile=os.path.join(module_dir, "test", "test.crt"), From 2da0a9912bc5207e2ac8207b40035377de3e1cd5 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 13 Nov 2023 22:11:59 -0500 Subject: [PATCH 69/70] iostream_test: Don't require server-side log on windows --- tornado/test/iostream_test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tornado/test/iostream_test.py b/tornado/test/iostream_test.py index 8a28518001..02fcd3e13f 100644 --- a/tornado/test/iostream_test.py +++ b/tornado/test/iostream_test.py @@ -1197,7 +1197,12 @@ async def test_match(self): @gen_test async def test_no_match(self): stream = SSLIOStream(socket.socket(), ssl_options=self.client_ssl_ctx) - with ExpectLog(gen_log, ".*alert bad certificate", level=logging.WARNING): + with ExpectLog( + gen_log, + ".*alert bad certificate", + level=logging.WARNING, + required=platform.system() != "Windows", + ): with self.assertRaises(ssl.SSLCertVerificationError): with ExpectLog( gen_log, @@ -1210,7 +1215,9 @@ async def test_no_match(self): ) # The server logs a warning while cleaning up the failed connection. # Unfortunately there's no good hook to wait for this logging. - await asyncio.sleep(0.1) + # It doesn't seem to happen on windows; I'm not sure why. + if platform.system() != "Windows": + await asyncio.sleep(0.1) @gen_test async def test_check_disabled(self): From 451419c1c7c431d4362fd6d6ed36227a1d52f32c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 28 Nov 2023 21:55:46 -0500 Subject: [PATCH 70/70] Set version to 6.4 final --- docs/releases/v6.4.0.rst | 4 ++-- tornado/__init__.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/releases/v6.4.0.rst b/docs/releases/v6.4.0.rst index 62cd4ac7e4..d1e099a2df 100644 --- a/docs/releases/v6.4.0.rst +++ b/docs/releases/v6.4.0.rst @@ -1,8 +1,8 @@ What's new in Tornado 6.4.0 =========================== -In Progress ------------ +Nov 28, 2023 +------------ General Changes ~~~~~~~~~~~~~~~ diff --git a/tornado/__init__.py b/tornado/__init__.py index ae39bb59b2..a0ae714d3e 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -22,8 +22,8 @@ # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "6.4b1" -version_info = (6, 4, 0, -99) +version = "6.4" +version_info = (6, 4, 0, 0) import importlib import typing