Test fork support, fix some fork problems and improve test file

python · miss-islington · Aug 30, 2022 · Aug 7, 2022 · Aug 20, 2022 · Aug 20, 2022
commit 264bed72d5582880731fb7c69ca37c00f0b49985
diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py
@@ -13,31 +13,134 @@
     raise unittest.SkipTest("test module requires subprocess")
 
 
+def supports_trampoline_profiling():
+    """Return True when the build config reports perf trampoline support.
+
+    Reads the ``PERF_TRAMPOLINE_SUPPORT`` config variable; an unset or
+    empty value means "not supported", otherwise it must equal 1.
+    """
+    flag = sysconfig.get_config_var("PERF_TRAMPOLINE_SUPPORT")
+    return bool(flag) and int(flag) == 1
+
+
+# Raised at import time: when the build lacks perf trampoline support the
+# rest of this module is never executed, so no test below is run.
+if not supports_trampoline_profiling():
+    raise unittest.SkipTest("perf trampoline profiling not supported")
+
+
+class TestPerfTrampoline(unittest.TestCase):
+    """Check that ``-Xperf`` makes the interpreter write perf map files.
+
+    These tests only run ``sys.executable`` and inspect the resulting
+    ``/tmp/perf-<pid>.map`` files; they do not invoke the ``perf`` tool.
+    """
+
+    def setUp(self):
+        super().setUp()
+        # Snapshot the map files that already exist so that tearDown()
+        # removes only the ones that appeared while the test was running.
+        self.perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map"))
+
+    def tearDown(self) -> None:
+        super().tearDown()
+        files_to_delete = (
+            set(pathlib.Path("/tmp/").glob("perf-*.map")) - self.perf_files
+        )
+        for file in files_to_delete:
+            file.unlink()
+
+    def test_trampoline_works(self):
+        """A plain ``-Xperf`` run exits cleanly and leaves a map file."""
+        code = """if 1:
+                def foo():
+                    pass
+
+                def bar():
+                    foo()
+
+                def baz():
+                    bar()
+
+                baz()
+                """
+        with subprocess.Popen(
+            [sys.executable, "-Xperf", "-c", code],
+            universal_newlines=True,
+            stderr=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+        ) as process:
+            stdout, stderr = process.communicate()
+
+        self.assertEqual(process.returncode, 0)
+        self.assertEqual(stderr, "")
+        self.assertEqual(stdout, "")
+        perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
+        self.assertTrue(perf_file.exists())
+
+    def test_trampoline_works_with_forks(self):
+        """Parent and forked child each get their own perf map file.
+
+        The child prints its pid so the test can locate its map file.  The
+        parent converts the raw ``os.waitpid()`` status with
+        ``os.waitstatus_to_exitcode()`` before exiting: passing the raw
+        status to ``sys.exit()`` would turn a child exit code of 1
+        (status 256) into exit code 0 and hide the failure from the
+        ``returncode`` assertion below.
+        """
+        code = """if 1:
+                import os, sys
+
+                def foo_fork():
+                    pass
+
+                def bar_fork():
+                    foo_fork()
+
+                def baz_fork():
+                    bar_fork()
+
+                def foo():
+                    pid = os.fork()
+                    if pid == 0:
+                        print(os.getpid())
+                        baz_fork()
+                    else:
+                        _, status = os.waitpid(-1, 0)
+                        sys.exit(os.waitstatus_to_exitcode(status))
+
+                def bar():
+                    foo()
+
+                def baz():
+                    bar()
+
+                baz()
+                """
+        with temp_dir() as script_dir:
+            script = make_script(script_dir, "perftest", code)
+            with subprocess.Popen(
+                [sys.executable, "-Xperf", script],
+                universal_newlines=True,
+                stderr=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+            ) as process:
+                stdout, stderr = process.communicate()
+
+        self.assertEqual(process.returncode, 0)
+        self.assertEqual(stderr, "")
+        # The only thing written to stdout is the pid printed by the child.
+        child_pid = int(stdout.strip())
+        perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
+        perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
+        self.assertTrue(perf_file.exists())
+        self.assertTrue(perf_child_file.exists())
+
+        perf_file_contents = perf_file.read_text()
+        self.assertIn(f"py::foo:{script}", perf_file_contents)
+        self.assertIn(f"py::bar:{script}", perf_file_contents)
+        self.assertIn(f"py::baz:{script}", perf_file_contents)
+
+        child_perf_file_contents = perf_child_file.read_text()
+        self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
+        self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)
+        self.assertIn(f"py::baz_fork:{script}", child_perf_file_contents)
+
+
 def is_unwinding_reliable():
     cflags = sysconfig.get_config_var("PY_CORE_CFLAGS")
     if not cflags:
         return False
     return "no-omit-frame-pointer" in cflags
 
 
-if not is_unwinding_reliable():
-    raise unittest.SkipTest("Unwinding without frame pointer is unreliable")
-
-if support.check_sanitizer(address=True, memory=True, ub=True):
-    raise unittest.SkipTest("Perf unwinding doesn't work with sanitizers")
-
-
-def check_perf_command():
+def perf_command_works():
     try:
         cmd = ["perf", "--help"]
         stdout = subprocess.check_output(cmd, universal_newlines=True)
     except (subprocess.SubprocessError, OSError):
-        raise unittest.SkipTest("Couldn't find perf on the path")
+        return False
 
     # perf version does not return a version number on Fedora. Use presence
     # of "perf.data" in help as indicator that it's perf from Linux tools.
     if "perf.data" not in stdout:
-        raise unittest.SkipTest("perf command does not look like Linux tool perf")
+        return False
 
     # Check that we can run a simple perf run
     with temp_dir() as script_dir:
@@ -59,13 +162,12 @@ def check_perf_command():
                 cmd, cwd=script_dir, universal_newlines=True, stderr=subprocess.STDOUT
             )
         except (subprocess.SubprocessError, OSError):
-            raise unittest.SkipTest("Couldn't run perf on simple script")
+            return False
 
         if "hello" not in stdout:
-            raise unittest.SkipTest("perf run did not work correctly")
-
+            return False
 
-check_perf_command()
+    return True
 
 
 def run_perf(cwd, *args, **env_vars):
@@ -76,39 +178,45 @@ def run_perf(cwd, *args, **env_vars):
         env = None
     output_file = cwd + "/perf_output.perf"
     base_cmd = ("perf", "record", "-g", "--call-graph=fp", "-o", output_file, "--")
-    prev_perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map"))
-    try:
-        proc = subprocess.run(
-            base_cmd + args,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            env=env,
-        )
-        if proc.returncode:
-            print(proc.stderr)
-            raise ValueError(f"Perf failed with return code {proc.returncode}")
+    proc = subprocess.run(
+        base_cmd + args,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=env,
+    )
+    if proc.returncode:
+        print(proc.stderr)
+        raise ValueError(f"Perf failed with return code {proc.returncode}")
 
-        base_cmd = ("perf", "script")
-        proc = subprocess.run(
-            ("perf", "script", "-i", output_file),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            env=env,
-            check=True,
-        )
-        return proc.stdout.decode("utf-8", "replace"), proc.stderr.decode(
-            "utf-8", "replace"
-        )
-    finally:
-        # Clean up the perf map file at the end
+    base_cmd = ("perf", "script")
+    proc = subprocess.run(
+        ("perf", "script", "-i", output_file),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=env,
+        check=True,
+    )
+    return proc.stdout.decode("utf-8", "replace"), proc.stderr.decode(
+        "utf-8", "replace"
+    )
+
+
+@unittest.skipUnless(perf_command_works(), "perf command doesn't work")
+@unittest.skipUnless(is_unwinding_reliable(), "Unwinding is unreliable")
+@support.skip_if_sanitizer(address=True, memory=True, ub=True)
+class TestPerfProfiler(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        self.perf_files = set(pathlib.Path("/tmp/").glob("perf-*.map"))
+
+    def tearDown(self) -> None:
+        super().tearDown()
         files_to_delete = (
-            set(pathlib.Path("/tmp/").glob("perf-*.map")) - prev_perf_files
+            set(pathlib.Path("/tmp/").glob("perf-*.map")) - self.perf_files
         )
         for file in files_to_delete:
             file.unlink()
 
-
-class TestPerfProfiler(unittest.TestCase):
     def test_python_calls_appear_in_the_stack_if_perf_activated(self):
         with temp_dir() as script_dir:
             code = """if 1:

@@ -54,6 +54,7 @@ DTRACE=         @DTRACE@
 DFLAGS=         @DFLAGS@
 DTRACE_HEADERS= @DTRACE_HEADERS@
 DTRACE_OBJS=    @DTRACE_OBJS@
+PERF_TRAMPOLINE_SUPPORT= @PERF_TRAMPOLINE_SUPPORT@
 
 GNULD=		@GNULD@
 

diff --git a/Objects/perf_trampoline.c b/Objects/perf_trampoline.c
@@ -362,16 +362,11 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
     }
     PyCodeObject *co = frame->f_code;
     py_trampoline f = NULL;
-    int ret = -1;
-    if (extra_code_index != -1) {
-        ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
-    }
+    assert(extra_code_index != -1);
+    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
     if (ret != 0 || f == NULL) {
         // This is the first time we see this code object so we need
         // to compile a trampoline for it.
-        if (extra_code_index == -1) {
-            extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
-        }
         py_trampoline new_trampoline = compile_trampoline();
         if (new_trampoline == NULL) {
             goto default_eval;
@@ -445,6 +440,10 @@ _PyPerfTrampoline_Init(int activate)
             }
             trampoline_api.state = state;
         }
+        extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
+        if (extra_code_index == -1) {
+            return -1;
+        }
         perf_status = PERF_STATUS_OK;
     }
 #endif
@@ -455,6 +454,10 @@ int
 _PyPerfTrampoline_Fini(void)
 {
 #ifdef _PY_HAVE_PERF_TRAMPOLINE
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (tstate->interp->eval_frame == py_trampoline_evaluator) {
+        tstate->interp->eval_frame = NULL;
+    }
     free_code_arenas();
     if (trampoline_api.state != NULL) {
         trampoline_api.free_state(trampoline_api.state);

@@ -3435,9 +3435,12 @@ AS_CASE([$PLATFORM_TRIPLET],
 )
 AC_MSG_RESULT([$perf_trampoline])
 
+AC_SUBST(PERF_TRAMPOLINE_SUPPORT)
+PERF_TRAMPOLINE_SUPPORT=
 AS_VAR_IF([perf_trampoline], [yes], [
   AC_DEFINE([_PY_HAVE_PERF_TRAMPOLINE], [1], [Define to 1 if you have the perf trampoline.])
   PERF_TRAMPOLINE_OBJ=Objects/asm_trampoline.o
+  PERF_TRAMPOLINE_SUPPORT=1
 
   dnl perf needs frame pointers for unwinding, include compiler option in debug builds
   AS_VAR_IF([Py_DEBUG], [true], [