Skip to content

Commit 06567e1

Browse files
authored
[Impeller] Test FenceWaiterVK and fix termination bugs (flutter#45870)
Fixes flutter/flutter#134751, and @jonahwilliams suspects it could be related to a number of other flaky/texture leak scenarios (flutter/flutter#133506 (comment)) that only happen sometimes (i.e. on CI but not real devices), i.e. stuff like: ```txt --- Vulkan Debug Report  ---------------------------------------- |                Severity: Error |                    Type: { Validation } |                 ID Name: VUID-vkDestroyBuffer-buffer-00922 |               ID Number: -464217071 |       Queue Breadcrumbs: [NONE] |  CMD Buffer Breadcrumbs: [NONE] |         Related Objects: Device [94498356231456] [ImpellerDevice] |                 Trigger: Validation Error: [ VUID-vkDestroyBuffer-buffer-00922 ] Object 0: handle = 0x55f21cf47d20, name = ImpellerDevice, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0xe4549c11 | Cannot call vkDestroyBuffer on VkBuffer 0xbb00000000bb[] that is currently in use by a command buffer. The Vulkan spec states: All submitted commands that refer to buffer, either directly or via a VkBufferView, must have completed execution (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkDestroyBuffer-buffer-00922) ----------------------------------------------------------------- ``` --- ~~This PR will look a bit like a mess until the last 2 PRs merge in, but locally it appears to fix fence races/segfaults that I was seeing on CI, including on Linux with validations enabled. We can test it further tomorrow.~~ EDIT: Updated.
1 parent f4d5e45 commit 06567e1

File tree

9 files changed

+305
-77
lines changed

9 files changed

+305
-77
lines changed

ci/licenses_golden/excluded_files

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
../../../flutter/impeller/renderer/backend/vulkan/blit_command_vk_unittests.cc
155155
../../../flutter/impeller/renderer/backend/vulkan/command_encoder_vk_unittests.cc
156156
../../../flutter/impeller/renderer/backend/vulkan/context_vk_unittests.cc
157+
../../../flutter/impeller/renderer/backend/vulkan/fence_waiter_vk_unittests.cc
157158
../../../flutter/impeller/renderer/backend/vulkan/pass_bindings_cache_unittests.cc
158159
../../../flutter/impeller/renderer/backend/vulkan/resource_manager_vk_unittests.cc
159160
../../../flutter/impeller/renderer/backend/vulkan/test

impeller/renderer/backend/vulkan/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ impeller_component("vulkan_unittests") {
1111
"blit_command_vk_unittests.cc",
1212
"command_encoder_vk_unittests.cc",
1313
"context_vk_unittests.cc",
14+
"fence_waiter_vk_unittests.cc",
1415
"pass_bindings_cache_unittests.cc",
1516
"resource_manager_vk_unittests.cc",
1617
"test/mock_vulkan.cc",

impeller/renderer/backend/vulkan/context_vk.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -377,10 +377,6 @@ void ContextVK::Setup(Settings settings) {
377377
///
378378
auto fence_waiter =
379379
std::shared_ptr<FenceWaiterVK>(new FenceWaiterVK(device_holder));
380-
if (!fence_waiter->IsValid()) {
381-
VALIDATION_LOG << "Could not create fence waiter.";
382-
return;
383-
}
384380

385381
//----------------------------------------------------------------------------
386382
/// Create the resource manager.

impeller/renderer/backend/vulkan/fence_waiter_vk.cc

Lines changed: 95 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include <algorithm>
88
#include <chrono>
9+
#include <utility>
910

1011
#include "flutter/fml/thread.h"
1112
#include "flutter/fml/trace_event.h"
@@ -47,28 +48,25 @@ class WaitSetEntry {
4748
FenceWaiterVK::FenceWaiterVK(std::weak_ptr<DeviceHolder> device_holder)
4849
: device_holder_(std::move(device_holder)) {
4950
waiter_thread_ = std::make_unique<std::thread>([&]() { Main(); });
50-
is_valid_ = true;
5151
}
5252

5353
FenceWaiterVK::~FenceWaiterVK() {
5454
Terminate();
55-
if (waiter_thread_) {
56-
waiter_thread_->join();
57-
}
58-
}
59-
60-
bool FenceWaiterVK::IsValid() const {
61-
return is_valid_;
55+
waiter_thread_->join();
6256
}
6357

6458
bool FenceWaiterVK::AddFence(vk::UniqueFence fence,
6559
const fml::closure& callback) {
6660
TRACE_EVENT0("flutter", "FenceWaiterVK::AddFence");
67-
if (!IsValid() || !fence || !callback) {
61+
if (!fence || !callback) {
6862
return false;
6963
}
7064
{
65+
// Maintain the invariant that terminate_ is accessed only under the lock.
7166
std::scoped_lock lock(wait_set_mutex_);
67+
if (terminate_) {
68+
return false;
69+
}
7270
wait_set_.emplace_back(WaitSetEntry::Create(std::move(fence), callback));
7371
}
7472
wait_set_cv_.notify_one();
@@ -92,85 +90,112 @@ void FenceWaiterVK::Main() {
9290
using namespace std::literals::chrono_literals;
9391

9492
while (true) {
95-
std::unique_lock lock(wait_set_mutex_);
96-
97-
// If there are no fences to wait on, wait on the condition variable.
98-
wait_set_cv_.wait(lock, [&]() { return !wait_set_.empty() || terminate_; });
93+
// We'll read the terminate_ flag within the lock below.
94+
bool terminate = false;
9995

100-
// We don't want to check on fence status or collect wait set entries in the
101-
// critical section. Copy the array of entries and immediately unlock the
102-
// mutex.
103-
WaitSet wait_set = wait_set_;
96+
{
97+
std::unique_lock lock(wait_set_mutex_);
10498

105-
const auto terminate = terminate_;
99+
// If there are no fences to wait on, wait on the condition variable.
100+
wait_set_cv_.wait(lock,
101+
[&]() { return !wait_set_.empty() || terminate_; });
106102

107-
lock.unlock();
103+
// Still under the lock, check if the waiter has been terminated.
104+
terminate = terminate_;
105+
}
108106

109107
if (terminate) {
108+
WaitUntilEmpty();
110109
break;
111110
}
112111

113-
// Check if the context had died in the meantime.
114-
auto device_holder = device_holder_.lock();
115-
if (!device_holder) {
112+
if (!Wait()) {
116113
break;
117114
}
115+
}
116+
}
118117

119-
const auto& device = device_holder->GetDevice();
118+
void FenceWaiterVK::WaitUntilEmpty() {
119+
// Note, there is no lock because once terminate_ is set to true, no other
120+
// fence can be added to the wait set. Just in case, here's a FML_DCHECK:
121+
FML_DCHECK(terminate_) << "Fence waiter must be terminated.";
122+
while (!wait_set_.empty() && Wait()) {
123+
// Intentionally empty.
124+
}
125+
}
120126

121-
// Wait for one or more fences to be signaled. Any additional fences added
122-
// to the waiter will be serviced in the next pass. If a fence that is going
123-
// to be signaled at an abnormally long deadline is the only one in the set,
124-
// a timeout will bail out the wait.
125-
auto fences = GetFencesForWaitSet(wait_set);
126-
if (fences.empty()) {
127-
continue;
128-
}
127+
bool FenceWaiterVK::Wait() {
128+
// Snapshot the wait set and wait on the fences.
129+
WaitSet wait_set;
130+
{
131+
std::scoped_lock lock(wait_set_mutex_);
132+
wait_set = wait_set_;
133+
}
129134

130-
auto result = device.waitForFences(
131-
fences.size(), // fences count
132-
fences.data(), // fences
133-
false, // wait for all
134-
std::chrono::nanoseconds{100ms}.count() // timeout (ns)
135-
);
136-
if (!(result == vk::Result::eSuccess || result == vk::Result::eTimeout)) {
137-
VALIDATION_LOG << "Fence waiter encountered an unexpected error. Tearing "
138-
"down the waiter thread.";
139-
break;
140-
}
135+
using namespace std::literals::chrono_literals;
141136

142-
// One or more fences have been signaled. Find out which ones and update
143-
// their signaled statuses.
144-
{
145-
TRACE_EVENT0("impeller", "CheckFenceStatus");
146-
for (auto& entry : wait_set) {
147-
entry->UpdateSignalledStatus(device);
148-
}
149-
wait_set.clear();
150-
}
137+
// Check if the context had died in the meantime.
138+
auto device_holder = device_holder_.lock();
139+
if (!device_holder) {
140+
return false;
141+
}
151142

152-
// Quickly acquire the wait set lock and erase signaled entries. Make sure
153-
// the mutex is unlocked before calling the destructors of the erased
154-
// entries. These might touch allocators.
155-
WaitSet erased_entries;
156-
{
157-
static auto is_signalled = [](const auto& entry) {
158-
return entry->IsSignalled();
159-
};
160-
std::scoped_lock lock(wait_set_mutex_);
161-
std::copy_if(wait_set_.begin(), wait_set_.end(),
162-
std::back_inserter(erased_entries), is_signalled);
163-
wait_set_.erase(
164-
std::remove_if(wait_set_.begin(), wait_set_.end(), is_signalled),
165-
wait_set_.end());
166-
}
143+
const auto& device = device_holder->GetDevice();
144+
// Wait for one or more fences to be signaled. Any additional fences added
145+
// to the waiter will be serviced in the next pass. If a fence that is going
146+
// to be signaled at an abnormally long deadline is the only one in the set,
147+
// a timeout will bail out the wait.
148+
auto fences = GetFencesForWaitSet(wait_set);
149+
if (fences.empty()) {
150+
return true;
151+
}
167152

168-
{
169-
TRACE_EVENT0("impeller", "ClearSignaledFences");
170-
// Erase the erased entries which will invoke callbacks.
171-
erased_entries.clear(); // Bit redundant because of scope but hey.
153+
auto result = device.waitForFences(
154+
/*fenceCount=*/fences.size(),
155+
/*pFences=*/fences.data(),
156+
/*waitAll=*/false,
157+
/*timeout=*/std::chrono::nanoseconds{100ms}.count());
158+
if (!(result == vk::Result::eSuccess || result == vk::Result::eTimeout)) {
159+
VALIDATION_LOG << "Fence waiter encountered an unexpected error. Tearing "
160+
"down the waiter thread.";
161+
return false;
162+
}
163+
164+
// One or more fences have been signaled. Find out which ones and update
165+
// their signaled statuses.
166+
{
167+
TRACE_EVENT0("impeller", "CheckFenceStatus");
168+
for (auto& entry : wait_set) {
169+
entry->UpdateSignalledStatus(device);
172170
}
171+
wait_set.clear();
173172
}
173+
174+
// Quickly acquire the wait set lock and erase signaled entries. Make sure
175+
// the mutex is unlocked before calling the destructors of the erased
176+
// entries. These might touch allocators.
177+
WaitSet erased_entries;
178+
{
179+
static constexpr auto is_signalled = [](const auto& entry) {
180+
return entry->IsSignalled();
181+
};
182+
std::scoped_lock lock(wait_set_mutex_);
183+
184+
// TODO(matanlurey): Iterate the list 1x by copying is_signaled into erased.
185+
std::copy_if(wait_set_.begin(), wait_set_.end(),
186+
std::back_inserter(erased_entries), is_signalled);
187+
wait_set_.erase(
188+
std::remove_if(wait_set_.begin(), wait_set_.end(), is_signalled),
189+
wait_set_.end());
190+
}
191+
192+
{
193+
TRACE_EVENT0("impeller", "ClearSignaledFences");
194+
// Erase the erased entries which will invoke callbacks.
195+
erased_entries.clear(); // Bit redundant because of scope but hey.
196+
}
197+
198+
return true;
174199
}
175200

176201
void FenceWaiterVK::Terminate() {

impeller/renderer/backend/vulkan/fence_waiter_vk.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,14 @@ class FenceWaiterVK {
4242
std::condition_variable wait_set_cv_;
4343
WaitSet wait_set_;
4444
bool terminate_ = false;
45-
bool is_valid_ = false;
4645

4746
explicit FenceWaiterVK(std::weak_ptr<DeviceHolder> device_holder);
4847

4948
void Main();
5049

50+
bool Wait();
51+
void WaitUntilEmpty();
52+
5153
FML_DISALLOW_COPY_AND_ASSIGN(FenceWaiterVK);
5254
};
5355

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2013 The Flutter Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
#include "fml/synchronization/waitable_event.h"
6+
#include "gtest/gtest.h" // IWYU pragma: keep
7+
#include "impeller/renderer/backend/vulkan/fence_waiter_vk.h" // IWYU pragma: keep
8+
#include "impeller/renderer/backend/vulkan/test/mock_vulkan.h"
9+
10+
namespace impeller {
11+
namespace testing {
12+
13+
TEST(FenceWaiterVKTest, IgnoresNullFence) {
14+
auto const context = MockVulkanContextBuilder().Build();
15+
auto const waiter = context->GetFenceWaiter();
16+
EXPECT_FALSE(waiter->AddFence(vk::UniqueFence(), []() {}));
17+
}
18+
19+
TEST(FenceWaiterVKTest, IgnoresNullCallback) {
20+
auto const context = MockVulkanContextBuilder().Build();
21+
auto const device = context->GetDevice();
22+
auto const waiter = context->GetFenceWaiter();
23+
24+
auto fence = device.createFenceUnique({}).value;
25+
EXPECT_FALSE(waiter->AddFence(std::move(fence), nullptr));
26+
}
27+
28+
TEST(FenceWaiterVKTest, ExecutesFenceCallback) {
29+
auto const context = MockVulkanContextBuilder().Build();
30+
auto const device = context->GetDevice();
31+
auto const waiter = context->GetFenceWaiter();
32+
33+
auto signal = fml::ManualResetWaitableEvent();
34+
auto fence = device.createFenceUnique({}).value;
35+
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
36+
37+
signal.Wait();
38+
}
39+
40+
TEST(FenceWaiterVKTest, ExecutesFenceCallbackX2) {
41+
auto const context = MockVulkanContextBuilder().Build();
42+
auto const device = context->GetDevice();
43+
auto const waiter = context->GetFenceWaiter();
44+
45+
auto signal = fml::ManualResetWaitableEvent();
46+
auto fence = device.createFenceUnique({}).value;
47+
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
48+
49+
auto signal2 = fml::ManualResetWaitableEvent();
50+
auto fence2 = device.createFenceUnique({}).value;
51+
waiter->AddFence(std::move(fence2), [&signal2]() { signal2.Signal(); });
52+
53+
signal.Wait();
54+
signal2.Wait();
55+
}
56+
57+
TEST(FenceWaiterVKTest, ExecutesNewFenceThenOldFence) {
58+
auto const context = MockVulkanContextBuilder().Build();
59+
auto const device = context->GetDevice();
60+
auto const waiter = context->GetFenceWaiter();
61+
62+
auto signal = fml::ManualResetWaitableEvent();
63+
auto fence = device.createFenceUnique({}).value;
64+
MockFence::SetStatus(fence, vk::Result::eNotReady);
65+
auto raw_fence = MockFence::GetRawPointer(fence);
66+
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
67+
68+
// The easiest way to verify that the callback was _not_ called is to wait
69+
// for a timeout, but that could introduce flakiness. Instead, we'll add a
70+
// second fence that will signal immediately, and wait for that one instead.
71+
{
72+
auto signal2 = fml::ManualResetWaitableEvent();
73+
auto fence2 = device.createFenceUnique({}).value;
74+
MockFence::SetStatus(fence2, vk::Result::eSuccess);
75+
waiter->AddFence(std::move(fence2), [&signal2]() { signal2.Signal(); });
76+
signal2.Wait();
77+
}
78+
79+
// Now, we'll signal the first fence, and wait for the callback to be called.
80+
raw_fence->SetStatus(vk::Result::eSuccess);
81+
82+
// Now, we'll signal the first fence, and wait for the callback to be called.
83+
signal.Wait();
84+
}
85+
86+
TEST(FenceWaiterVKTest, AddFenceDoesNothingIfTerminating) {
87+
auto signal = fml::ManualResetWaitableEvent();
88+
89+
{
90+
auto const context = MockVulkanContextBuilder().Build();
91+
auto const device = context->GetDevice();
92+
auto const waiter = context->GetFenceWaiter();
93+
waiter->Terminate();
94+
95+
auto fence = device.createFenceUnique({}).value;
96+
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
97+
}
98+
99+
// Ensure the fence did _not_ signal.
100+
EXPECT_TRUE(signal.WaitWithTimeout(fml::TimeDelta::FromMilliseconds(100)));
101+
}
102+
103+
TEST(FenceWaiterVKTest, InProgressFencesStillWaitIfTerminated) {
104+
MockFence* raw_fence = nullptr;
105+
auto signal = fml::ManualResetWaitableEvent();
106+
107+
auto const context = MockVulkanContextBuilder().Build();
108+
auto const device = context->GetDevice();
109+
auto const waiter = context->GetFenceWaiter();
110+
111+
// Add a fence that isn't signalled yet.
112+
auto fence = device.createFenceUnique({}).value;
113+
114+
// Even if the fence is eSuccess, it's not guaranteed to be called in time.
115+
MockFence::SetStatus(fence, vk::Result::eNotReady);
116+
raw_fence = MockFence::GetRawPointer(fence);
117+
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
118+
119+
// Terminate the waiter.
120+
waiter->Terminate();
121+
122+
// Signal the fence.
123+
raw_fence->SetStatus(vk::Result::eSuccess);
124+
125+
// This will hang if the fence was not signalled.
126+
signal.Wait();
127+
}
128+
129+
} // namespace testing
130+
} // namespace impeller

0 commit comments

Comments
 (0)