Skip to content

Commit 52aedc6

Browse files
author
Jonah Williams
authored
[Impeller] Have HostBuffer write directly to block-allocated device buffers. (flutter#49505)
Part of flutter#140804. We can't use the existing host buffer abstraction, as that requires us to collect all allocations up front. By itself, this isn't sufficient for flutter#140804, because we'll need a way to mark ranges as dirty and/or flush if we don't have host-coherent memory. But by itself this change should be beneficial, as we'll create fewer device buffers and should do less allocation in general. The size of the device buffers is 1024 kB (1,024,000 bytes), somewhat arbitrarily chosen.
1 parent ff1c3d0 commit 52aedc6

File tree

102 files changed

+678
-469
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+678
-469
lines changed

ci/licenses_golden/excluded_files

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
../../../flutter/impeller/entity/contents/filters/directional_gaussian_blur_filter_contents_unittests.cc
147147
../../../flutter/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc
148148
../../../flutter/impeller/entity/contents/filters/inputs/filter_input_unittests.cc
149+
../../../flutter/impeller/entity/contents/host_buffer_unittests.cc
149150
../../../flutter/impeller/entity/contents/test
150151
../../../flutter/impeller/entity/contents/tiled_texture_contents_unittests.cc
151152
../../../flutter/impeller/entity/contents/vertices_contents_unittests.cc
@@ -187,7 +188,6 @@
187188
../../../flutter/impeller/renderer/compute_subgroup_unittests.cc
188189
../../../flutter/impeller/renderer/compute_unittests.cc
189190
../../../flutter/impeller/renderer/device_buffer_unittests.cc
190-
../../../flutter/impeller/renderer/host_buffer_unittests.cc
191191
../../../flutter/impeller/renderer/pipeline_descriptor_unittests.cc
192192
../../../flutter/impeller/renderer/pool_unittests.cc
193193
../../../flutter/impeller/renderer/renderer_dart_unittests.cc

impeller/aiks/aiks_context.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,19 @@ ContentContext& AiksContext::GetContentContext() const {
4040
return *content_context_;
4141
}
4242

43-
bool AiksContext::Render(const Picture& picture, RenderTarget& render_target) {
43+
bool AiksContext::Render(const Picture& picture,
44+
RenderTarget& render_target,
45+
bool reset_host_buffer) {
4446
if (!IsValid()) {
4547
return false;
4648
}
4749

4850
if (picture.pass) {
4951
return picture.pass->Render(*content_context_, render_target);
5052
}
53+
if (reset_host_buffer) {
54+
content_context_->GetTransientsBuffer().Reset();
55+
}
5156

5257
return true;
5358
}

impeller/aiks/aiks_context.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ class AiksContext {
3939

4040
ContentContext& GetContentContext() const;
4141

42-
bool Render(const Picture& picture, RenderTarget& render_target);
42+
bool Render(const Picture& picture,
43+
RenderTarget& render_target,
44+
bool reset_host_buffer);
4345

4446
private:
4547
std::shared_ptr<Context> context_;

impeller/aiks/aiks_playground.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ bool AiksPlayground::OpenPlaygroundHere(AiksPlaygroundCallback callback) {
5353
if (!picture.has_value()) {
5454
return false;
5555
}
56-
return renderer.Render(*picture, render_target);
56+
return renderer.Render(*picture, render_target, true);
5757
});
5858
}
5959

impeller/aiks/picture.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ std::shared_ptr<Texture> Picture::RenderToTexture(
8484
return nullptr;
8585
}
8686

87-
if (!context.Render(*this, target)) {
87+
if (!context.Render(*this, target, false)) {
8888
VALIDATION_LOG << "Could not render Picture to Texture.";
8989
return nullptr;
9090
}

impeller/base/allocation.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#define FLUTTER_IMPELLER_BASE_ALLOCATION_H_
77

88
#include <cstdint>
9-
#include <limits>
109
#include <memory>
1110

1211
#include "flutter/fml/mapping.h"

impeller/core/buffer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ class Buffer {
1616
public:
1717
virtual ~Buffer();
1818

19-
virtual std::shared_ptr<const DeviceBuffer> GetDeviceBuffer(
20-
Allocator& allocator) const = 0;
19+
virtual std::shared_ptr<const DeviceBuffer> GetDeviceBuffer() const = 0;
2120
};
2221

2322
} // namespace impeller

impeller/core/device_buffer.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ DeviceBuffer::DeviceBuffer(DeviceBufferDescriptor desc) : desc_(desc) {}
1111
DeviceBuffer::~DeviceBuffer() = default;
1212

1313
// |Buffer|
14-
std::shared_ptr<const DeviceBuffer> DeviceBuffer::GetDeviceBuffer(
15-
Allocator& allocator) const {
14+
std::shared_ptr<const DeviceBuffer> DeviceBuffer::GetDeviceBuffer() const {
1615
return shared_from_this();
1716
}
1817

18+
void DeviceBuffer::Flush(std::optional<Range> range) const {}
19+
1920
BufferView DeviceBuffer::AsBufferView() const {
2021
BufferView view;
2122
view.buffer = shared_from_this();

impeller/core/device_buffer.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,21 @@ class DeviceBuffer : public Buffer,
3838
uint16_t row_bytes) const;
3939

4040
// |Buffer|
41-
std::shared_ptr<const DeviceBuffer> GetDeviceBuffer(
42-
Allocator& allocator) const;
41+
std::shared_ptr<const DeviceBuffer> GetDeviceBuffer() const;
4342

4443
const DeviceBufferDescriptor& GetDeviceBufferDescriptor() const;
4544

4645
virtual uint8_t* OnGetContents() const = 0;
4746

47+
/// Make any pending writes visible to the GPU.
48+
///
49+
/// This method must be called if the device pointer provided by
50+
/// [OnGetContents] is written to without using [CopyHostBuffer]. On Devices
51+
/// with coherent host memory, this method will not perform extra work.
52+
///
53+
/// If the range is not provided, the entire buffer is flushed.
54+
virtual void Flush(std::optional<Range> range = std::nullopt) const;
55+
4856
protected:
4957
const DeviceBufferDescriptor desc_;
5058

impeller/core/host_buffer.cc

Lines changed: 116 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,33 @@
44

55
#include "impeller/core/host_buffer.h"
66

7-
#include <algorithm>
87
#include <cstring>
9-
10-
#include "flutter/fml/logging.h"
8+
#include <tuple>
119

1210
#include "impeller/core/allocator.h"
1311
#include "impeller/core/buffer_view.h"
1412
#include "impeller/core/device_buffer.h"
13+
#include "impeller/core/device_buffer_descriptor.h"
14+
#include "impeller/core/formats.h"
1515

1616
namespace impeller {
1717

18-
std::shared_ptr<HostBuffer> HostBuffer::Create() {
19-
return std::shared_ptr<HostBuffer>(new HostBuffer());
18+
constexpr size_t kAllocatorBlockSize = 1024000; // 1024 Kb.
19+
20+
std::shared_ptr<HostBuffer> HostBuffer::Create(
21+
const std::shared_ptr<Allocator>& allocator) {
22+
return std::shared_ptr<HostBuffer>(new HostBuffer(allocator));
2023
}
2124

22-
HostBuffer::HostBuffer() = default;
25+
HostBuffer::HostBuffer(const std::shared_ptr<Allocator>& allocator) {
26+
state_->allocator = allocator;
27+
DeviceBufferDescriptor desc;
28+
desc.size = kAllocatorBlockSize;
29+
desc.storage_mode = StorageMode::kHostVisible;
30+
for (auto i = 0u; i < kHostBufferArenaSize; i++) {
31+
state_->device_buffers[i].push_back(allocator->CreateBuffer(desc));
32+
}
33+
}
2334

2435
HostBuffer::~HostBuffer() = default;
2536

@@ -30,104 +41,143 @@ void HostBuffer::SetLabel(std::string label) {
3041
BufferView HostBuffer::Emplace(const void* buffer,
3142
size_t length,
3243
size_t align) {
33-
auto [device_buffer, range] = state_->Emplace(buffer, length, align);
44+
auto [data, range, device_buffer] = state_->Emplace(buffer, length, align);
3445
if (!device_buffer) {
3546
return {};
3647
}
37-
return BufferView{state_, device_buffer, range};
48+
return BufferView{std::move(device_buffer), data, range};
3849
}
3950

4051
BufferView HostBuffer::Emplace(const void* buffer, size_t length) {
41-
auto [device_buffer, range] = state_->Emplace(buffer, length);
52+
auto [data, range, device_buffer] = state_->Emplace(buffer, length);
4253
if (!device_buffer) {
4354
return {};
4455
}
45-
return BufferView{state_, device_buffer, range};
56+
return BufferView{std::move(device_buffer), data, range};
4657
}
4758

4859
BufferView HostBuffer::Emplace(size_t length,
4960
size_t align,
5061
const EmplaceProc& cb) {
51-
auto [buffer, range] = state_->Emplace(length, align, cb);
52-
if (!buffer) {
62+
auto [data, range, device_buffer] = state_->Emplace(length, align, cb);
63+
if (!device_buffer) {
5364
return {};
5465
}
55-
return BufferView{state_, buffer, range};
66+
return BufferView{std::move(device_buffer), data, range};
5667
}
5768

58-
std::shared_ptr<const DeviceBuffer> HostBuffer::GetDeviceBuffer(
59-
Allocator& allocator) const {
60-
return state_->GetDeviceBuffer(allocator);
69+
HostBuffer::TestStateQuery HostBuffer::GetStateForTest() {
70+
return HostBuffer::TestStateQuery{
71+
.current_frame = state_->frame_index,
72+
.current_buffer = state_->current_buffer,
73+
.total_buffer_count = state_->device_buffers[state_->frame_index].size(),
74+
};
6175
}
6276

6377
void HostBuffer::Reset() {
6478
state_->Reset();
6579
}
6680

67-
size_t HostBuffer::GetSize() const {
68-
return state_->GetReservedLength();
69-
}
70-
71-
size_t HostBuffer::GetLength() const {
72-
return state_->GetLength();
81+
void HostBuffer::HostBufferState::MaybeCreateNewBuffer(size_t required_size) {
82+
current_buffer++;
83+
if (current_buffer >= device_buffers[frame_index].size()) {
84+
FML_DCHECK(required_size <= kAllocatorBlockSize);
85+
DeviceBufferDescriptor desc;
86+
desc.size = kAllocatorBlockSize;
87+
desc.storage_mode = StorageMode::kHostVisible;
88+
device_buffers[frame_index].push_back(allocator->CreateBuffer(desc));
89+
}
90+
offset = 0;
7391
}
7492

75-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
76-
size_t length,
77-
size_t align,
78-
const EmplaceProc& cb) {
93+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
94+
HostBuffer::HostBufferState::Emplace(size_t length,
95+
size_t align,
96+
const EmplaceProc& cb) {
7997
if (!cb) {
8098
return {};
8199
}
100+
101+
// If the requested allocation is bigger than the block size, create a one-off
102+
// device buffer and write to that.
103+
if (length > kAllocatorBlockSize) {
104+
DeviceBufferDescriptor desc;
105+
desc.size = length;
106+
desc.storage_mode = StorageMode::kHostVisible;
107+
auto device_buffer = allocator->CreateBuffer(desc);
108+
if (!device_buffer) {
109+
return {};
110+
}
111+
if (cb) {
112+
cb(device_buffer->OnGetContents());
113+
device_buffer->Flush(Range{0, length});
114+
}
115+
return std::make_tuple(device_buffer->OnGetContents(), Range{0, length},
116+
device_buffer);
117+
}
118+
82119
auto old_length = GetLength();
83-
if (!Truncate(old_length + length)) {
84-
return {};
120+
if (old_length + length > kAllocatorBlockSize) {
121+
MaybeCreateNewBuffer(length);
85122
}
86-
generation++;
87-
cb(GetBuffer() + old_length);
123+
old_length = GetLength();
124+
125+
cb(GetCurrentBuffer()->OnGetContents() + old_length);
126+
GetCurrentBuffer()->Flush(Range{old_length, length});
88127

89-
return std::make_pair(GetBuffer(), Range{old_length, length});
128+
offset += length;
129+
return std::make_tuple(GetCurrentBuffer()->OnGetContents(),
130+
Range{old_length, length}, GetCurrentBuffer());
90131
}
91132

92-
std::shared_ptr<const DeviceBuffer>
93-
HostBuffer::HostBufferState::GetDeviceBuffer(Allocator& allocator) const {
94-
if (generation == device_buffer_generation) {
95-
return device_buffer;
96-
}
97-
auto new_buffer = allocator.CreateBufferWithCopy(GetBuffer(), GetLength());
98-
if (!new_buffer) {
99-
return nullptr;
133+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
134+
HostBuffer::HostBufferState::Emplace(const void* buffer, size_t length) {
135+
// If the requested allocation is bigger than the block size, create a one-off
136+
// device buffer and write to that.
137+
if (length > kAllocatorBlockSize) {
138+
DeviceBufferDescriptor desc;
139+
desc.size = length;
140+
desc.storage_mode = StorageMode::kHostVisible;
141+
auto device_buffer = allocator->CreateBuffer(desc);
142+
if (!device_buffer) {
143+
return {};
144+
}
145+
if (buffer) {
146+
if (!device_buffer->CopyHostBuffer(static_cast<const uint8_t*>(buffer),
147+
Range{0, length})) {
148+
return {};
149+
}
150+
}
151+
return std::make_tuple(device_buffer->OnGetContents(), Range{0, length},
152+
device_buffer);
100153
}
101-
new_buffer->SetLabel(label);
102-
device_buffer_generation = generation;
103-
device_buffer = std::move(new_buffer);
104-
return device_buffer;
105-
}
106154

107-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
108-
const void* buffer,
109-
size_t length) {
110155
auto old_length = GetLength();
111-
if (!Truncate(old_length + length)) {
112-
return {};
156+
if (old_length + length > kAllocatorBlockSize) {
157+
MaybeCreateNewBuffer(length);
113158
}
114-
generation++;
159+
old_length = GetLength();
160+
115161
if (buffer) {
116-
::memmove(GetBuffer() + old_length, buffer, length);
162+
::memmove(GetCurrentBuffer()->OnGetContents() + old_length, buffer, length);
163+
GetCurrentBuffer()->Flush(Range{old_length, length});
117164
}
118-
return std::make_pair(GetBuffer(), Range{old_length, length});
165+
offset += length;
166+
return std::make_tuple(GetCurrentBuffer()->OnGetContents(),
167+
Range{old_length, length}, GetCurrentBuffer());
119168
}
120169

121-
std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
122-
const void* buffer,
123-
size_t length,
124-
size_t align) {
170+
std::tuple<uint8_t*, Range, std::shared_ptr<DeviceBuffer>>
171+
HostBuffer::HostBufferState::Emplace(const void* buffer,
172+
size_t length,
173+
size_t align) {
125174
if (align == 0 || (GetLength() % align) == 0) {
126175
return Emplace(buffer, length);
127176
}
128177

129178
{
130-
auto [buffer, range] = Emplace(nullptr, align - (GetLength() % align));
179+
auto [buffer, range, device_buffer] =
180+
Emplace(nullptr, align - (GetLength() % align));
131181
if (!buffer) {
132182
return {};
133183
}
@@ -137,10 +187,15 @@ std::pair<uint8_t*, Range> HostBuffer::HostBufferState::Emplace(
137187
}
138188

139189
void HostBuffer::HostBufferState::Reset() {
140-
generation += 1;
141-
device_buffer = nullptr;
142-
bool did_truncate = Truncate(0);
143-
FML_CHECK(did_truncate);
190+
// When resetting the host buffer state at the end of the frame, check if
191+
// there are any unused buffers and remove them.
192+
while (device_buffers[frame_index].size() > current_buffer + 1) {
193+
device_buffers[frame_index].pop_back();
194+
}
195+
196+
offset = 0u;
197+
current_buffer = 0u;
198+
frame_index = (frame_index + 1) % kHostBufferArenaSize;
144199
}
145200

146201
} // namespace impeller

0 commit comments

Comments
 (0)