Skip to content

Commit 21f1d80

Browse files
cheshiret authored and tensorflower-gardener committed
Unify ScopedDeviceMemory and OwningDeviceMemory.
Previously, ScopedDeviceMemory was parameterized by a type it holds, and was only constructible by a stream executor, while OwningDeviceMemory was constructible from an arbitrary allocator, but was not parameterized. This refactoring leaves a single class ScopedDeviceMemory which is parameterized, can be constructed from an arbitrary allocator, and has helper constructors for constructing from a stream executor. OwningDeviceMemory is left as a typedef for ScopedDeviceMemory parameterized on uint8 (holds arbitrary bytes, no alignment guaranteed). PiperOrigin-RevId: 248234830
1 parent 5b5ed78 commit 21f1d80

29 files changed

+259
-430
lines changed

tensorflow/compiler/jit/xla_launch_util.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ limitations under the License.
3030
#include "tensorflow/core/lib/core/status.h"
3131
#include "tensorflow/core/lib/gtl/array_slice.h"
3232
#include "tensorflow/stream_executor/device_memory_allocator.h"
33-
#include "tensorflow/stream_executor/owning_device_memory.h"
3433

3534
namespace tensorflow {
3635
class XlaAllocator;

tensorflow/compiler/jit/xla_tensor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Status XlaTensor::AllocateShapedBuffer(DataType dtype,
6363
client->backend().memory_allocator()->Allocate(
6464
device_ordinal, size, /*retry_on_failure=*/false));
6565
// Move our buffer into shaped_buffer, which takes ownership of it.
66-
index_to_buffer.second = buffer.Forget();
66+
index_to_buffer.second = buffer.Release();
6767
}
6868

6969
VLOG(4) << shaped_buffer.ToString();

tensorflow/compiler/xla/python/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,6 @@ tf_pybind_extension(
214214
"//tensorflow/compiler/xla/client/lib:self_adjoint_eig",
215215
"//tensorflow/compiler/xla/client/lib:svd",
216216
"//tensorflow/compiler/xla/service:computation_placer",
217-
"//tensorflow/stream_executor:device_memory_allocator",
218217
"//tensorflow/compiler/xla/service:hlo",
219218
"//tensorflow/compiler/xla/service:hlo_graph_dumper",
220219
"//tensorflow/compiler/xla/service:name_uniquer",
@@ -229,6 +228,7 @@ tf_pybind_extension(
229228
# without any TF dependencies as "jaxlib" on Pypi, and "jaxlib" does
230229
# not require Tensorflow.
231230
"//tensorflow/core:lib_internal_impl", # buildcleaner: keep
231+
"//tensorflow/stream_executor:device_memory_allocator",
232232
] + xla_python_default_plugins(),
233233
)
234234

tensorflow/compiler/xla/python/shared_device_buffer.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ static void PopulateShapedBufferFromBuffer(
132132
ShapeTree<se::DeviceMemoryBase>::iterator* iterator,
133133
const ShapeTree<se::DeviceMemoryBase>::iterator& end) {
134134
CHECK(*iterator != end);
135-
(*iterator)->second = buffer.device_memory().AsDeviceMemoryBase();
135+
(*iterator)->second = *buffer.device_memory();
136136
++*iterator;
137137
for (const auto& child : buffer.children()) {
138138
PopulateShapedBufferFromBuffer(*child, iterator, end);

tensorflow/compiler/xla/python/shared_device_buffer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ limitations under the License.
2121
#include "tensorflow/compiler/xla/service/transfer_manager.h"
2222
#include "tensorflow/compiler/xla/shape.h"
2323
#include "tensorflow/stream_executor/device_memory_allocator.h"
24-
#include "tensorflow/stream_executor/owning_device_memory.h"
2524

2625
namespace xla {
2726

tensorflow/compiler/xla/python/shared_device_buffer_test.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,9 @@ TEST(PySharedDeviceBufferTest, AsShapedBuffer) {
114114
abc_tuple_buffer->on_device_shape());
115115

116116
std::vector<se::DeviceMemoryBase> expected_buffer_sequence = {
117-
abc_tuple_buffer->device_memory().AsDeviceMemoryBase(),
118-
c_buffer->device_memory().AsDeviceMemoryBase(),
119-
ab_tuple_buffer->device_memory().AsDeviceMemoryBase(),
120-
a_buffer->device_memory().AsDeviceMemoryBase(),
121-
b_buffer->device_memory().AsDeviceMemoryBase(),
117+
*abc_tuple_buffer->device_memory(), *c_buffer->device_memory(),
118+
*ab_tuple_buffer->device_memory(), *a_buffer->device_memory(),
119+
*b_buffer->device_memory(),
122120
};
123121
auto it = shaped_buffer.buffers().begin();
124122
auto expected_it = expected_buffer_sequence.begin();

tensorflow/compiler/xla/python/types.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ limitations under the License.
1717

1818
#include "absl/container/flat_hash_map.h"
1919
#include "tensorflow/compiler/xla/status_macros.h"
20-
#include "tensorflow/stream_executor/owning_device_memory.h"
2120

2221
namespace xla {
2322

tensorflow/compiler/xla/service/cpu/cpu_executable.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,17 +113,17 @@ CpuExecutable::CreateBufferTable(
113113
} else {
114114
TF_ASSIGN_OR_RETURN(owning_buffers[i], memory_allocator->Allocate(
115115
device_ordinal, buffer_size));
116-
unowning_buffers[i] = owning_buffers[i].AsDeviceMemoryBase();
116+
unowning_buffers[i] = *owning_buffers[i];
117117

118118
VLOG(3) << "buffer #" << i << " allocated " << buffer_size << " bytes ["
119-
<< owning_buffers[i].opaque() << "]";
119+
<< owning_buffers[i]->opaque() << "]";
120120
}
121121

122122
// Since the output buffer and all the temporary buffers were written into
123123
// by the JITed code, msan has no way of knowing their memory was
124124
// initialized. Mark them initialized so that msan doesn't flag loads from
125125
// these buffers.
126-
TF_ANNOTATE_MEMORY_IS_INITIALIZED(owning_buffers[i].opaque(), buffer_size);
126+
TF_ANNOTATE_MEMORY_IS_INITIALIZED(owning_buffers[i]->opaque(), buffer_size);
127127
}
128128

129129
TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
@@ -247,7 +247,7 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::CreateResultShapedBuffer(
247247
// ownership, and hence a buffer coming from there cannot be part of
248248
// the new ScopedShapedBuffer we create for the result (which assumes
249249
// ownership).
250-
*device_memory = buffer.Forget();
250+
*device_memory = buffer.Release();
251251
} else {
252252
auto output_alias = input_output_alias.GetAliasedOutput(
253253
slice.allocation()->parameter_number(),

tensorflow/compiler/xla/service/executable.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ limitations under the License.
3939
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
4040
#include "tensorflow/core/platform/thread_annotations.h"
4141
#include "tensorflow/stream_executor/device_memory_allocator.h"
42-
#include "tensorflow/stream_executor/owning_device_memory.h"
4342

4443
namespace xla {
4544

tensorflow/compiler/xla/service/gpu/buffer_allocations.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,18 @@ StatusOr<std::unique_ptr<BufferAllocations>> BufferAllocations::Builder::Build(
8080
se::OwningDeviceMemory buffer;
8181
TF_ASSIGN_OR_RETURN(
8282
buffer, memory_allocator->Allocate(device_ordinal, buffer_size));
83-
if (reinterpret_cast<uintptr_t>(buffer.opaque()) % expected_alignment !=
83+
if (reinterpret_cast<uintptr_t>(buffer->opaque()) %
84+
expected_alignment !=
8485
0) {
8586
return InternalError(
8687
"Address returned by memory_allocator->Allocate must be a "
8788
"multiple of 0x%x, but was %p",
88-
kXlaAllocatedBufferAlignBytes, buffer.opaque());
89+
kXlaAllocatedBufferAlignBytes, buffer->opaque());
8990
}
9091
// We do manual memory management within BufferAllocations. Be sure not
9192
// to do a TF_RETURN_IF_ERROR between this line and the
9293
// buffer_allocations->SetBuffer(buffer_address) call below!
93-
buffer_address = buffer.Forget();
94+
buffer_address = buffer.Release();
9495
}
9596

9697
buffer_allocations->SetBuffer(i, buffer_address);

0 commit comments

Comments (0)