35 changes: 25 additions & 10 deletions include/onnxruntime/core/graph/graph.h
@@ -1220,7 +1220,10 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
#endif

#if !defined(ORT_MINIMAL_BUILD)
/** Gets the GraphProto representation of this Graph only. */
/** Gets the GraphProto representation of this Graph only.
* This does not remove in-memory external data references from graph initializers.
* Use ToGraphProto() const to obtain a GraphProto that can be serialized externally.
*/
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
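// Illustrative sketch (not part of this change): choosing between the two overloads,
// assuming a hypothetical non-const Graph named `graph` and <utility> for std::as_const.
//
//   // Non-const overload: the returned proto may still carry in-memory external
//   // data references and must not be serialized as-is.
//   const ONNX_NAMESPACE::GraphProto& live_proto = graph.ToGraphProto();
//
//   // Const overload: returns a copy with in-memory references inlined,
//   // safe to write to disk or hand to another process.
//   ONNX_NAMESPACE::GraphProto serializable = std::as_const(graph).ToGraphProto();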

/// <summary>
@@ -1439,6 +1442,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return Resolve(default_options);
}

/// <summary>
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates an in-memory external data reference for each OrtValue.
/// </summary>
/// <returns>Status indicating success or failure.</returns>
Status ConvertInitializersIntoOrtValues();
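// Illustrative sketch (not part of this change): a hypothetical caller that wants
// OrtValue-backed initializers invokes the new method after resolving the graph.
//
//   ORT_RETURN_IF_ERROR(graph.Resolve());
//   // After this call, each TensorProto initializer holds an in-memory external
//   // data reference to an OrtValue that the Graph keeps alive.
//   ORT_RETURN_IF_ERROR(graph.ConvertInitializersIntoOrtValues());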

const std::unordered_set<std::string>& GetOuterScopeNodeArgNames() const noexcept {
return outer_scope_node_arg_names_;
}
@@ -1595,20 +1605,25 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
/// This function is used by ToGraphProto() to ensure in-memory external data references
/// don't leak externally since they are non-standard.
///
/// It handles two scenarios:
/// - When GraphSynchronizationNeeded() is false: GraphProto is simply copied
/// It is used when GraphSynchronizationNeeded() is false: GraphProto is simply copied
/// from graph_proto_ by ToGraphProto(). This copy includes both main graph
/// and subgraph initializers. This function examines all initializers
/// and inlines any in-memory data references.
/// - When GraphSynchronizationNeeded() is true: ToGraphProto() generates a new GraphProto
/// using ToGraphProtoInternal(). This doesn't transfer main graph initializers, which are
/// copied and inlined by ToGraphProto() itself. This function processes only the subgraph initializers
/// as needed.
/// </summary>
/// <param name="output_graph_proto">The GraphProto to process</param>
/// <param name="process_main">Whether to process the main graph initializers</param>
/// <returns>Status indicating success or failure</returns> ///
Status ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto, bool process_main) const;
/// <returns>Status indicating success or failure</returns>
Status ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;

/// <summary>
/// This function replaces all of the initializers within output_graph_proto
/// with the initializers from this Graph instance. All in-memory initializers are
/// regenerated and inlined. This is necessary even if graph_proto_ is already up to date,
/// because its initializers() may contain obsolete entries that are no longer in use after
/// optimizations, and those entries may hold stale references to OrtValues that no longer
/// exist (since we append initializers rather than replace them).
/// </summary>
/// <param name="output_graph_proto">Destination GraphProto to receive the updated initializers.</param>
/// <returns>Status indicating success or failure.</returns>
Status RegenerateInitializersAndReplaceInMemory(ONNX_NAMESPACE::GraphProto& output_graph_proto) const;

/// <summary>
/// This function traverses the graph bottom up and externalizes
167 changes: 60 additions & 107 deletions onnxruntime/core/graph/graph.cc
@@ -666,12 +666,16 @@ void Node::ToProto(NodeProto& proto, bool update_subgraphs) const {

// Set attributes.
proto.clear_attribute();
for (const auto& attribute : attributes_) {
for (const auto& [name, attribute] : attributes_) {
const gsl::not_null<AttributeProto*> attr{proto.add_attribute()};
*attr = attribute.second; // copy
if (update_subgraphs && attr->has_g()) {
*attr = attribute; // copy
if (update_subgraphs && utils::HasGraph(*attr)) {
auto find_hit = attr_to_subgraph_map_.find(name);
// Take a const reference so the const overload of ToGraphProto() is called,
// which inlines any in-memory TensorProto initializers again.
const Graph& subgraph = *find_hit->second;
attr->clear_g();
*attr->mutable_g() = attr_to_subgraph_map_.find(attribute.first)->second->ToGraphProto();
*attr->mutable_g() = subgraph.ToGraphProto();
}
}

@@ -3381,7 +3385,12 @@ Status Graph::Resolve(const ResolveOptions& options) {

return Status::OK(); };

ORT_RETURN_IF_ERROR(ForThisAndAllSubgraphs(all_subgraphs, finalize_func));
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
}

Status Graph::ConvertInitializersIntoOrtValues() {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);

auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status {
// if we have any initializers that are not in memory, put them there.
@@ -4311,8 +4320,45 @@ Status InlineOrCopyInitializer(const Graph& src_graph, const ONNX_NAMESPACE::Ten

} // namespace

Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto,
bool process_main) const {
Status Graph::RegenerateInitializersAndReplaceInMemory(ONNX_NAMESPACE::GraphProto& output_graph_proto) const {
auto& mutable_initializers = *output_graph_proto.mutable_initializer();
// This does not preserve strong exception safety, but in case of error
// the output is thrown away.
mutable_initializers.Clear();

#if !defined(DISABLE_SPARSE_TENSORS)
output_graph_proto.clear_sparse_initializer();

const auto& model_path = ModelPath();
const bool has_sparse_initializers = !sparse_tensor_names_.empty();
const auto sparse_end = sparse_tensor_names_.end();

for (const auto& [name, tensor_proto] : name_to_initial_tensor_) {
const auto& initializer = *tensor_proto;
if (!has_sparse_initializers || sparse_end == sparse_tensor_names_.find(name)) {
ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
*mutable_initializers.Add()));
} else {
auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
if (utils::HasExternalDataInMemory(initializer)) {
ONNX_NAMESPACE::TensorProto tensor_proto_inlined;
ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
tensor_proto_inlined));
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(tensor_proto_inlined, model_path, sparse_initializer));
} else {
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
}
}
}
#else
for (const auto& [name, tensor_proto] : name_to_initial_tensor_) {
ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, *tensor_proto, *mutable_initializers.Add()));
}
#endif
return Status::OK();
}

Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto) const {
for (const auto& node : Nodes()) {
if (node.ContainsSubgraph()) {
// Let's find this node in the output_graph_proto
@@ -4343,103 +4389,26 @@ Status Graph::ProcessSubgraphsInMemoryData(ONNX_NAMESPACE::GraphProto& output_gr
"Subgraph ", name, " is referred to in GetAttributeNameToSubgraphMap, but not found in node ",
node.Name(), " while attempting to recurse into it.");
auto& result_subgraph = *sub_hit->mutable_g();
ORT_RETURN_IF_ERROR(subgraph->ProcessSubgraphsInMemoryData(result_subgraph, process_main));
ORT_RETURN_IF_ERROR(subgraph->ProcessSubgraphsInMemoryData(result_subgraph));
}
}
}

// When graph_proto is copied from graph_proto, initializers already present in the main graph
if (parent_graph_ != nullptr || process_main) {
#if !defined(DISABLE_SPARSE_TENSORS)
auto* mutable_initializers = output_graph_proto.mutable_initializer();
const auto& model_path = ModelPath();
const bool has_sparse_initializers = !sparse_tensor_names_.empty();
const auto sparse_end = sparse_tensor_names_.end();

// We want to make sure that sparse initializers do not appear
// as dense duplicates within the initializers list.
std::optional<InlinedHashSet<std::string>> initializer_to_remove;
if (has_sparse_initializers) {
// We need to remove the dense initializers that are sparse tensors
initializer_to_remove.emplace();
}

for (auto first = mutable_initializers->begin(), end = mutable_initializers->end(); first != end; ++first) {
auto& initializer = *first;
if (utils::HasExternalDataInMemory(initializer)) {
// If the initializer has external data in memory, we need to inline it.
ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer, initializer));
}
if (has_sparse_initializers && sparse_end != sparse_tensor_names_.find(initializer.name())) {
auto& sparse_initializer = *output_graph_proto.add_sparse_initializer();
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
initializer_to_remove->insert(initializer.name());
}
}

// erase/remove dense initializers that are sparse tensors so no duplicates are present
if (initializer_to_remove && !initializer_to_remove->empty()) {
mutable_initializers->erase(std::remove_if(
mutable_initializers->begin(), mutable_initializers->end(),
[&initializer_to_remove](const ONNX_NAMESPACE::TensorProto& initializer) {
return initializer_to_remove->count(initializer.name()) > 0;
}),
mutable_initializers->end());
}
#else
for (auto& initializer : *output_graph_proto.mutable_initializer()) {
if (utils::HasExternalDataInMemory(initializer)) {
// If the initializer has external data in memory, we need to inline it.
ORT_RETURN_IF_ERROR(InlineOrCopyInitializer(*this, initializer, initializer));
}
}
#endif
}
return Status::OK();
return RegenerateInitializersAndReplaceInMemory(output_graph_proto);
}

ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
GraphProto result;
if (!GraphProtoSyncNeeded()) {
result = *graph_proto_;
ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result, /*process_main*/ true));
ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result));
} else {
// Recursion is handled via Node::ToProto() const -> Graph::ToGraphProto() const (this method)
// so below we handle this graph only.
ToGraphProtoInternal(result);

ORT_THROW_IF_ERROR(ProcessSubgraphsInMemoryData(result, /*process_main*/ false));

// Add initializers to parent graph by copy converting them from graph_proto_
// ToGraphProtoInternal() does not copy initializers for the main graph
auto* mutable_initializers = result.mutable_initializer();

#if !defined(DISABLE_SPARSE_TENSORS)
const auto& model_path = ModelPath();
const bool has_sparse_initializers = !sparse_tensor_names_.empty();
const auto sparse_end = sparse_tensor_names_.end();

for (const auto& initializer : graph_proto_->initializer()) {
if (!has_sparse_initializers || sparse_end == sparse_tensor_names_.find(initializer.name())) {
ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
*mutable_initializers->Add()));
} else {
auto& sparse_initializer = *result.add_sparse_initializer();
if (utils::HasExternalDataInMemory(initializer)) {
ONNX_NAMESPACE::TensorProto tensor_proto;
ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer,
tensor_proto));
ORT_THROW_IF_ERROR(utils::DenseTensorToSparseTensorProto(tensor_proto, model_path, sparse_initializer));
} else {
ORT_THROW_IF_ERROR(utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer));
}
}
}
#else
for (const auto& initializer : graph_proto_->initializer()) {
ORT_THROW_IF_ERROR(InlineOrCopyInitializer(*this, initializer, *mutable_initializers->Add()));
}
#endif
ORT_THROW_IF_ERROR(RegenerateInitializersAndReplaceInMemory(result));
}

return result;
}
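// Illustrative sketch (not part of this change): a hypothetical caller-side check that
// the const ToGraphProto() path leaves no in-memory external data references behind.
// utils::HasExternalDataInMemory is the same helper used by the code above; only the
// top-level initializers are inspected here.
//
//   ONNX_NAMESPACE::GraphProto proto = graph.ToGraphProto();  // graph is a const Graph&
//   for (const auto& initializer : proto.initializer()) {
//     ORT_ENFORCE(!utils::HasExternalDataInMemory(initializer),
//                 "In-memory external data leaked for initializer ", initializer.name());
//   }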

@@ -5235,23 +5204,7 @@ Status Graph::AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& nod
tensor_proto.set_name(std::string(new_name.value()));
}

// In the constant node, we won't have symbolic dims.
const auto tensor_shape = utils::GetTensorShapeFromTensorProto(tensor_proto);
auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
const size_t size_in_bytes = Tensor::CalculateTensorStorageSize(ml_data, tensor_shape);

if (size_in_bytes > utils::kSmallTensorExternalDataThreshold) {
OrtValue ort_value;
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), ModelPath(), tensor_proto,
CPUAllocator::DefaultInstance(), ort_value));

constexpr const bool use_tensor_buffer_true = true;
auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
use_tensor_buffer_true);
ORT_RETURN_IF_ERROR(AddInitializedOrtValue(tensor_proto_to_add, ort_value));
} else {
AddInitializedTensor(tensor_proto);
}
AddInitializedTensor(tensor_proto);

if (GetNodeArg(tensor_proto.name()) == nullptr) {
TypeProto t{utils::TypeProtoFromTensorProto(tensor_proto)};
2 changes: 1 addition & 1 deletion onnxruntime/core/optimizer/attention_fusion.cc
Expand Up @@ -111,7 +111,7 @@ static NodeArg& MergeQkvWeights(Graph& graph, int64_t hidden_size,
utils::SetRawDataInTensorProto(initializer, result.data(), gsl::narrow<size_t>(element_count) * sizeof(MLFloat16));
}

return graph_utils::AddInitializerWithExternalData(graph, initializer);
return graph_utils::AddInitializer(graph, initializer);
}

static NodeArg* ConvertMaskToInt32(Graph& graph, NodeArg* mask_input, ProviderType provider_type,
@@ -189,7 +189,7 @@ NodeArg* CreateInitializerFromVector(Graph& graph,
"total_count: ", total_count, " values.size(): ", values.size());

utils::SetRawDataInTensorProto(const_tensor, values.data(), values.size() * sizeof(int64_t));
return &graph_utils::AddInitializerWithExternalData(graph, const_tensor);
return &graph_utils::AddInitializer(graph, const_tensor);
}

NodeArg* InsertNodesForValidIndices(Graph& graph,
13 changes: 4 additions & 9 deletions onnxruntime/core/optimizer/constant_folding.cc
@@ -95,7 +95,7 @@ static bool ConstantFoldShapeNode(Graph& graph, Node& node) {
ONNX_NAMESPACE::TensorShapeProto result_shape;
result_shape.add_dim()->set_dim_value(clamped_slice_length);
constant_arg_out->SetShape(result_shape);
graph_utils::AddInitializerWithExternalData(graph, shape_constant);
graph_utils::AddInitializer(graph, shape_constant);
}

return is_concrete_shape; // convert to constant if this is true
@@ -317,24 +317,19 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level,
// Build the TensorProto that corresponds to the computed OrtValue and add it as initializer to the graph.
auto* constant_arg_out = node->MutableOutputDefs()[fetch_idx];
const Tensor& out_tensor = ort_value.Get<Tensor>();
constexpr const bool use_tensor_buffer_true = true;
constexpr const bool use_tensor_buffer_false = false;
ONNX_NAMESPACE::TensorProto out_tensorproto = utils::TensorToTensorProto(
out_tensor,
constant_arg_out->Name(),
use_tensor_buffer_true);
use_tensor_buffer_false);

ONNX_NAMESPACE::TensorShapeProto result_shape;
for (auto& dim : out_tensor.Shape().GetDims()) {
result_shape.add_dim()->set_dim_value(dim);
}

constant_arg_out->SetShape(result_shape);
// The data is too small and has been inlined.
if (!utils::HasExternalData(out_tensorproto)) {
ORT_THROW_IF_ERROR(graph.AddInitializedOrtValue(out_tensorproto, OrtValue()));
} else {
ORT_THROW_IF_ERROR(graph.AddInitializedOrtValue(out_tensorproto, ort_value));
}
graph.AddInitializedTensor(out_tensorproto);
}
}
}
4 changes: 2 additions & 2 deletions onnxruntime/core/optimizer/conv_add_fusion.cc
@@ -79,7 +79,7 @@ Status ConvAddFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& modifie
auto new_name = graph.GenerateNodeArgName("ConvAddFusion_B_" + B_input_name);
new_conv_B_tensor_proto.set_name(new_name);

NodeArg& new_conv_B_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_B_tensor_proto);
NodeArg& new_conv_B_node_arg = graph_utils::AddInitializer(graph, new_conv_B_tensor_proto);
graph_utils::ReplaceNodeInput(node, 2, new_conv_B_node_arg);

} else {
@@ -94,7 +94,7 @@ Status ConvAddFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& modifie
auto new_name = graph.GenerateNodeArgName("ConvAddFusion_Add_B_" + add_B_tensor_proto->name());
new_conv_B_tensor_proto.set_name(new_name);

NodeArg& new_add_B_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_B_tensor_proto);
NodeArg& new_add_B_node_arg = graph_utils::AddInitializer(graph, new_conv_B_tensor_proto);
graph_utils::AddNodeInput(node, 2, new_add_B_node_arg);
}

4 changes: 2 additions & 2 deletions onnxruntime/core/optimizer/conv_bn_fusion.cc
@@ -120,10 +120,10 @@ Status ConvBNFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_eff
new_conv_W_tensor_proto.set_name(new_W_name);
new_conv_B_tensor_proto.set_name(new_B_name);

NodeArg& new_conv_W_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_W_tensor_proto);
NodeArg& new_conv_W_node_arg = graph_utils::AddInitializer(graph, new_conv_W_tensor_proto);
graph_utils::ReplaceNodeInput(node, 1, new_conv_W_node_arg);

auto& new_conv_B_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_B_tensor_proto);
auto& new_conv_B_node_arg = graph_utils::AddInitializer(graph, new_conv_B_tensor_proto);

if (conv_inputs.size() == 3) {
graph_utils::ReplaceNodeInput(node, 2, new_conv_B_node_arg);
4 changes: 2 additions & 2 deletions onnxruntime/core/optimizer/conv_mul_fusion.cc
@@ -90,7 +90,7 @@ Status ConvMulFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_ef
new_conv_W_tensor_proto.set_name(new_W_name);

// Replace initializers of conv node
NodeArg& new_conv_W_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_W_tensor_proto);
NodeArg& new_conv_W_node_arg = graph_utils::AddInitializer(graph, new_conv_W_tensor_proto);
graph_utils::ReplaceNodeInput(conv_node, 1, new_conv_W_node_arg);

if (is_3d) {
@@ -100,7 +100,7 @@ Status ConvMulFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_ef
auto new_B_name = graph.GenerateNodeArgName("ConvMulFusion_Mul_B_" + mul_B_tensor_proto->name());
new_conv_B_tensor_proto.set_name(new_B_name);

NodeArg& new_conv_B_node_arg = graph_utils::AddInitializerWithExternalData(graph, new_conv_B_tensor_proto);
NodeArg& new_conv_B_node_arg = graph_utils::AddInitializer(graph, new_conv_B_tensor_proto);
graph_utils::ReplaceNodeInput(conv_node, 2, new_conv_B_node_arg);
}

2 changes: 1 addition & 1 deletion onnxruntime/core/optimizer/double_qdq_pairs_remover.cc
@@ -53,7 +53,7 @@ static void ApplyNewInputValue(Graph& graph, Node& node, QDQ::InputIndex index,
auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name());
new_input_tensor.set_name(new_name);
new_input_tensor.add_dims(1);
NodeArg& new_input = graph_utils::AddInitializerWithExternalData(graph, new_input_tensor);
NodeArg& new_input = graph_utils::AddInitializer(graph, new_input_tensor);
graph_utils::ReplaceNodeInput(node, index, new_input);
}
