-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Compile API: output model and initializer stream write functions #25455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
43 commits
Select commit
Hold shift + click to select a range
033a887
Add func typedefs
adrianlizarraga fcdb5cf
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 03eb5fa
stub apis
adrianlizarraga 3310968
merge main
adrianlizarraga c3693de
new branch. add 2 streams first
adrianlizarraga a69d5f9
Move away from using Graph's graph_proto_ member
adrianlizarraga 5743dcd
fix deref assignment
adrianlizarraga fd87e0c
Clean up
adrianlizarraga a40f463
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 0dadf4d
Use std::filesystem::path in ModelCompilationOptions; fix memleak in …
adrianlizarraga d94cf44
fix unused variable warning (as error)
adrianlizarraga 5bfbddb
Merge main and fix conflicts
adrianlizarraga 69a4338
Update handler function signature to take in the ExternalDataInfo for…
adrianlizarraga 90ade82
Add test that reuses external initializers from original model
adrianlizarraga c36afe5
Define new ExternalDataInfo constructor only for non-minimal builds
adrianlizarraga c07dc11
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 4b83a2b
Fix unused variable warning (as error)
adrianlizarraga 91acc8f
another unused variable
adrianlizarraga 6e5629a
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 9b092bf
clean up
adrianlizarraga 049b9ad
Start adding csharp api funcs
adrianlizarraga 8e00a06
Remove qnn_factory memleak fix (address in different PR)
adrianlizarraga 11a6c74
Add ExternalInitializerInfo to C++ api
adrianlizarraga 9ca882f
Add compile_to_stream py api
adrianlizarraga 6d522d8
Python bindings and tests
adrianlizarraga af996bb
C# API for WriteBuffer delegate
adrianlizarraga 9b27b31
c# api handle initializers
adrianlizarraga 9607193
missing documentation in c#
adrianlizarraga e65710a
Add ExternalInitializerInfo C# class
adrianlizarraga c16b327
Full C# API for delegate that handles initializers
adrianlizarraga 0b2f0e6
Update comment
adrianlizarraga 83758d1
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga c62ed23
Address review comments
adrianlizarraga a35e7b6
Address review comments
adrianlizarraga d906855
Remove unused variable
adrianlizarraga 255c2df
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 3db3117
Merge main conflicts
adrianlizarraga c7f98de
Merge main again
adrianlizarraga 9031635
Address review comments for C#
adrianlizarraga abd0297
Rename functions in C and python
adrianlizarraga d5012fb
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga 0e0497a
Address comments
adrianlizarraga 0a61f1f
Merge branch 'main' into adrianl/compile-api-output-stream
adrianlizarraga File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
stub apis
- Loading branch information
commit 03eb5fa8df157afec5928858af1c7e1f56e5eebd
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #include <cassert> | ||
| #include <limits> | ||
| #include <string> | ||
| #include <utility> | ||
| #include "core/common/common.h" | ||
| #include "core/framework/ep_context_options.h" | ||
| #include "core/framework/error_code_helper.h" | ||
| #include "core/session/onnxruntime_session_options_config_keys.h" | ||
|
|
||
| namespace onnxruntime { | ||
| namespace epctx { | ||
| // class ModelGenOptions | ||
|
|
||
| ModelGenOptions::ModelGenOptions(const ConfigOptions& config_options) { | ||
| enable = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1"; | ||
|
|
||
| std::string output_model_path = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); | ||
| if (!output_model_path.empty()) { | ||
| output_model_location = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, ""); | ||
| } else { | ||
| output_model_location = std::monostate{}; | ||
| } | ||
|
|
||
| output_external_initializers_file_path = config_options.GetConfigOrDefault( | ||
| kOrtSessionOptionsEpContextModelExternalInitializersFileName, ""); | ||
| output_external_initializer_size_threshold = 0; | ||
| embed_ep_context_in_model = config_options.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "0") == "1"; | ||
| } | ||
|
|
||
| bool ModelGenOptions::HasOutputModelLocation() const { | ||
| return !std::holds_alternative<std::monostate>(output_model_location); | ||
| } | ||
|
|
||
| const std::string* ModelGenOptions::TryGetOutputModelPath() const { | ||
| return std::get_if<std::string>(&output_model_location); | ||
| } | ||
|
|
||
| const BufferHolder* ModelGenOptions::TryGetOutputModelBuffer() const { | ||
| return std::get_if<BufferHolder>(&output_model_location); | ||
| } | ||
|
|
||
| const OutStreamHolder* ModelGenOptions::TryGetOutputModelOutStream() const { | ||
| return std::get_if<OutStreamHolder>(&output_model_location); | ||
| } | ||
|
|
||
| // class OutStreamBuf | ||
|
|
||
| OutStreamBuf::OutStreamBuf(OutStreamHolder out_stream_holder) : out_stream_holder_(out_stream_holder) { | ||
| setp(buffer_.data(), buffer_.data() + buffer_.size()); | ||
| } | ||
|
|
||
| OutStreamBuf::~OutStreamBuf() { | ||
| sync(); | ||
| } | ||
|
|
||
| // Called when the buffer_ is full. Flushes the buffer_ (via sync()) and then writes the overflow character to buffer_. | ||
| std::streambuf::int_type OutStreamBuf::overflow(std::streambuf::int_type ch) { | ||
| if (sync() == -1) { | ||
| return traits_type::eof(); | ||
| } | ||
|
|
||
| if (ch != traits_type::eof()) { | ||
| *pptr() = static_cast<char>(ch); | ||
| pbump(1); | ||
| } | ||
|
|
||
| return ch; | ||
| } | ||
|
|
||
| // Flushes the entire buffer_ to the user's write function. | ||
| int OutStreamBuf::sync() { | ||
| if (!last_status_.IsOK()) { | ||
| return -1; | ||
| } | ||
|
|
||
| std::ptrdiff_t num_bytes = pptr() - pbase(); | ||
| if (num_bytes == 0) { | ||
| return 0; | ||
| } | ||
|
|
||
| // Can only call pbump() with an int, so can only write at most (2^31 - 1) bytes. | ||
| if (num_bytes > std::numeric_limits<int>::max()) { | ||
| num_bytes = std::numeric_limits<int>::max(); | ||
| } | ||
|
|
||
| char* ptr = pbase(); | ||
|
|
||
| Status status = Status::OK(); | ||
|
|
||
| ORT_TRY { | ||
| status = ToStatus(out_stream_holder_.write_func(out_stream_holder_.stream_state, | ||
| ptr, num_bytes)); | ||
| } | ||
| ORT_CATCH(const std::exception& e) { | ||
| ORT_HANDLE_EXCEPTION([&]() { | ||
| status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, | ||
| "Caught exception while calling user's OrtOutStreamWriteFunc callback: ", e.what()); | ||
| }); | ||
| } | ||
|
|
||
| if (!status.IsOK()) { | ||
| last_status_ = std::move(status); | ||
| return -1; | ||
| } | ||
|
|
||
| pbump(-static_cast<int>(num_bytes)); // Reset internal pointer to point to the beginning of the buffer_ | ||
| return 0; | ||
| } | ||
|
|
||
| } // namespace epctx | ||
| } // namespace onnxruntime |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <streambuf> | ||
| #include <string> | ||
| #include <variant> | ||
| #include "core/framework/allocator.h" | ||
| #include "core/framework/config_options.h" | ||
|
|
||
| namespace onnxruntime { | ||
| namespace epctx { | ||
| /// <summary> | ||
| /// Holds the buffer that will store the output model and the allocator used to allocate the memory. | ||
| /// </summary> | ||
| struct BufferHolder { | ||
| void** buffer_ptr = nullptr; | ||
| size_t* buffer_size_ptr = nullptr; | ||
| AllocatorPtr buffer_allocator = nullptr; | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// Holds the opaque stream state and the write function that ORT calls to write out the output model. | ||
| /// </summary> | ||
| struct OutStreamHolder { | ||
| OrtOutStreamWriteFunc write_func = nullptr; | ||
| void* stream_state = nullptr; // Opaque pointer to user's stream state. Passed as first argument to write_func. | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// Stores EPContext model generation options. Used in SessionOptions. | ||
| /// </summary> | ||
| struct ModelGenOptions { | ||
| ModelGenOptions() = default; | ||
|
|
||
| // Initializes from string key/value pairs in session config options. | ||
| explicit ModelGenOptions(const ConfigOptions& config_options); | ||
|
|
||
| bool enable = false; | ||
| bool overwrite_existing_output_file = false; | ||
| bool error_if_no_compiled_nodes = false; | ||
| bool embed_ep_context_in_model = false; | ||
|
|
||
| std::variant<std::monostate, // Initial state (no output model location) | ||
| std::string, // output model path | ||
| BufferHolder, // buffer to save output model | ||
| OutStreamHolder> // Function to write the output model to a user's stream. | ||
| output_model_location{}; | ||
|
|
||
| std::string output_external_initializers_file_path; | ||
| size_t output_external_initializer_size_threshold = 0; | ||
|
|
||
| bool HasOutputModelLocation() const; | ||
| const std::string* TryGetOutputModelPath() const; | ||
| const BufferHolder* TryGetOutputModelBuffer() const; | ||
| const OutStreamHolder* TryGetOutputModelOutStream() const; | ||
| }; | ||
|
|
||
| // Class that wraps the user's OrtOutStreamWriteFunc function to enable use with | ||
| // C++'s std::ostream. | ||
| // Example: | ||
| // OutStreamHolder stream_holder{write_func, stream_state}; | ||
| // std::unique_ptr<OutStreamBuf> out_stream_buf = std::make_unique<OutStreamBuf>(stream_holder); | ||
| // std::ostream out_stream(out_stream_buf.get()); | ||
| class OutStreamBuf : public std::streambuf { | ||
| public: | ||
| explicit OutStreamBuf(OutStreamHolder out_stream_holder); | ||
| ~OutStreamBuf(); | ||
|
|
||
| const Status& GetStatus() const { | ||
| return last_status_; | ||
| } | ||
|
|
||
| protected: | ||
| int_type overflow(int_type ch) override; | ||
| int sync() override; | ||
|
|
||
| private: | ||
| OutStreamHolder out_stream_holder_{}; | ||
| std::array<char, 4096> buffer_{}; | ||
| Status last_status_{}; | ||
| }; | ||
|
|
||
| } // namespace epctx | ||
| } // namespace onnxruntime | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.