Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b3664f8
[CUDA] Support SwiGlu in MoE and qMoE (#25530)
tianleiwu Jul 28, 2025
a8e1186
[CUDA] BF16 MoE and qMoE (#25572)
tianleiwu Jul 31, 2025
a9f74a0
Add CUDA implementation of GatherBlockQuantized operator (#25575)
xiaomsft Aug 1, 2025
d83904b
Add support for QMoE in CPU (#25558)
apsonawane Aug 2, 2025
8654241
Update MoE and qMoE spec (#25619)
tianleiwu Aug 2, 2025
6ca2047
[CPU] Improve QMoE kernel (#25822)
apsonawane Aug 26, 2025
dd32daf
Fix MoE CPP tests (#25877)
apsonawane Aug 28, 2025
581b8e7
Add custom ops library_path to EP metadata (#25830)
psakhamoori Aug 29, 2025
a9308a1
[Fix] illegal memory access in GetInputIndices with optional inputs (…
mingyueliuh Aug 29, 2025
6c7f150
[TRT RTX EP] Add sync method (#25898)
gedoensmax Sep 2, 2025
535fcc6
[TRT RTX EP] Memory map the engine buffer (#25909)
gedoensmax Sep 3, 2025
1f4e581
[TRT RTX EP] Add support for RTX runtime caches (#25917)
gedoensmax Sep 3, 2025
9732a3e
Compile API: disable optimizations by default (#25474)
adrianlizarraga Sep 3, 2025
df25f45
[CXX] Introduce C++ API for new C entry points (#25897)
yuslepukhin Sep 3, 2025
8f587b1
Migrate model tests to ONNX Model ZOO only (#25888)
kobby-kobbs Sep 3, 2025
ab71f1e
Remove std::string::data() non-const usage from public headers (#25943)
yuslepukhin Sep 4, 2025
2d36f04
Compile API: output model and initializer stream write functions (#25…
adrianlizarraga Sep 4, 2025
c5096d9
[TRT RTX EP] Fixing the stream parameter in CopyTensors API and passi…
praneshgo Sep 4, 2025
5ee309e
[MLAS] Add 8-bit weights ARM64 Gemm implementation (#25110)
hariharans29 Sep 4, 2025
157df9c
[NV TensorRT RTX] Handle unsupported data types (#25953)
ishwar-raut1 Sep 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Compile API: output model and initializer stream write functions (#25455)

### Description
- Adds `ModelCompilationOptions_SetOutputModelWriteFunc` to the compile
API to allow writing the output model ONNX bytes to a user-provided
write function (i.e., for streaming).
- Adds `ModelCompilationOptions_SetOutputModelHandleInitializerFunc` to
the compile API to allow the user to write individual initializers to
some destination. Also allows specifying if an initializer should be
embedded within the ONNX model or written to a custom file.
- Adds C++, Python, and C# bindings for the new APIs.

A follow-up PR adds a write function for EPContext node binary data:
#25471

### Example
`ModelCompilationOptions_SetOutputModelWriteFunc`:
https://github.com/microsoft/onnxruntime/blob/c62ed23c328cbbfefd3083c1f7a6ced604772c19/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc#L2075-L2131

`ModelCompilationOptions_SetOutputModelHandleInitializerFunc`:

https://github.com/microsoft/onnxruntime/blob/c62ed23c328cbbfefd3083c1f7a6ced604772c19/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc#L2160-L2292

### Motivation and Context
Add output streaming capabilities when saving compiled models.
  • Loading branch information
adrianlizarraga authored and tianleiwu committed Sep 4, 2025
commit 2d36f04b3e23e97e6770a3dbd174ef9c45c53ca0
372 changes: 349 additions & 23 deletions csharp/src/Microsoft.ML.OnnxRuntime/CompileModel.shared.cs

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public struct OrtCompileApi
public IntPtr ModelCompilationOptions_SetFlags;
public IntPtr ModelCompilationOptions_SetEpContextBinaryInformation;
public IntPtr ModelCompilationOptions_SetGraphOptimizationLevel;
public IntPtr ModelCompilationOptions_SetOutputModelWriteFunc;
public IntPtr ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc;
}

internal class NativeMethods
Expand Down Expand Up @@ -118,6 +120,22 @@ public DOrtModelCompilationOptions_SetEpContextBinaryInformation
public DOrtModelCompilationOptions_SetGraphOptimizationLevel
OrtModelCompilationOptions_SetGraphOptimizationLevel;

/// <summary>
/// Managed signature for the native compile API function ModelCompilationOptions_SetOutputModelWriteFunc,
/// which registers a write function (plus an opaque state pointer) on a set of compilation options.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="writeFunc">Native function pointer for the write callback
/// (presumably obtained from a DOrtWriteBufferToDestinationDelegate; confirm at the call site).</param>
/// <param name="state">Opaque state pointer registered together with the callback.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetOutputModelWriteFunc(
IntPtr /* OrtModelCompilationOptions* */ options,
IntPtr /* DOrtWriteBufferToDestinationDelegate */ writeFunc,
IntPtr /* void* */ state);
// Bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetOutputModelWriteFunc
OrtModelCompilationOptions_SetOutputModelWriteFunc;

/// <summary>
/// Managed signature for the native compile API function
/// ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc, which registers a callback
/// (plus an opaque state pointer) on a set of compilation options.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="handleInitializerFunc">Native function pointer for the initializer-location callback
/// (presumably obtained from a DOrtGetInitializerLocationDelegate; confirm at the call site).</param>
/// <param name="state">Opaque state pointer registered together with the callback.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc(
IntPtr /* OrtModelCompilationOptions* */ options,
IntPtr /* DOrtGetInitializerLocationDelegate */ handleInitializerFunc,
IntPtr /* void* */ state);
// Bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc
OrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc;

internal NativeMethods(OnnxRuntime.NativeMethods.DOrtGetCompileApi getCompileApi)
{

Expand Down Expand Up @@ -188,6 +206,17 @@ internal NativeMethods(OnnxRuntime.NativeMethods.DOrtGetCompileApi getCompileApi
_compileApi.ModelCompilationOptions_SetGraphOptimizationLevel,
typeof(DOrtModelCompilationOptions_SetGraphOptimizationLevel));

OrtModelCompilationOptions_SetOutputModelWriteFunc =
(DOrtModelCompilationOptions_SetOutputModelWriteFunc)Marshal.GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetOutputModelWriteFunc,
typeof(DOrtModelCompilationOptions_SetOutputModelWriteFunc));

OrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc =
(DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc)Marshal.
GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc,
typeof(DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc));

}
}
}
91 changes: 91 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ public struct OrtApi

public IntPtr Graph_GetModelMetadata;
public IntPtr GetModelCompatibilityForEpDevices;
public IntPtr CreateExternalInitializerInfo;
}

internal static class NativeMethods
Expand Down Expand Up @@ -787,9 +788,35 @@ static NativeMethods()
api_.SessionOptionsSetEpSelectionPolicyDelegate,
typeof(DSessionOptionsSetEpSelectionPolicyDelegate));

OrtReleaseExternalInitializerInfo =
(DOrtReleaseExternalInitializerInfo)Marshal.GetDelegateForFunctionPointer(
api_.ReleaseExternalInitializerInfo,
typeof(DOrtReleaseExternalInitializerInfo));

OrtExternalInitializerInfo_GetFilePath =
(DOrtExternalInitializerInfo_GetFilePath)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetFilePath,
typeof(DOrtExternalInitializerInfo_GetFilePath));

OrtExternalInitializerInfo_GetFileOffset =
(DOrtExternalInitializerInfo_GetFileOffset)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetFileOffset,
typeof(DOrtExternalInitializerInfo_GetFileOffset));

OrtExternalInitializerInfo_GetByteSize =
(DOrtExternalInitializerInfo_GetByteSize)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetByteSize,
typeof(DOrtExternalInitializerInfo_GetByteSize));

OrtGetModelCompatibilityForEpDevices = (DOrtGetModelCompatibilityForEpDevices)Marshal.GetDelegateForFunctionPointer(
api_.GetModelCompatibilityForEpDevices,
typeof(DOrtGetModelCompatibilityForEpDevices));

OrtCreateExternalInitializerInfo =
(DOrtCreateExternalInitializerInfo)Marshal.GetDelegateForFunctionPointer(
api_.CreateExternalInitializerInfo,
typeof(DOrtCreateExternalInitializerInfo));

}

internal class NativeLib
Expand Down Expand Up @@ -2382,6 +2409,70 @@ out IntPtr lora_adapter
public delegate ref CompileApi.OrtCompileApi DOrtGetCompileApi();
#endif
public static DOrtGetCompileApi OrtGetCompileApi;

/// <summary>
/// Delegate called by ORT to write a buffer (ONNX model bytes) to a custom destination (e.g., file or stream).
/// </summary>
/// <param name="state">State that was provided when the delegate was registered.</param>
/// <param name="buffer">The buffer to write.</param>
/// <param name="bufferNumBytes">The size of the buffer in bytes.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtWriteBufferToDestinationDelegate(
IntPtr /* void* */ state,
IntPtr /* const void* */ buffer,
UIntPtr /* size_t */ bufferNumBytes
);

/// <summary>
/// Function called by ORT to allow the user to specify how an initializer should be saved while compiling
/// a model, that is, either written to an external file or stored within the model. ORT calls this function
/// for every initializer.
/// </summary>
/// <param name="state">State that was provided when the delegate was registered.</param>
/// <param name="initializerName">The initializer's name.</param>
/// <param name="initializerValue">The OrtValue containing the initializer's data, type, and shape.</param>
/// <param name="externalInfo">The original initializer's location in an external file, or NULL.</param>
/// <param name="newExternalInfo">Output parameter set to a new OrtExternalInitializerInfo instance
/// indicating the location where the function implementation stored the initializer data. If the function
/// implementation sets `newExternalInfo` to NULL, ORT stores the initializer within the generated model.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtGetInitializerLocationDelegate(
IntPtr /* void* */ state,
IntPtr /* const char* */ initializerName,
IntPtr /* const OrtValue* */ initializerValue,
IntPtr /* const OrtExternalInitializerInfo* */ externalInfo,
out IntPtr /* OrtExternalInitializerInfo** */ newExternalInfo
);

/// <summary>
/// Releases a native OrtExternalInitializerInfo instance.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle to release.</param>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate void DOrtReleaseExternalInitializerInfo(IntPtr /* OrtExternalInitializerInfo* */ info);

/// <summary>
/// Creates a native OrtExternalInitializerInfo describing where an initializer's data is stored in a file.
/// </summary>
/// <param name="filePath">Path to the file that stores the initializer data, as platform-serialized bytes
/// (see NativeOnnxValueHelper.GetPlatformSerializedString).</param>
/// <param name="fileOffset">The byte offset in the file where the data is stored.</param>
/// <param name="byteSize">The size of the data (in bytes) within the file.</param>
/// <param name="outInfo">Receives the handle of the newly created OrtExternalInitializerInfo.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtCreateExternalInitializerInfo(
byte[] /* const ORTCHAR_T* */ filePath,
long /* int64_t */ fileOffset,
UIntPtr /* size_t */ byteSize,
out IntPtr /* OrtExternalInitializerInfo** */ outInfo);

/// <summary>
/// Gets the path of the file that stores the initializer's data.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>Pointer to a null-terminated native path string (utf-16 on Windows, utf-8 on other
/// operating systems); decode it with NativeOnnxValueHelper.StringFromNativePathString.</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* const ORTCHAR_T* */ DOrtExternalInitializerInfo_GetFilePath(
IntPtr /* const OrtExternalInitializerInfo* */ info);

/// <summary>
/// Gets the byte offset within the file where the initializer's data is stored.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>The file offset (int64_t).</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate long /* int64_t */ DOrtExternalInitializerInfo_GetFileOffset(
IntPtr /* const OrtExternalInitializerInfo* */ info);

/// <summary>
/// Gets the size in bytes of the initializer's data within the file.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>The size in bytes (size_t).</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate UIntPtr /* size_t */ DOrtExternalInitializerInfo_GetByteSize(
IntPtr /* const OrtExternalInitializerInfo* */ info);

// Delegates for the native OrtExternalInitializerInfo functions. They are bound to the OrtApi
// function pointers in the static constructor via Marshal.GetDelegateForFunctionPointer.
public static DOrtReleaseExternalInitializerInfo OrtReleaseExternalInitializerInfo;
public static DOrtCreateExternalInitializerInfo OrtCreateExternalInitializerInfo;
public static DOrtExternalInitializerInfo_GetFilePath OrtExternalInitializerInfo_GetFilePath;
public static DOrtExternalInitializerInfo_GetFileOffset OrtExternalInitializerInfo_GetFileOffset;
public static DOrtExternalInitializerInfo_GetByteSize OrtExternalInitializerInfo_GetByteSize;
#endregion

#region Auto EP API related
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,45 @@ internal static byte[] GetPlatformSerializedString(string str)
else
return StringToZeroTerminatedUtf8(str);
}

/// <summary>
/// Converts a null-terminated native path string, pointed to by the given IntPtr, into
/// a C# UTF-16 string.
/// </summary>
/// <remarks>
/// A path string on Windows is utf-16, but utf-8 on other operating systems.
/// A null pointer yields an empty string on every platform.
/// </remarks>
/// <param name="strPtr">Pointer to the null-terminated native path string. May be IntPtr.Zero.</param>
/// <returns>The decoded path, or an empty string when <paramref name="strPtr"/> is null or
/// points to an empty string.</returns>
internal static string StringFromNativePathString(IntPtr strPtr)
{
    // Guard once, before the platform split, so a null pointer is never handed to either decoder.
    if (strPtr == IntPtr.Zero)
    {
        return string.Empty;
    }

    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        // PtrToStringUni copies all UTF-16 characters up to the first null character, so no
        // manual length scan or unsafe block is needed.
        return Marshal.PtrToStringUni(strPtr) ?? string.Empty;
    }

    return StringFromNativeUtf8(strPtr);
}
}

// Guards an array of disposable objects on stack and disposes them in reverse order
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.


namespace Microsoft.ML.OnnxRuntime
{
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

/// <summary>
/// Class that stores information about the file location where an "external" initializer is stored.
/// </summary>
/// <see cref="OrtModelCompilationOptions.HandleInitializerDelegate"/>
public class OrtExternalInitializerInfo : SafeHandle, IReadOnlyExternalInitializerInfo
{
    // Set to false when constructed with an externally managed constant handle owned by ORT.
    private readonly bool _ownsHandle = true;

    /// <summary>
    /// Create a new OrtExternalInitializerInfo instance.
    /// </summary>
    /// <param name="filePath">The path to the file that stores the initializer data.</param>
    /// <param name="fileOffset">The byte offset in the file where the data is stored.</param>
    /// <param name="byteSize">The size of the data (in bytes) within the file. Must not be negative.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="byteSize"/> is negative.
    /// </exception>
    public OrtExternalInitializerInfo(string filePath, long fileOffset, long byteSize)
        : base(IntPtr.Zero, ownsHandle: true)
    {
        // A negative value would otherwise wrap around to a huge size_t when cast to UIntPtr.
        if (byteSize < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(byteSize), byteSize,
                "The byte size of an external initializer must not be negative.");
        }

        var platformFilePath = NativeOnnxValueHelper.GetPlatformSerializedString(filePath);

        // A failure status returned by the native call is handled by NativeApiStatus.VerifySuccess.
        NativeApiStatus.VerifySuccess(
            NativeMethods.OrtCreateExternalInitializerInfo(platformFilePath, fileOffset, (UIntPtr)byteSize, out handle));
    }

    /// <summary>
    /// Create a new OrtExternalInitializerInfo instance from an existing native OrtExternalInitializerInfo handle.
    /// </summary>
    /// <param name="constHandle">Native OrtExternalInitializerInfo handle.</param>
    /// <param name="ownsHandle">True if the OrtExternalInitializerInfo instance owns the native handle.
    /// Defaults to false.</param>
    internal OrtExternalInitializerInfo(IntPtr constHandle, bool ownsHandle = false)
        : base(IntPtr.Zero, ownsHandle)
    {
        Debug.Assert(constHandle != IntPtr.Zero);
        SetHandle(constHandle);
        _ownsHandle = ownsHandle;
    }

    /// <summary>
    /// Get the file path to the file that stores the initializer's data.
    /// </summary>
    /// <remarks>
    /// The path is relative to the filesystem directory where the ONNX model was stored.
    /// </remarks>
    /// <returns>The file path, or an empty string if the native call returns a null pointer.</returns>
    public string GetFilePath()
    {
        IntPtr filePathPtr = NativeMethods.OrtExternalInitializerInfo_GetFilePath(handle);
        if (filePathPtr == IntPtr.Zero)
        {
            return string.Empty;
        }

        return NativeOnnxValueHelper.StringFromNativePathString(filePathPtr);
    }

    /// <summary>
    /// Get the byte offset within the file where the initializer's data is stored.
    /// </summary>
    /// <returns>The file offset location.</returns>
    public long GetFileOffset()
    {
        return NativeMethods.OrtExternalInitializerInfo_GetFileOffset(handle);
    }

    /// <summary>
    /// Get the size in bytes of the initializer's data within the file.
    /// </summary>
    /// <returns>The size in bytes of the initializer data.</returns>
    /// <exception cref="OverflowException">Thrown when the native size does not fit in a long.</exception>
    public long GetByteSize()
    {
        UIntPtr byteSize = NativeMethods.OrtExternalInitializerInfo_GetByteSize(handle);
        return checked((long)byteSize);
    }

    /// <summary>
    /// Indicates whether the native handle is invalid.
    /// </summary>
    public override bool IsInvalid { get { return handle == IntPtr.Zero; } }

    /// <summary>
    /// Release the native instance of OrtExternalInitializerInfo if we own it.
    /// </summary>
    /// <returns>true on success and false on error.</returns>
    protected override bool ReleaseHandle()
    {
        if (!_ownsHandle)
        {
            // Return false to indicate an error.
            // ReleaseHandle() should not be called on a const handle that this class does not own.
            return false;
        }

        NativeMethods.OrtReleaseExternalInitializerInfo(handle);
        handle = IntPtr.Zero;
        return true;
    }
}

/// <summary>
/// Interface for all read-only methods implemented by OrtExternalInitializerInfo.
/// </summary>
public interface IReadOnlyExternalInitializerInfo
{
/// <summary>
/// Get the file path to the file that stores the initializer's data.
/// </summary>
/// <remarks>
/// The path is relative to the filesystem directory where the ONNX model was stored.
/// </remarks>
/// <returns>The file path.</returns>
string GetFilePath();

/// <summary>
/// Get the byte offset within the file where the initializer's data is stored.
/// </summary>
/// <returns>The file offset location.</returns>
long GetFileOffset();

/// <summary>
/// Get the size in bytes of the initializer's data within the file.
/// </summary>
/// <returns>The size in bytes of the initializer data.</returns>
long GetByteSize();
}
}
Loading