Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b3664f8
[CUDA] Support SwiGlu in MoE and qMoE (#25530)
tianleiwu Jul 28, 2025
a8e1186
[CUDA] BF16 MoE and qMoE (#25572)
tianleiwu Jul 31, 2025
a9f74a0
Add CUDA implementation of GatherBlockQuantized operator (#25575)
xiaomsft Aug 1, 2025
d83904b
Add support for QMoE in CPU (#25558)
apsonawane Aug 2, 2025
8654241
Update MoE and qMoE spec (#25619)
tianleiwu Aug 2, 2025
6ca2047
[CPU] Improve QMoE kernel (#25822)
apsonawane Aug 26, 2025
dd32daf
Fix MoE CPP tests (#25877)
apsonawane Aug 28, 2025
581b8e7
Add custom ops library_path to EP metadata (#25830)
psakhamoori Aug 29, 2025
a9308a1
[Fix] illegal memory access in GetInputIndices with optional inputs (…
mingyueliuh Aug 29, 2025
6c7f150
[TRT RTX EP] Add sync method (#25898)
gedoensmax Sep 2, 2025
535fcc6
[TRT RTX EP] Memory map the engine buffer (#25909)
gedoensmax Sep 3, 2025
1f4e581
[TRT RTX EP] Add support for RTX runtime caches (#25917)
gedoensmax Sep 3, 2025
9732a3e
Compile API: disable optimizations by default (#25474)
adrianlizarraga Sep 3, 2025
df25f45
[CXX] Introduce C++ API for new C entry points (#25897)
yuslepukhin Sep 3, 2025
8f587b1
Migrate model tests to ONNX Model ZOO only (#25888)
kobby-kobbs Sep 3, 2025
ab71f1e
Remove std::string::data() non-const usage from public headers (#25943)
yuslepukhin Sep 4, 2025
2d36f04
Compile API: output model and initializer stream write functions (#25…
adrianlizarraga Sep 4, 2025
c5096d9
[TRT RTX EP] Fixing the stream parameter in CopyTensors API and passi…
praneshgo Sep 4, 2025
5ee309e
[MLAS] Add 8-bit weights ARM64 Gemm implementation (#25110)
hariharans29 Sep 4, 2025
157df9c
[NV TensorRT RTX] Handle unsupported data types (#25953)
ishwar-raut1 Sep 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -603,10 +603,6 @@ if(NOT (onnx_FOUND OR ONNX_FOUND)) # building ONNX from source
endif()
endif()

if (onnxruntime_RUN_ONNX_TESTS)
add_definitions(-DORT_RUN_EXTERNAL_ONNX_TESTS)
endif()

if(onnxruntime_ENABLE_DLPACK)
message(STATUS "dlpack is enabled.")

Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/eltwise_kernel_neon.h
${MLAS_SRC_DIR}/eltwise_kernel_neon.cpp
${MLAS_SRC_DIR}/eltwise_kernel_neon_fp16.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
)

set(mlas_platform_preprocess_srcs
Expand Down Expand Up @@ -429,12 +430,16 @@ else()
${MLAS_SRC_DIR}/softmax_kernel_neon.cpp
${MLAS_SRC_DIR}/eltwise_kernel_neon.h
${MLAS_SRC_DIR}/eltwise_kernel_neon.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
)
if (onnxruntime_USE_KLEIDIAI)
setup_kleidiai()
endif()
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8.cpp
PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+dotprod")
set_source_files_properties(${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon_int8_i8mm.cpp
PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+i8mm ")

if (NOT APPLE)
set(mlas_platform_srcs
${mlas_platform_srcs}
Expand Down
397 changes: 376 additions & 21 deletions csharp/src/Microsoft.ML.OnnxRuntime/CompileModel.shared.cs

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ public struct OrtCompileApi
public IntPtr ModelCompilationOptions_SetEpContextEmbedMode;
public IntPtr CompileModel;
public IntPtr ModelCompilationOptions_SetFlags;
public IntPtr ModelCompilationOptions_SetEpContextBinaryInformation;
public IntPtr ModelCompilationOptions_SetGraphOptimizationLevel;
public IntPtr ModelCompilationOptions_SetOutputModelWriteFunc;
public IntPtr ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc;
}

internal class NativeMethods
Expand Down Expand Up @@ -101,6 +105,37 @@ public DOrtModelCompilationOptions_SetOutputModelExternalInitializersFile
uint flags);
public DOrtModelCompilationOptions_SetFlags OrtModelCompilationOptions_SetFlags;

/// <summary>
/// Managed signature of the native compile API function
/// ModelCompilationOptions_SetEpContextBinaryInformation.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="outputDirectory">Output directory as a native path string (ORTCHAR_T) held in a byte array.
/// NOTE(review): presumably zero-terminated and encoded per platform by the caller - confirm at call site.</param>
/// <param name="modelName">Model name as a native path string (ORTCHAR_T) held in a byte array.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetEpContextBinaryInformation(
IntPtr /* OrtModelCompilationOptions* */ options,
byte[] /* const ORTCHAR_T* */ outputDirectory,
byte[] /* const ORTCHAR_T* */ modelName);
// Delegate instance bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetEpContextBinaryInformation
OrtModelCompilationOptions_SetEpContextBinaryInformation;

/// <summary>
/// Managed signature of the native compile API function
/// ModelCompilationOptions_SetGraphOptimizationLevel.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="graphOptimizationLevel">The graph optimization level to set on the compilation options.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetGraphOptimizationLevel(
IntPtr /* OrtModelCompilationOptions* */ options,
GraphOptimizationLevel graphOptimizationLevel);
// Delegate instance bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetGraphOptimizationLevel
OrtModelCompilationOptions_SetGraphOptimizationLevel;

/// <summary>
/// Managed signature of the native compile API function
/// ModelCompilationOptions_SetOutputModelWriteFunc, which registers a callback used to write
/// the output model buffer.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="writeFunc">Native function pointer for the write callback. The matching managed
/// callback signature is OnnxRuntime.NativeMethods.DOrtWriteBufferToDestinationDelegate.</param>
/// <param name="state">Opaque state pointer passed as the 'state' argument.
/// NOTE(review): presumably handed back to the callback on each invocation - confirm against the C API.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetOutputModelWriteFunc(
IntPtr /* OrtModelCompilationOptions* */ options,
IntPtr /* DOrtWriteBufferToDestinationDelegate */ writeFunc,
IntPtr /* void* */ state);
// Delegate instance bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetOutputModelWriteFunc
OrtModelCompilationOptions_SetOutputModelWriteFunc;

/// <summary>
/// Managed signature of the native compile API function
/// ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc, which registers a callback
/// that decides where each initializer is stored.
/// </summary>
/// <param name="options">Native OrtModelCompilationOptions handle.</param>
/// <param name="handleInitializerFunc">Native function pointer for the callback. The matching managed
/// callback signature is OnnxRuntime.NativeMethods.DOrtGetInitializerLocationDelegate.</param>
/// <param name="state">Opaque state pointer passed as the 'state' argument.
/// NOTE(review): presumably handed back to the callback on each invocation - confirm against the C API.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc(
IntPtr /* OrtModelCompilationOptions* */ options,
IntPtr /* DOrtGetInitializerLocationDelegate */ handleInitializerFunc,
IntPtr /* void* */ state);
// Delegate instance bound to the native function pointer in the NativeMethods constructor.
public DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc
OrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc;

internal NativeMethods(OnnxRuntime.NativeMethods.DOrtGetCompileApi getCompileApi)
{

Expand Down Expand Up @@ -161,6 +196,27 @@ internal NativeMethods(OnnxRuntime.NativeMethods.DOrtGetCompileApi getCompileApi
_compileApi.ModelCompilationOptions_SetFlags,
typeof(DOrtModelCompilationOptions_SetFlags));

OrtModelCompilationOptions_SetEpContextBinaryInformation =
(DOrtModelCompilationOptions_SetEpContextBinaryInformation)Marshal.GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetEpContextBinaryInformation,
typeof(DOrtModelCompilationOptions_SetEpContextBinaryInformation));

OrtModelCompilationOptions_SetGraphOptimizationLevel =
(DOrtModelCompilationOptions_SetGraphOptimizationLevel)Marshal.GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetGraphOptimizationLevel,
typeof(DOrtModelCompilationOptions_SetGraphOptimizationLevel));

OrtModelCompilationOptions_SetOutputModelWriteFunc =
(DOrtModelCompilationOptions_SetOutputModelWriteFunc)Marshal.GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetOutputModelWriteFunc,
typeof(DOrtModelCompilationOptions_SetOutputModelWriteFunc));

OrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc =
(DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc)Marshal.
GetDelegateForFunctionPointer(
_compileApi.ModelCompilationOptions_SetOutputModelGetInitializerLocationFunc,
typeof(DOrtModelCompilationOptions_SetOutputModelGetInitializerLocationFunc));

}
}
}
91 changes: 91 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ public struct OrtApi

public IntPtr Graph_GetModelMetadata;
public IntPtr GetModelCompatibilityForEpDevices;
public IntPtr CreateExternalInitializerInfo;
}

internal static class NativeMethods
Expand Down Expand Up @@ -787,9 +788,35 @@ static NativeMethods()
api_.SessionOptionsSetEpSelectionPolicyDelegate,
typeof(DSessionOptionsSetEpSelectionPolicyDelegate));

OrtReleaseExternalInitializerInfo =
(DOrtReleaseExternalInitializerInfo)Marshal.GetDelegateForFunctionPointer(
api_.ReleaseExternalInitializerInfo,
typeof(DOrtReleaseExternalInitializerInfo));

OrtExternalInitializerInfo_GetFilePath =
(DOrtExternalInitializerInfo_GetFilePath)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetFilePath,
typeof(DOrtExternalInitializerInfo_GetFilePath));

OrtExternalInitializerInfo_GetFileOffset =
(DOrtExternalInitializerInfo_GetFileOffset)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetFileOffset,
typeof(DOrtExternalInitializerInfo_GetFileOffset));

OrtExternalInitializerInfo_GetByteSize =
(DOrtExternalInitializerInfo_GetByteSize)Marshal.GetDelegateForFunctionPointer(
api_.ExternalInitializerInfo_GetByteSize,
typeof(DOrtExternalInitializerInfo_GetByteSize));

OrtGetModelCompatibilityForEpDevices = (DOrtGetModelCompatibilityForEpDevices)Marshal.GetDelegateForFunctionPointer(
api_.GetModelCompatibilityForEpDevices,
typeof(DOrtGetModelCompatibilityForEpDevices));

OrtCreateExternalInitializerInfo =
(DOrtCreateExternalInitializerInfo)Marshal.GetDelegateForFunctionPointer(
api_.CreateExternalInitializerInfo,
typeof(DOrtCreateExternalInitializerInfo));

}

internal class NativeLib
Expand Down Expand Up @@ -2382,6 +2409,70 @@ out IntPtr lora_adapter
public delegate ref CompileApi.OrtCompileApi DOrtGetCompileApi();
#endif
public static DOrtGetCompileApi OrtGetCompileApi;

/// <summary>
/// Delegate called by ORT to write a buffer (ONNX model bytes) to a custom destination (e.g., file or stream).
/// </summary>
/// <param name="state">State that was provided when the delegate was registered.</param>
/// <param name="buffer">Pointer to the buffer to write.</param>
/// <param name="bufferNumBytes">The size of the buffer in bytes.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtWriteBufferToDestinationDelegate(
IntPtr /* void* */ state,
IntPtr /* const void* */ buffer,
UIntPtr /* size_t */ bufferNumBytes
);

/// <summary>
/// Function called by ORT to allow user to specify how an initializer should be saved while compiling
/// a model, that is, either written to an external file or stored within the model. ORT calls this function
/// for every initializer.
/// </summary>
/// <param name="state">State that was provided when the delegate was registered.</param>
/// <param name="initializerName">The initializer's name (native const char*).</param>
/// <param name="initializerValue">The OrtValue containing the initializer's data, type, and shape.</param>
/// <param name="externalInfo">The original initializer's location in an external file, or NULL.</param>
/// <param name="newExternalInfo">Output parameter set to a new OrtExternalInitializerInfo instance
/// indicating the location where the function implementation stored the initializer data. If the function
/// implementation sets `newExternalInfo` to NULL, ORT stores the initializer within the generated model.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtGetInitializerLocationDelegate(
IntPtr /* void* */ state,
IntPtr /* const char* */ initializerName,
IntPtr /* const OrtValue* */ initializerValue,
IntPtr /* const OrtExternalInitializerInfo* */ externalInfo,
out IntPtr /* OrtExternalInitializerInfo** */ newExternalInfo
);

/// <summary>
/// Managed signature of the native ReleaseExternalInitializerInfo function.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle to release.</param>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate void DOrtReleaseExternalInitializerInfo(IntPtr /* OrtExternalInitializerInfo* */ info);

/// <summary>
/// Managed signature of the native CreateExternalInitializerInfo function.
/// </summary>
/// <param name="filePath">File path as a native path string (ORTCHAR_T) held in a byte array.</param>
/// <param name="fileOffset">Offset within the file (int64_t).</param>
/// <param name="byteSize">Size in bytes (size_t).</param>
/// <param name="outInfo">Receives the new native OrtExternalInitializerInfo handle.</param>
/// <returns>OrtStatus*</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */ DOrtCreateExternalInitializerInfo(
byte[] /* const ORTCHAR_T* */ filePath,
long /* int64_t */ fileOffset,
UIntPtr /* size_t */ byteSize,
out IntPtr /* OrtExternalInitializerInfo** */ outInfo);

/// <summary>
/// Managed signature of the native ExternalInitializerInfo_GetFilePath function.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>Pointer to a native path string (const ORTCHAR_T*).</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* const ORTCHAR_T* */ DOrtExternalInitializerInfo_GetFilePath(
IntPtr /* const OrtExternalInitializerInfo* */ info);

/// <summary>
/// Managed signature of the native ExternalInitializerInfo_GetFileOffset function.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>The file offset (int64_t).</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate long /* int64_t */ DOrtExternalInitializerInfo_GetFileOffset(
IntPtr /* const OrtExternalInitializerInfo* */ info);

/// <summary>
/// Managed signature of the native ExternalInitializerInfo_GetByteSize function.
/// </summary>
/// <param name="info">Native OrtExternalInitializerInfo handle.</param>
/// <returns>The size in bytes (size_t).</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate UIntPtr /* size_t */ DOrtExternalInitializerInfo_GetByteSize(
IntPtr /* const OrtExternalInitializerInfo* */ info);

// Delegate instances for the OrtExternalInitializerInfo functions; bound to the native
// OrtApi function pointers in the static constructor.
public static DOrtReleaseExternalInitializerInfo OrtReleaseExternalInitializerInfo;
public static DOrtCreateExternalInitializerInfo OrtCreateExternalInitializerInfo;
public static DOrtExternalInitializerInfo_GetFilePath OrtExternalInitializerInfo_GetFilePath;
public static DOrtExternalInitializerInfo_GetFileOffset OrtExternalInitializerInfo_GetFileOffset;
public static DOrtExternalInitializerInfo_GetByteSize OrtExternalInitializerInfo_GetByteSize;
#endregion

#region Auto EP API related
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,45 @@ internal static byte[] GetPlatformSerializedString(string str)
else
return StringToZeroTerminatedUtf8(str);
}

/// <summary>
/// Converts a null-terminated native path string that is pointed to by the given IntPtr handle into
/// a C# UTF-16 string.
/// </summary>
/// <remarks>
/// A path string on Windows is utf-16, but utf-8 on other operating systems.
/// On Windows the UTF-16 code units are copied verbatim via Marshal.PtrToStringUni, so path
/// characters that are not well-formed UTF-16 (e.g. lone surrogates, which Windows file names may
/// contain) are preserved instead of being replaced by U+FFFD by a text decoder.
/// </remarks>
/// <param name="strPtr">Pointer to the null-terminated native path string.</param>
/// <returns>
/// The managed string. On Windows, string.Empty is returned when <paramref name="strPtr"/> is
/// IntPtr.Zero or points to an empty string. On other platforms the result is whatever
/// StringFromNativeUtf8 returns for the pointer.
/// </returns>
internal static string StringFromNativePathString(IntPtr strPtr)
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        // Non-Windows: ORTCHAR_T is char, the path is UTF-8.
        return StringFromNativeUtf8(strPtr);
    }

    if (strPtr == IntPtr.Zero)
    {
        return string.Empty;
    }

    // Windows: ORTCHAR_T is wchar_t. PtrToStringUni scans for the terminating 16-bit zero and
    // copies the characters without requiring an unsafe block. It can return null for pointers
    // it treats as invalid, so normalize that to the empty string like the null-pointer case.
    return Marshal.PtrToStringUni(strPtr) ?? string.Empty;
}
}

// Guards an array of disposable objects on stack and disposes them in reverse order
Expand Down
Loading
Loading