diff --git a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp index 57fd476f0abaa..9148a27517b92 100644 --- a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +++ b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "../node_context.hpp" @@ -29,8 +30,13 @@ OutputVector translate_mulmat(const NodeContext& context) { ov::Output res; ov::Output B = context.get_input(0); ov::Output A = context.get_input(1); - if (context.get_input_type(0) != context.get_input_type(1)) { + + bool convert_out_type = false; + if (ov::op::util::is_constant(B.get_node()) && context.get_input_type(0) != context.get_input_type(1)) { B = std::make_shared(context.get_input(0), context.get_input_type(1)); + } else if (context.get_input_type(0) != context.get_input_type(1)) { + A = std::make_shared(context.get_input(1), context.get_input_type(0)); + convert_out_type = true; } auto B_shape = context.get_input_shape(0).to_shape(); @@ -65,7 +71,12 @@ OutputVector translate_mulmat(const NodeContext& context) { A = Z; } - res = std::make_shared(A, B, false, true); + if (convert_out_type) { + auto result_lp = std::make_shared(A, B, false, true); + res = std::make_shared(result_lp, context.get_output_type(0)); + } else { + res = std::make_shared(A, B, false, true); + } return rename_outputs_with_suffix({res}, context.get_name()); } diff --git a/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.hpp b/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.hpp new file mode 100644 index 0000000000000..163422bf339f7 --- /dev/null +++ b/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "mark_decompression_convert_constant_folding.hpp" +#include "openvino/pass/matcher_pass.hpp" +#include "openvino/core/visibility.hpp" + +#ifdef OPENVINO_STATIC_LIBRARY +# define TRANSFORMATIONS_API +#else +# ifdef IMPLEMENT_OPENVINO_API +# define TRANSFORMATIONS_API OPENVINO_CORE_EXPORTS +# else +# define TRANSFORMATIONS_API OPENVINO_CORE_IMPORTS +# endif // IMPLEMENT_OPENVINO_API +#endif // OPENVINO_STATIC_LIBRARY + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API MarkCompressedFloatConstants; + +} // namespace pass +} // namespace ov + +class ov::pass::MarkCompressedFloatConstants : public MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("MarkCompressedFloatConstants"); + MarkCompressedFloatConstants(); +}; diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index c4fe8c88ee22a..ed7db614148fa 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -28,6 +28,7 @@ #include "ggml-openvino/openvino/utils.hpp" #include "input_model.hpp" #include "pass/fuse_to_sdpa.hpp" +#include "pass/mark_decompression_convert_constant_folding.hpp" namespace ov { namespace frontend { @@ -259,6 +260,8 @@ std::shared_ptr TranslateSession::apply_transformations(std::shared_ptr(); + manager.register_pass(); if (!ggml_model_decoder->is_static()) { const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names(); @@ -267,7 +270,7 @@ std::shared_ptr TranslateSession::apply_transformations(std::shared_ptr(); + manager.register_pass(); manager.run_passes(model); } auto preprocessor = ov::preprocess::PrePostProcessor(model);