Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions ggml/src/ggml-openvino/openvino/op/mulmat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <openvino/op/slice.hpp>
#include <openvino/op/transpose.hpp>
#include <openvino/op/unsqueeze.hpp>
#include <openvino/op/util/op_types.hpp>
#include <vector>

#include "../node_context.hpp"
Expand All @@ -29,8 +30,13 @@ OutputVector translate_mulmat(const NodeContext& context) {
ov::Output<Node> res;
ov::Output<ov::Node> B = context.get_input(0);
ov::Output<ov::Node> A = context.get_input(1);
if (context.get_input_type(0) != context.get_input_type(1)) {

bool convert_out_type = false;
if (ov::op::util::is_constant(B.get_node()) && context.get_input_type(0) != context.get_input_type(1)) {
B = std::make_shared<ov::op::v0::Convert>(context.get_input(0), context.get_input_type(1));
} else if (context.get_input_type(0) != context.get_input_type(1)) {
A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
convert_out_type = true;
}

auto B_shape = context.get_input_shape(0).to_shape();
Expand Down Expand Up @@ -65,7 +71,12 @@ OutputVector translate_mulmat(const NodeContext& context) {
A = Z;
}

res = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
if (convert_out_type) {
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
} else {
res = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
}

return rename_outputs_with_suffix({res}, context.get_name());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once

#include "mark_decompression_convert_constant_folding.hpp"
#include "openvino/pass/matcher_pass.hpp"
#include "openvino/core/visibility.hpp"

// Select the symbol-visibility decoration used for transformation passes
// declared in this header:
//   - static OpenVINO build       -> no decoration at all
//   - building OpenVINO itself    -> OPENVINO_CORE_EXPORTS
//   - consuming OpenVINO (default)-> OPENVINO_CORE_IMPORTS
// NOTE(review): OPENVINO_CORE_EXPORTS / OPENVINO_CORE_IMPORTS are presumably
// supplied by "openvino/core/visibility.hpp" included above — confirm.
#if defined(OPENVINO_STATIC_LIBRARY)
#    define TRANSFORMATIONS_API
#elif defined(IMPLEMENT_OPENVINO_API)
#    define TRANSFORMATIONS_API OPENVINO_CORE_EXPORTS
#else
#    define TRANSFORMATIONS_API OPENVINO_CORE_IMPORTS
#endif  // OPENVINO_STATIC_LIBRARY / IMPLEMENT_OPENVINO_API

namespace ov {
namespace pass {

class TRANSFORMATIONS_API MarkCompressedFloatConstants;

} // namespace pass
} // namespace ov

class ov::pass::MarkCompressedFloatConstants : public MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MarkCompressedFloatConstants");
MarkCompressedFloatConstants();
};
5 changes: 4 additions & 1 deletion ggml/src/ggml-openvino/openvino/translate_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "ggml-openvino/openvino/utils.hpp"
#include "input_model.hpp"
#include "pass/fuse_to_sdpa.hpp"
#include "pass/mark_decompression_convert_constant_folding.hpp"

namespace ov {
namespace frontend {
Expand Down Expand Up @@ -259,6 +260,8 @@ std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<M
{
ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::MarkCompressedFloatConstants>();
manager.register_pass<ov::pass::ConstantFolding>();

if (!ggml_model_decoder->is_static()) {
const auto kv_param_res_names = ggml_model_decoder->get_kv_param_res_names();
Expand All @@ -267,7 +270,7 @@ std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<M
}

// SDPA is even worse on performance
// manager.register_pass<pass::FuseToSDPA>();
manager.register_pass<pass::FuseToSDPA>();
manager.run_passes(model);
}
auto preprocessor = ov::preprocess::PrePostProcessor(model);
Expand Down
Loading