From 70a3ac0846a8f9912978f514cc257f7f177c5c31 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Fri, 12 Dec 2025 19:20:23 +0800
Subject: [PATCH 01/33] add custom pyop IR

---
 paddle/fluid/framework/CMakeLists.txt         |   2 +-
 .../fluid/framework/custom_operator_utils.h   |  39 ++++-
 paddle/fluid/framework/custom_pyoperator.cc   |  66 ++++++++
 paddle/fluid/framework/custom_pyoperator.h    |  37 +++++
 .../pir/dialect/operator/ir/op_dialect.cc     | 155 ++++++++++++++++++
 .../pir/dialect/operator/ir/op_dialect.h      |  32 ++++
 paddle/phi/api/ext/op_meta_info.h             |  26 ++-
 paddle/phi/api/lib/op_meta_info.cc            |  35 ++++
 8 files changed, 387 insertions(+), 5 deletions(-)
 create mode 100644 paddle/fluid/framework/custom_pyoperator.cc
 create mode 100644 paddle/fluid/framework/custom_pyoperator.h

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 4153cc1673f959..039a1dacbe6c48 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -577,7 +577,7 @@ configure_file(commit.h.in commit.h)
 
 cc_library(
   custom_operator
-  SRCS custom_operator.cc
+  SRCS custom_operator.cc custom_pyoperator.cc
   DEPS tensor
        attribute
        op_registry
diff --git a/paddle/fluid/framework/custom_operator_utils.h b/paddle/fluid/framework/custom_operator_utils.h
index e17b0f2dc2bfcb..49f8c6c0405864 100644
--- a/paddle/fluid/framework/custom_operator_utils.h
+++ b/paddle/fluid/framework/custom_operator_utils.h
@@ -26,9 +26,10 @@ limitations under the License. */
 
 namespace paddle {
 namespace framework {
-constexpr char kCustomDialectPrefix[] = "custom_op.";  // NOLINT
-constexpr char kGradSuffix[] = "_grad";                // NOLINT
-constexpr char kDoubleGradSuffix[] = "_grad_grad";     // NOLINT
+constexpr char kCustomDialectPrefix[] = "custom_op.";      // NOLINT
+constexpr char kCustomPyDialectPrefix[] = "custom_pyop.";  // NOLINT
+constexpr char kGradSuffix[] = "_grad";                    // NOLINT
+constexpr char kDoubleGradSuffix[] = "_grad_grad";         // NOLINT
 
 namespace detail {
 
@@ -157,6 +158,38 @@ inline static const OpMetaInfo& GetOpInfoByPirName(
   }
 }
 
+inline static const OpMetaInfo& GetCustomPyOpInfoByPirName(
+    const std::string& pir_op_name) {
+  auto custom_name = pir_op_name.substr(strlen(kCustomPyDialectPrefix));
+  int pos = custom_name.length();
+
+  if (custom_name[pos - 1] == '_') {
+    custom_name = custom_name.substr(0, pos - 1);  // strip inplace '_'
+  }
+  // Cut at the grad suffix (if any) so the map is searched by the base name.
+  pos = custom_name.length();
+  if (custom_name.find(kDoubleGradSuffix) != custom_name.npos) {
+    pos = custom_name.find(kDoubleGradSuffix);
+  } else if (custom_name.find(kGradSuffix) != custom_name.npos) {
+    pos = custom_name.find(kGradSuffix);
+  }
+  auto custom_name_prefix = custom_name.substr(0, pos);  // sans grad suffix
+  auto map_iter =
+      paddle::OpMetaInfoMap::Instance().GetMap().find(custom_name_prefix);
+  if (map_iter == paddle::OpMetaInfoMap::Instance().GetMap().end()) {
+    PADDLE_THROW("The info of custom python op : " + custom_name +
+                 " is not exists!");
+  }
+  const auto& vec_op_meta = map_iter->second;
+  if (custom_name.find(kDoubleGradSuffix) != custom_name.npos) {
+    return vec_op_meta.at(2);  // double-grad; std::out_of_range if absent
+  } else if (custom_name.find(kGradSuffix) != custom_name.npos) {
+    return vec_op_meta.at(1);  // grad; std::out_of_range if absent
+  } else {
+    return vec_op_meta.at(0);  // forward meta
+  }
+}
+
 inline static bool HasGradOp(const std::string& fwd_pir_op_name) {
   auto custom_name = fwd_pir_op_name.substr(strlen(kCustomDialectPrefix));
   int pos = custom_name.length();
diff --git a/paddle/fluid/framework/custom_pyoperator.cc b/paddle/fluid/framework/custom_pyoperator.cc
new file mode 100644
index 00000000000000..d1cb3c6e82bdcf
--- /dev/null
+++ b/paddle/fluid/framework/custom_pyoperator.cc
@@ -0,0 +1,66 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <utility>  // for std::move
+
+#include "paddle/fluid/framework/custom_operator_utils.h"
+#include "paddle/fluid/framework/custom_pyoperator.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/phi/api/ext/op_meta_info.h"
+
+namespace paddle::framework {
+
+void RegisterCustomPyOp(
+    const std::string& op_name,
+    std::vector<std::string>&& op_inputs,
+    std::vector<std::string>&& op_outputs,
+    std::vector<std::string>&& op_attrs,
+    std::unordered_map<std::string, std::string>&& op_inplace_map,
+    WrapPythonFunction&& pyop_func,
+    WrapInferMetaPythonFunction&& pyop_func_infer_meta) {
+  ::paddle::OpMetaInfoBuilder op_meta_info_builder =
+      ::paddle::OpMetaInfoBuilder(std::string(op_name), 0);
+  op_meta_info_builder.Inputs(std::move(op_inputs))
+      .Outputs(std::move(op_outputs))
+      .Attrs(std::move(op_attrs))
+      .SetInplaceMap(std::move(op_inplace_map))
+      .SetPyCustomPyOpFunction(pyop_func)
+      .SetPyCustomPyOpInferMetaFunction(pyop_func_infer_meta);
+
+  const std::vector<paddle::OpMetaInfo>& op_meta_info_vector =
+      OpMetaInfoMap::Instance()[op_name];
+  PADDLE_ENFORCE_EQ(op_meta_info_vector.size(),
+                    1,
+                    common::errors::OutOfRange(
+                        "Current op_name(%s) must not be registered more "
+                        "than one, because it don't support gradient op.",
+                        op_name));
+
+  const auto& op_meta_info = op_meta_info_vector.back();
+
+  auto& inplace_map = OpMetaInfoHelper::GetInplaceMap(op_meta_info);
+  const auto postfix = inplace_map.empty() ? "" : "_";
+
+  ::pir::IrContext* ctx = ::pir::IrContext::Instance();
+  auto* custom_pyop_dialect =
+      ctx->GetOrRegisterDialect<paddle::dialect::CustomPyOpDialect>();
+
+  if (custom_pyop_dialect->HasRegistered(
+          paddle::framework::kCustomPyDialectPrefix + op_name + postfix)) {
+    return;
+  }
+  custom_pyop_dialect->RegisterCustomPyOp(op_meta_info);
+}
+
+}  // namespace paddle::framework
diff --git a/paddle/fluid/framework/custom_pyoperator.h b/paddle/fluid/framework/custom_pyoperator.h
new file mode 100644
index 00000000000000..84f3aced72df7f
--- /dev/null
+++ b/paddle/fluid/framework/custom_pyoperator.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
+#include "paddle/phi/api/ext/op_meta_info.h"
+
+namespace paddle {
+namespace framework {
+
+void RegisterCustomPyOp(
+    const std::string& op_name,
+    std::vector<std::string>&& op_inputs,
+    std::vector<std::string>&& op_outputs,
+    std::vector<std::string>&& op_attrs,
+    std::unordered_map<std::string, std::string>&& op_inplace_map,
+    WrapPythonFunction&& func,
+    WrapInferMetaPythonFunction&& infer_meta);
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
index 78f01a78ad6bc6..696dffdf7c5c17 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
@@ -622,6 +622,86 @@ struct CustomOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
   CustomOpInfoInterfaceModel() : OpYamlInfoInterface::Concept(GetPirOpInfo) {}
 };
 
+struct CustomPyOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
+  static OpInfoTuple GetPirOpInfo(const std::string& pir_op_name) {
+    const auto& op_meta =
+        paddle::framework::detail::GetCustomPyOpInfoByPirName(pir_op_name);
+
+    // TODO(DrRyanHuang): we may support custom_pyop's grad op in the future
+    // const auto* grad_op_meta_ptr =
+    //     paddle::framework::detail::GetGradOpInfoByFwdPirName(pir_op_name);
+    std::vector<paddle::dialect::OpInputInfo> inputs_info;
+    std::vector<paddle::dialect::OpAttributeInfo> attributes_info;
+    std::vector<paddle::dialect::OpOutputInfo> outputs_info;
+    std::vector<std::string> param_names;  // input names, then attr names
+
+    // Translate each declared input into an OpInputInfo entry.
+    auto& op_input_names = OpMetaInfoHelper::GetInputs(op_meta);
+    for (const auto& input_name : op_input_names) {
+      param_names.push_back(input_name);
+      // Only dense tensors are supported as inputs for now.
+      inputs_info.push_back(paddle::dialect::OpInputInfo{
+          input_name,
+          /*input_type=*/"paddle::dialect::DenseTensorType",
+          /*optional=*/false,
+          /*no_need_buffer=*/false,
+          /*is_mutable_attribute=*/false,
+          /*with_grad_semantic=*/false});
+    }
+
+    // Translate attrs; ParseAttrStr yields {name, type} per attr string.
+    auto& op_attrs = OpMetaInfoHelper::GetAttrs(op_meta);
+    for (const auto& op_attr : op_attrs) {
+      auto attr_name_and_type = paddle::ParseAttrStr(op_attr);
+      // CustomPyOp only has int64_t attr
+      const std::string& attr_name = attr_name_and_type[0];
+      const std::string& attr_type_str = attr_name_and_type[1];
+      PADDLE_ENFORCE_EQ(attr_type_str,
+                        "int64_t",
+                        common::errors::InvalidArgument(
+                            "CustomPyOp only has two int64_t attributes, which "
+                            "are infer_meta_fn_ptr & fn_ptr."));
+      param_names.push_back(attr_name);
+      const std::string& attr_pir_type =
+          CppTypeToAttrTypeMap().at(attr_type_str);
+      attributes_info.emplace_back(attr_name, attr_pir_type, "");
+    }
+
+    // Translate each declared output into an OpOutputInfo entry.
+    auto& op_output_names = OpMetaInfoHelper::GetOutputs(op_meta);
+    for (const auto& output_name : op_output_names) {
+      // Only dense tensors are supported as outputs for now.
+      outputs_info.push_back(paddle::dialect::OpOutputInfo{
+          output_name,
+          /*type_name=*/"paddle::dialect::DenseTensorType",
+          /*is_optional=*/false,
+          /*intermediate=*/false});
+    }
+
+    auto& inplace_maps = OpMetaInfoHelper::GetInplaceReverseMap(op_meta);
+    if (!inplace_maps.empty()) {
+      VLOG(3) << "Register Custom Python Operator: op inplace_map: "
+              << string::join_strings(inplace_maps, ',', [](auto& pair) {
+                   return pair.first + ": " + pair.second;
+                 });
+    }
+    std::vector<std::pair<std::string, std::string>> vec_inplace;
+    for (const auto& inplace_map : inplace_maps) {
+      vec_inplace.emplace_back(inplace_map);
+    }
+
+    // Only the kernel parameter names and inplace pairs are filled in here.
+    paddle::dialect::OpRunTimeInfo run_time_info =
+        paddle::dialect::OpRunTimeInfo(
+            "", {}, "", param_names, {}, {}, vec_inplace, {});
+
+    return std::make_tuple(
+        inputs_info, attributes_info, outputs_info, run_time_info, "");
+  }
+
+  CustomPyOpInfoInterfaceModel() : OpYamlInfoInterface::Concept(GetPirOpInfo) {}
+};
+
 struct CustomOpVjpInterfaceModel : public VjpInterface::Concept {
   static std::vector<std::vector<pir::Value>> CustomOpVjp(
       pir::Operation* op,
@@ -1141,6 +1221,80 @@ void CustomOpDialect::RegisterCustomOp(const paddle::OpMetaInfo& op_meta) {
                                verify_func);
 }
 
+CustomPyOpDialect::CustomPyOpDialect(pir::IrContext* context)
+    : pir::Dialect(name(), context, pir::TypeId::get<CustomPyOpDialect>()) {}
+
+void CustomPyOpDialect::PrintType(pir::Type type, std::ostream& os) const {
+  PrintTypeImpl(type, os);
+}
+
+void CustomPyOpDialect::PrintAttribute(pir::Attribute attr,
+                                       std::ostream& os) const {
+  PrintAttributeImpl(attr, os);
+}
+
+pir::OpPrintFn CustomPyOpDialect::PrintOperation(
+    const pir::Operation& op) const {
+  return nullptr;
+}
+
+void CustomPyOpDialect::RegisterCustomPyOp(const paddle::OpMetaInfo& op_meta) {
+  pir::TypeId id = IdManager::Instance().CreateId();
+  std::string op_name = paddle::framework::kCustomPyDialectPrefix +
+                        OpMetaInfoHelper::GetOpName(op_meta);
+  std::vector<pir::TypeId> traits;
+
+  auto& inplace_map = OpMetaInfoHelper::GetInplaceMap(op_meta);
+  if (!inplace_map.empty()) {
+    op_name += "_";  // inplace ops carry a trailing '_' in their PIR name
+    traits.push_back(pir::TypeId::get<paddle::dialect::InplaceTrait>());
+  }
+
+  char* op_name_c = new char[op_name.size() + 1];  // stored in op_names_
+  snprintf(op_name_c, op_name.size() + 1, "%s", op_name.c_str());
+  op_names_.push_back(op_name_c);
+
+  auto& op_attrs = OpMetaInfoHelper::GetAttrs(op_meta);
+  std::vector<std::string> attr_names;
+  for (const auto& op_attr : op_attrs) {
+    auto attr_name_and_type = paddle::ParseAttrStr(op_attr);
+    auto attr_name = attr_name_and_type[0];
+    attr_names.push_back(attr_name);
+  }
+  const char** attr_name =
+      AttributeManager::Instance().ToCharPointers(attr_names);
+  uint32_t attr_num = attr_names.size();
+
+  VLOG(3) << "Register Custom Python Operator: attr_num: " << attr_num;
+
+  std::set<pir::InterfaceValue> interface_values;
+  pir::InterfaceValue op_info_interface =
+      pir::InterfaceValue::Get<OpYamlInfoInterface,
+                               CustomPyOpInfoInterfaceModel>();
+  interface_values.insert(std::move(op_info_interface));
+
+  // TODO(DrRyanHuang): Currently, we do not support vjp for customPyOp.
+  // if (paddle::framework::detail::HasGradOp(op_name)) {
+  //   pir::InterfaceValue vjp_interface =
+  //       pir::InterfaceValue::Get<VjpInterface,
+  //       CustomPyOpVjpInterfaceModel>();
+  //   interface_values.insert(std::move(vjp_interface));
+  // }
+
+  // Currently we set empty verify function and will reset it if it is used in
+  // future.
+  pir::VerifyPtr verify_func = [](pir::Operation* op) {};
+  ir_context()->RegisterOpInfo(this,
+                               id,
+                               op_names_.back(),
+                               std::move(interface_values),
+                               traits,
+                               attr_num,
+                               attr_name,
+                               verify_func,
+                               verify_func);
+}
+
 // customEngineDialect
 
 CustomEngineDialect::CustomEngineDialect(pir::IrContext* context)
@@ -1169,4 +1323,5 @@ pir::OpPrintFn CustomEngineDialect::PrintOperation(
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OperatorDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomOpDialect)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyOpDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomEngineDialect)
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
index 233457880f324d..7130543318fbd6 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
@@ -48,6 +48,11 @@ inline bool IsCustomOp(pir::Operation* op) {
   return op_name.find("custom_op") != op_name.npos;
 }
 
+inline bool IsCustomPyOp(pir::Operation* op) {
+  const std::string& op_name = op->name();
+  return op_name.find("custom_pyop") != op_name.npos;
+}
+
 inline bool IsCustomEngineOp(pir::Operation* op) {
   std::string op_name = op->name();
   return op_name.find("custom_engine") != op_name.npos;
@@ -89,6 +94,32 @@ class CustomOpDialect : public pir::Dialect {
   std::vector<const char*> op_names_;
 };
 
+class CustomPyOpDialect : public pir::Dialect {
+ public:
+  explicit CustomPyOpDialect(pir::IrContext* context);
+
+  constexpr static const char* name() { return "custom_pyop"; }
+
+  void PrintType(pir::Type type, std::ostream& os) const override;
+  void PrintAttribute(pir::Attribute type, std::ostream& os) const override;
+
+  pir::OpPrintFn PrintOperation(
+      const pir::Operation& op) const override;  // NOLINT
+
+  void RegisterCustomPyOp(const paddle::OpMetaInfo& op_meta);
+
+  bool HasRegistered(const std::string& op_name) {
+    if (std::find(op_names_.begin(), op_names_.end(), op_name) !=
+        op_names_.end()) {
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  std::vector<const char*> op_names_;
+};
+
 class TEST_API CustomEngineDialect : public pir::Dialect {
  public:
   explicit CustomEngineDialect(pir::IrContext* context);
@@ -121,4 +152,5 @@ class TEST_API CustomEngineDialect : public pir::Dialect {
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OperatorDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomOpDialect)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyOpDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomEngineDialect)
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index a3253bb5a0098a..fb2c00a6e5749d 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -22,12 +22,12 @@ limitations under the License. */
 
 #include "paddle/common/exception.h"
 #include "paddle/common/macros.h"
+#include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/distributed/type_defs.h"
 #include "paddle/utils/any.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
-
 #ifdef PADDLE_WITH_TENSORRT
 #include "NvInfer.h"
 #endif
@@ -995,6 +995,12 @@ using InferSpmdFunc = phi::distributed::SpmdInfo (*)(
     const std::vector<CustomSpmdInferTensorArg>& inputs,
     const std::vector<CustomSpmdInferAttrArg>& attrs);
 
+using WrapPythonFunction =
+    std::function<std::vector<Tensor>(std::vector<Tensor>&)>;
+using IrTensor = paddle::dialect::IrTensor;
+using WrapInferMetaPythonFunction = std::function<std::vector<IrTensor>(
+    const std::vector<paddle::dialect::IrTensor>&)>;
+
 class PADDLE_API OpMetaInfo {
  public:
   explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
@@ -1025,6 +1031,11 @@ class PADDLE_API OpMetaInfo {
   // format: PD_INFER_SPMD_RULE(...)
   OpMetaInfo& SetInferSpmdFn(InferSpmdFunc&& func);
 
+  // CustomPyOp
+  OpMetaInfo& SetCustomPyOpFunction(WrapPythonFunction&& func);
+  OpMetaInfo& SetCustomPyOpInferMetaFunction(
+      WrapInferMetaPythonFunction&& func);
+
   bool IsGradOp() const;
 
   bool IsDoubleGradOp() const;
@@ -1052,6 +1063,9 @@ class PADDLE_API OpMetaInfo {
   InferShapeFunc infer_shape_fn_{nullptr};
   InferDtypeFunc infer_dtype_fn_{nullptr};
   InferSpmdFunc infer_spmd_fn_{nullptr};
+  // 3. custom pyop function
+  WrapPythonFunction pyop_func_{nullptr};
+  WrapInferMetaPythonFunction pyop_func_infer_meta_{nullptr};
 #ifdef PADDLE_WITH_TENSORRT
   TrtGetOutputDimsFunc trt_infer_shape_fn_{nullptr};
   std::vector<std::string> trt_supports_format_config_;
@@ -1077,6 +1091,12 @@ class OpMetaInfoHelper {
   static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info);
   static const InferSpmdFunc& GetInferSpmdFn(const paddle::OpMetaInfo& info);
 
+  // Python Custom Op
+  static const WrapPythonFunction& GetPyCustomPyOpFunction(
+      const paddle::OpMetaInfo& info);
+  static const WrapInferMetaPythonFunction& GetPyCustomPyOpInferMetaFunction(
+      const paddle::OpMetaInfo& info);
+
 #ifdef PADDLE_WITH_TENSORRT
   static const TrtGetOutputDimsFunc& GetTrtInferShapeFn(
       const paddle::OpMetaInfo& info);
@@ -1118,6 +1138,10 @@ class PADDLE_API OpMetaInfoBuilder {
   OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
   OpMetaInfoBuilder& SetInferSpmdFn(InferSpmdFunc func);
 
+  OpMetaInfoBuilder& SetPyCustomPyOpFunction(WrapPythonFunction func);
+  OpMetaInfoBuilder& SetPyCustomPyOpInferMetaFunction(
+      WrapInferMetaPythonFunction func);
+
 #ifdef PADDLE_WITH_TENSORRT
   OpMetaInfoBuilder& SetTrtInferShapeFn(TrtGetOutputDimsFunc func);
   OpMetaInfoBuilder& SetTrtSupportsFormatConfig(
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 28490d48111d0f..662ec90130bf53 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -450,6 +450,16 @@ OpMetaInfo& OpMetaInfo::SetInferSpmdFn(InferSpmdFunc&& func) {
   infer_spmd_fn_ = std::forward<InferSpmdFunc>(func);
   return *this;
 }
+OpMetaInfo& OpMetaInfo::SetCustomPyOpFunction(WrapPythonFunction&& func) {
+  pyop_func_ = std::forward<WrapPythonFunction>(func);
+  return *this;
+}
+
+OpMetaInfo& OpMetaInfo::SetCustomPyOpInferMetaFunction(
+    WrapInferMetaPythonFunction&& func) {
+  pyop_func_infer_meta_ = std::forward<WrapInferMetaPythonFunction>(func);
+  return *this;
+}
 
 bool OpMetaInfo::IsDoubleGradOp() const {
   if (name_.find("_grad_grad") != name_.npos) {
@@ -519,6 +529,18 @@ const InferSpmdFunc& OpMetaInfoHelper::GetInferSpmdFn(
   return info.infer_spmd_fn_;
 }
 
+// Python Custom Op
+const WrapPythonFunction& OpMetaInfoHelper::GetPyCustomPyOpFunction(
+    const paddle::OpMetaInfo& info) {
+  return info.pyop_func_;
+}
+
+const WrapInferMetaPythonFunction&
+OpMetaInfoHelper::GetPyCustomPyOpInferMetaFunction(
+    const paddle::OpMetaInfo& info) {
+  return info.pyop_func_infer_meta_;
+}
+
 #ifdef PADDLE_WITH_TENSORRT
 const TrtGetOutputDimsFunc& OpMetaInfoHelper::GetTrtInferShapeFn(
     const paddle::OpMetaInfo& info) {
@@ -678,6 +700,19 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferSpmdFn(InferSpmdFunc func) {
   return *this;
 }
 
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetPyCustomPyOpFunction(
+    WrapPythonFunction func) {
+  info_ptr_->SetCustomPyOpFunction(std::forward<WrapPythonFunction>(func));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetPyCustomPyOpInferMetaFunction(
+    WrapInferMetaPythonFunction func) {
+  info_ptr_->SetCustomPyOpInferMetaFunction(
+      std::forward<WrapInferMetaPythonFunction>(func));
+  return *this;
+}
+
 #ifdef PADDLE_WITH_TENSORRT
 OpMetaInfoBuilder& OpMetaInfoBuilder::SetTrtInferShapeFn(
     TrtGetOutputDimsFunc func) {

From 0a0c7d55ab53f900aa5f0f50a38e37b8621f6b51 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Mon, 15 Dec 2025 11:29:44 +0800
Subject: [PATCH 02/33] rename custom_pyop -> python_operator

---
 paddle/fluid/framework/CMakeLists.txt         |  2 +-
 .../fluid/framework/custom_operator_utils.h   |  6 +--
 ...ustom_pyoperator.cc => python_operator.cc} | 24 ++++++------
 ...{custom_pyoperator.h => python_operator.h} |  6 +--
 .../pir/dialect/operator/ir/op_dialect.cc     | 37 ++++++++++---------
 .../pir/dialect/operator/ir/op_dialect.h      | 12 +++---
 paddle/phi/api/ext/op_meta_info.h             | 28 +++++++-------
 paddle/phi/api/lib/op_meta_info.cc            | 29 ++++++++-------
 8 files changed, 75 insertions(+), 69 deletions(-)
 rename paddle/fluid/framework/{custom_pyoperator.cc => python_operator.cc} (75%)
 rename paddle/fluid/framework/{custom_pyoperator.h => python_operator.h} (90%)

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 039a1dacbe6c48..84d0fe03619e0e 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -577,7 +577,7 @@ configure_file(commit.h.in commit.h)
 
 cc_library(
   custom_operator
-  SRCS custom_operator.cc custom_pyoperator.cc
+  SRCS custom_operator.cc python_operator.cc
   DEPS tensor
        attribute
        op_registry
diff --git a/paddle/fluid/framework/custom_operator_utils.h b/paddle/fluid/framework/custom_operator_utils.h
index 49f8c6c0405864..82d1ba36f9797d 100644
--- a/paddle/fluid/framework/custom_operator_utils.h
+++ b/paddle/fluid/framework/custom_operator_utils.h
@@ -27,7 +27,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 constexpr char kCustomDialectPrefix[] = "custom_op.";      // NOLINT
-constexpr char kCustomPyDialectPrefix[] = "custom_pyop.";  // NOLINT
+constexpr char kPythonOperatorDialectPrefix[] = "py_op.";  // NOLINT
 constexpr char kGradSuffix[] = "_grad";                    // NOLINT
 constexpr char kDoubleGradSuffix[] = "_grad_grad";         // NOLINT
 
@@ -158,9 +158,9 @@ inline static const OpMetaInfo& GetOpInfoByPirName(
   }
 }
 
-inline static const OpMetaInfo& GetCustomPyOpInfoByPirName(
+inline static const OpMetaInfo& GetPythonOperatorInfoByPirName(
     const std::string& pir_op_name) {
-  auto custom_name = pir_op_name.substr(strlen(kCustomPyDialectPrefix));
+  auto custom_name = pir_op_name.substr(strlen(kPythonOperatorDialectPrefix));
   int pos = custom_name.length();
 
   if (custom_name[pos - 1] == '_') {
diff --git a/paddle/fluid/framework/custom_pyoperator.cc b/paddle/fluid/framework/python_operator.cc
similarity index 75%
rename from paddle/fluid/framework/custom_pyoperator.cc
rename to paddle/fluid/framework/python_operator.cc
index d1cb3c6e82bdcf..2a5850a4d8b0e3 100644
--- a/paddle/fluid/framework/custom_pyoperator.cc
+++ b/paddle/fluid/framework/python_operator.cc
@@ -15,28 +15,28 @@
 #include <utility>  // for std::move
 
 #include "paddle/fluid/framework/custom_operator_utils.h"
-#include "paddle/fluid/framework/custom_pyoperator.h"
+#include "paddle/fluid/framework/python_operator.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 
 namespace paddle::framework {
 
-void RegisterCustomPyOp(
+void RegisterPythonOperator(
     const std::string& op_name,
     std::vector<std::string>&& op_inputs,
     std::vector<std::string>&& op_outputs,
     std::vector<std::string>&& op_attrs,
     std::unordered_map<std::string, std::string>&& op_inplace_map,
-    WrapPythonFunction&& pyop_func,
-    WrapInferMetaPythonFunction&& pyop_func_infer_meta) {
+    PythonOperatorFunctionType&& pyop_func,
+    PythonOperatorInferMetaFunctionType&& pyop_func_infer_meta) {
   ::paddle::OpMetaInfoBuilder op_meta_info_builder =
       ::paddle::OpMetaInfoBuilder(std::string(op_name), 0);
   op_meta_info_builder.Inputs(std::move(op_inputs))
       .Outputs(std::move(op_outputs))
       .Attrs(std::move(op_attrs))
       .SetInplaceMap(std::move(op_inplace_map))
-      .SetPyCustomPyOpFunction(pyop_func)
-      .SetPyCustomPyOpInferMetaFunction(pyop_func_infer_meta);
+      .SetPythonOperatorFunction(pyop_func)
+      .SetPythonOperatorInferMetaFunction(pyop_func_infer_meta);
 
   const std::vector<paddle::OpMetaInfo>& op_meta_info_vector =
       OpMetaInfoMap::Instance()[op_name];
@@ -50,17 +50,17 @@ void RegisterCustomPyOp(
   const auto& op_meta_info = op_meta_info_vector.back();
 
   auto& inplace_map = OpMetaInfoHelper::GetInplaceMap(op_meta_info);
-  const auto postfix = inplace_map.empty() ? "" : "_";
+  const auto suffix = inplace_map.empty() ? "" : "_";
 
   ::pir::IrContext* ctx = ::pir::IrContext::Instance();
-  auto* custom_pyop_dialect =
-      ctx->GetOrRegisterDialect<paddle::dialect::CustomPyOpDialect>();
+  auto* python_operator_dialect =
+      ctx->GetOrRegisterDialect<paddle::dialect::PythonOperatorDialect>();
 
-  if (custom_pyop_dialect->HasRegistered(
-          paddle::framework::kCustomPyDialectPrefix + op_name + postfix)) {
+  if (python_operator_dialect->HasRegistered(
+          paddle::framework::kPythonOperatorDialectPrefix + op_name + suffix)) {
     return;
   }
-  custom_pyop_dialect->RegisterCustomPyOp(op_meta_info);
+  python_operator_dialect->RegisterPythonOperator(op_meta_info);
 }
 
 }  // namespace paddle::framework
diff --git a/paddle/fluid/framework/custom_pyoperator.h b/paddle/fluid/framework/python_operator.h
similarity index 90%
rename from paddle/fluid/framework/custom_pyoperator.h
rename to paddle/fluid/framework/python_operator.h
index 84f3aced72df7f..e44ade91e0526d 100644
--- a/paddle/fluid/framework/custom_pyoperator.h
+++ b/paddle/fluid/framework/python_operator.h
@@ -24,14 +24,14 @@
 namespace paddle {
 namespace framework {
 
-void RegisterCustomPyOp(
+void RegisterPythonOperator(
     const std::string& op_name,
     std::vector<std::string>&& op_inputs,
     std::vector<std::string>&& op_outputs,
     std::vector<std::string>&& op_attrs,
     std::unordered_map<std::string, std::string>&& op_inplace_map,
-    WrapPythonFunction&& func,
-    WrapInferMetaPythonFunction&& infer_meta);
+    PythonOperatorFunctionType&& func,
+    PythonOperatorInferMetaFunctionType&& infer_meta);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
index 696dffdf7c5c17..784d9b7e050f6c 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
@@ -625,9 +625,9 @@ struct CustomOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
 struct CustomPyOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
   static OpInfoTuple GetPirOpInfo(const std::string& pir_op_name) {
     const auto& op_meta =
-        paddle::framework::detail::GetCustomPyOpInfoByPirName(pir_op_name);
+        paddle::framework::detail::GetPythonOperatorInfoByPirName(pir_op_name);
 
-    // TODO(DrRyanHuang): we may support custom_pyop's grad op in the future
+    // TODO(DrRyanHuang): we may support py_op's grad op in the future
     // const auto* grad_op_meta_ptr =
     //     paddle::framework::detail::GetGradOpInfoByFwdPirName(pir_op_name);
     std::vector<paddle::dialect::OpInputInfo> inputs_info;
@@ -653,14 +653,15 @@ struct CustomPyOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
     auto& op_attrs = OpMetaInfoHelper::GetAttrs(op_meta);
     for (const auto& op_attr : op_attrs) {
       auto attr_name_and_type = paddle::ParseAttrStr(op_attr);
-      // CustomPyOp only has int64_t attr
+      // PythonOperator only has int64_t attr
       const std::string& attr_name = attr_name_and_type[0];
       const std::string& attr_type_str = attr_name_and_type[1];
-      PADDLE_ENFORCE_EQ(attr_type_str,
-                        "int64_t",
-                        common::errors::InvalidArgument(
-                            "CustomPyOp only has two int64_t attributes, which "
-                            "are infer_meta_fn_ptr & fn_ptr."));
+      PADDLE_ENFORCE_EQ(
+          attr_type_str,
+          "int64_t",
+          common::errors::InvalidArgument(
+              "PythonOperator only has two int64_t attributes, which "
+              "are infer_meta_fn_ptr & fn_ptr."));
       param_names.push_back(attr_name);
       const std::string& attr_pir_type =
           CppTypeToAttrTypeMap().at(attr_type_str);
@@ -1221,26 +1222,28 @@ void CustomOpDialect::RegisterCustomOp(const paddle::OpMetaInfo& op_meta) {
                                verify_func);
 }
 
-CustomPyOpDialect::CustomPyOpDialect(pir::IrContext* context)
-    : pir::Dialect(name(), context, pir::TypeId::get<CustomPyOpDialect>()) {}
+PythonOperatorDialect::PythonOperatorDialect(pir::IrContext* context)
+    : pir::Dialect(name(), context, pir::TypeId::get<PythonOperatorDialect>()) {
+}
 
-void CustomPyOpDialect::PrintType(pir::Type type, std::ostream& os) const {
+void PythonOperatorDialect::PrintType(pir::Type type, std::ostream& os) const {
   PrintTypeImpl(type, os);
 }
 
-void CustomPyOpDialect::PrintAttribute(pir::Attribute attr,
-                                       std::ostream& os) const {
+void PythonOperatorDialect::PrintAttribute(pir::Attribute attr,
+                                           std::ostream& os) const {
   PrintAttributeImpl(attr, os);
 }
 
-pir::OpPrintFn CustomPyOpDialect::PrintOperation(
+pir::OpPrintFn PythonOperatorDialect::PrintOperation(
     const pir::Operation& op) const {
   return nullptr;
 }
 
-void CustomPyOpDialect::RegisterCustomPyOp(const paddle::OpMetaInfo& op_meta) {
+void PythonOperatorDialect::RegisterPythonOperator(
+    const paddle::OpMetaInfo& op_meta) {
   pir::TypeId id = IdManager::Instance().CreateId();
-  std::string op_name = paddle::framework::kCustomPyDialectPrefix +
+  std::string op_name = paddle::framework::kPythonOperatorDialectPrefix +
                         OpMetaInfoHelper::GetOpName(op_meta);
   std::vector<pir::TypeId> traits;
 
@@ -1323,5 +1326,5 @@ pir::OpPrintFn CustomEngineDialect::PrintOperation(
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OperatorDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomOpDialect)
-IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyOpDialect)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PythonOperatorDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomEngineDialect)
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
index 7130543318fbd6..946b529071914e 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
@@ -50,7 +50,7 @@ inline bool IsCustomOp(pir::Operation* op) {
 
 inline bool IsCustomPyOp(pir::Operation* op) {
   const std::string& op_name = op->name();
-  return op_name.find("custom_pyop") != op_name.npos;
+  return op_name.find("py_op") != op_name.npos;
 }
 
 inline bool IsCustomEngineOp(pir::Operation* op) {
@@ -94,11 +94,11 @@ class CustomOpDialect : public pir::Dialect {
   std::vector<const char*> op_names_;
 };
 
-class CustomPyOpDialect : public pir::Dialect {
+class PythonOperatorDialect : public pir::Dialect {
  public:
-  explicit CustomPyOpDialect(pir::IrContext* context);
+  explicit PythonOperatorDialect(pir::IrContext* context);
 
-  constexpr static const char* name() { return "custom_pyop"; }
+  constexpr static const char* name() { return "py_op"; }
 
   void PrintType(pir::Type type, std::ostream& os) const override;
   void PrintAttribute(pir::Attribute type, std::ostream& os) const override;
@@ -106,7 +106,7 @@ class CustomPyOpDialect : public pir::Dialect {
   pir::OpPrintFn PrintOperation(
       const pir::Operation& op) const override;  // NOLINT
 
-  void RegisterCustomPyOp(const paddle::OpMetaInfo& op_meta);
+  void RegisterPythonOperator(const paddle::OpMetaInfo& op_meta);
 
   bool HasRegistered(const std::string& op_name) {
     if (std::find(op_names_.begin(), op_names_.end(), op_name) !=
@@ -152,5 +152,5 @@ class TEST_API CustomEngineDialect : public pir::Dialect {
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OperatorDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomOpDialect)
-IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyOpDialect)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PythonOperatorDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomEngineDialect)
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index fb2c00a6e5749d..9f247a9c9e295c 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -995,11 +995,11 @@ using InferSpmdFunc = phi::distributed::SpmdInfo (*)(
     const std::vector<CustomSpmdInferTensorArg>& inputs,
     const std::vector<CustomSpmdInferAttrArg>& attrs);
 
-using WrapPythonFunction =
+using PythonOperatorFunctionType =
     std::function<std::vector<Tensor>(std::vector<Tensor>&)>;
 using IrTensor = paddle::dialect::IrTensor;
-using WrapInferMetaPythonFunction = std::function<std::vector<IrTensor>(
-    const std::vector<paddle::dialect::IrTensor>&)>;
+using PythonOperatorInferMetaFunctionType =
+    std::function<std::vector<IrTensor>(const std::vector<IrTensor>&)>;
 
 class PADDLE_API OpMetaInfo {
  public:
@@ -1031,10 +1031,10 @@ class PADDLE_API OpMetaInfo {
   // format: PD_INFER_SPMD_RULE(...)
   OpMetaInfo& SetInferSpmdFn(InferSpmdFunc&& func);
 
-  // CustomPyOp
-  OpMetaInfo& SetCustomPyOpFunction(WrapPythonFunction&& func);
+  // PythonOperator
+  OpMetaInfo& SetCustomPyOpFunction(PythonOperatorFunctionType&& func);
   OpMetaInfo& SetCustomPyOpInferMetaFunction(
-      WrapInferMetaPythonFunction&& func);
+      PythonOperatorInferMetaFunctionType&& func);
 
   bool IsGradOp() const;
 
@@ -1064,8 +1064,8 @@ class PADDLE_API OpMetaInfo {
   InferDtypeFunc infer_dtype_fn_{nullptr};
   InferSpmdFunc infer_spmd_fn_{nullptr};
   // 3. custom pyop function
-  WrapPythonFunction pyop_func_{nullptr};
-  WrapInferMetaPythonFunction pyop_func_infer_meta_{nullptr};
+  PythonOperatorFunctionType pyop_func_{nullptr};
+  PythonOperatorInferMetaFunctionType pyop_func_infer_meta_{nullptr};
 #ifdef PADDLE_WITH_TENSORRT
   TrtGetOutputDimsFunc trt_infer_shape_fn_{nullptr};
   std::vector<std::string> trt_supports_format_config_;
@@ -1092,10 +1092,10 @@ class OpMetaInfoHelper {
   static const InferSpmdFunc& GetInferSpmdFn(const paddle::OpMetaInfo& info);
 
   // Python Custom Op
-  static const WrapPythonFunction& GetPyCustomPyOpFunction(
-      const paddle::OpMetaInfo& info);
-  static const WrapInferMetaPythonFunction& GetPyCustomPyOpInferMetaFunction(
+  static const PythonOperatorFunctionType& GetPythonOperatorFunction(
       const paddle::OpMetaInfo& info);
+  static const PythonOperatorInferMetaFunctionType&
+  GetPythonOperatorInferMetaFunction(const paddle::OpMetaInfo& info);
 
 #ifdef PADDLE_WITH_TENSORRT
   static const TrtGetOutputDimsFunc& GetTrtInferShapeFn(
@@ -1138,9 +1138,9 @@ class PADDLE_API OpMetaInfoBuilder {
   OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
   OpMetaInfoBuilder& SetInferSpmdFn(InferSpmdFunc func);
 
-  OpMetaInfoBuilder& SetPyCustomPyOpFunction(WrapPythonFunction func);
-  OpMetaInfoBuilder& SetPyCustomPyOpInferMetaFunction(
-      WrapInferMetaPythonFunction func);
+  OpMetaInfoBuilder& SetPythonOperatorFunction(PythonOperatorFunctionType func);
+  OpMetaInfoBuilder& SetPythonOperatorInferMetaFunction(
+      PythonOperatorInferMetaFunctionType func);
 
 #ifdef PADDLE_WITH_TENSORRT
   OpMetaInfoBuilder& SetTrtInferShapeFn(TrtGetOutputDimsFunc func);
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 662ec90130bf53..00832c24d71e4e 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -450,14 +450,16 @@ OpMetaInfo& OpMetaInfo::SetInferSpmdFn(InferSpmdFunc&& func) {
   infer_spmd_fn_ = std::forward<InferSpmdFunc>(func);
   return *this;
 }
-OpMetaInfo& OpMetaInfo::SetCustomPyOpFunction(WrapPythonFunction&& func) {
-  pyop_func_ = std::forward<WrapPythonFunction>(func);
+OpMetaInfo& OpMetaInfo::SetCustomPyOpFunction(
+    PythonOperatorFunctionType&& func) {
+  pyop_func_ = std::forward<PythonOperatorFunctionType>(func);
   return *this;
 }
 
 OpMetaInfo& OpMetaInfo::SetCustomPyOpInferMetaFunction(
-    WrapInferMetaPythonFunction&& func) {
-  pyop_func_infer_meta_ = std::forward<WrapInferMetaPythonFunction>(func);
+    PythonOperatorInferMetaFunctionType&& func) {
+  pyop_func_infer_meta_ =
+      std::forward<PythonOperatorInferMetaFunctionType>(func);
   return *this;
 }
 
@@ -530,13 +532,13 @@ const InferSpmdFunc& OpMetaInfoHelper::GetInferSpmdFn(
 }
 
 // Python Custom Op
-const WrapPythonFunction& OpMetaInfoHelper::GetPyCustomPyOpFunction(
+const PythonOperatorFunctionType& OpMetaInfoHelper::GetPythonOperatorFunction(
     const paddle::OpMetaInfo& info) {
   return info.pyop_func_;
 }
 
-const WrapInferMetaPythonFunction&
-OpMetaInfoHelper::GetPyCustomPyOpInferMetaFunction(
+const PythonOperatorInferMetaFunctionType&
+OpMetaInfoHelper::GetPythonOperatorInferMetaFunction(
     const paddle::OpMetaInfo& info) {
   return info.pyop_func_infer_meta_;
 }
@@ -700,16 +702,17 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferSpmdFn(InferSpmdFunc func) {
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetPyCustomPyOpFunction(
-    WrapPythonFunction func) {
-  info_ptr_->SetCustomPyOpFunction(std::forward<WrapPythonFunction>(func));
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetPythonOperatorFunction(
+    PythonOperatorFunctionType func) {
+  info_ptr_->SetCustomPyOpFunction(
+      std::forward<PythonOperatorFunctionType>(func));
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetPyCustomPyOpInferMetaFunction(
-    WrapInferMetaPythonFunction func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetPythonOperatorInferMetaFunction(
+    PythonOperatorInferMetaFunctionType func) {
   info_ptr_->SetCustomPyOpInferMetaFunction(
-      std::forward<WrapInferMetaPythonFunction>(func));
+      std::forward<PythonOperatorInferMetaFunctionType>(func));
   return *this;
 }
 

From 4c7d96d875262e6fee2d06b5f7ee379b87e59123 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Mon, 15 Dec 2025 15:37:12 +0800
Subject: [PATCH 03/33] add all

---
 .../custom_pyop_func_instruction.cc           | 496 ++++++++++++++++++
 .../custom_pyop_func_instruction.h            |  87 +++
 .../framework/new_executor/pir_interpreter.cc |   7 +-
 .../pir/dialect/kernel/ir/kernel_dialect.cc   |  45 ++
 .../pir/dialect/kernel/ir/kernel_dialect.h    |  18 +
 .../fluid/pir/dialect/kernel/ir/kernel_op.cc  |  41 ++
 .../fluid/pir/dialect/kernel/ir/kernel_op.h   |  13 +
 .../pir/dialect/operator/ir/op_dialect.cc     |   7 +-
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 114 +++-
 paddle/fluid/pybind/eager_utils.cc            | 182 +++++++
 paddle/fluid/pybind/eager_utils.h             |   8 +
 .../fluid/pybind/manual_static_op_function.h  | 446 ++++++++++++++++
 paddle/phi/api/ext/op_meta_info.h             |   4 +-
 paddle/pir/include/core/operation.h           |   2 +-
 paddle/pir/src/core/operation.cc              |   2 +-
 .../executor/function_graph.py                |   4 +-
 python/paddle/static/custom_pyop.py           | 293 +++++++++++
 python/paddle/static/meta_tensor.py           |  31 +-
 18 files changed, 1785 insertions(+), 15 deletions(-)
 create mode 100644 paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
 create mode 100644 paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h
 create mode 100644 python/paddle/static/custom_pyop.py

diff --git a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
new file mode 100644
index 00000000000000..5d3fb17d49b948
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
@@ -0,0 +1,496 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h"
+#include "paddle/fluid/framework/custom_operator_utils.h"
+#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
+#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/dialect/operator/utils/utils.h"
+#include "paddle/phi/api/ext/op_meta_info.h"
+#include "paddle/pir/include/core/builtin_attribute.h"
+#include "paddle/pir/include/core/operation.h"
+#include "paddle/pir/include/core/value.h"
+
+COMMON_DECLARE_bool(check_cuda_error);
+
+namespace paddle::framework {
+
+void CustomPyOpFuncInstruction::BuildCustomContext(
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
+  PADDLE_ENFORCE_NOT_NULL(
+      custom_op_meta_,
+      common::errors::PreconditionNotMet(
+          "CustomPyOpFuncInstruction: custom_op_meta_ is null"));
+
+  auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
+  VLOG(6) << "op_inplace_map.size(): " << op_inplace_map.size();
+
+  // check inplace
+  for (auto const& pair : op_inplace_map) {
+    pir::Value output_value =
+        op_->result(op_yaml_info.OutputName2Id().at(pair.second));
+    if (paddle::framework::detail::IsDuplicableVar(pair.first) &&
+        !IsInvalid(output_value)) {
+      // make sure ctx has valid inplace optional outputs
+      PADDLE_ENFORCE(
+          paddle::framework::detail::IsOptionalVar(pair.second),
+          common::errors::InvalidArgument(
+              "Custom operator couldn't find custom output name for %s. If "
+              "you are using inplace optional inputs & outputs, please "
+              "check "
+              "your InplaceMap and `Outputs` again and make sure %s is "
+              "wrapped by `paddle::Optional`",
+              pair.second,
+              pair.second));
+    }
+  }
+
+  Scope* inner_scope = value_exec_info_.GetScope();
+  VLOG(6) << "Build custom python op infermeta param inner_scope["
+          << inner_scope << "]";
+
+  auto attr_map = op_->attributes();
+
+  // EmplaceBackInputs
+  auto& vec_input_tensor_params = op_yaml_info.TensorParams(true);
+  auto& name2id = op_yaml_info.InputName2Id();
+  auto inplace_id_map = op_yaml_info.GetInplaceIdMap();
+  int input_index = 0;
+  int vec_input_index = 0;
+
+  for (const std::string& t : vec_input_tensor_params) {
+    VLOG(6) << "for (const auto& t : vec_input_tensor_params) {   " << t;
+    PADDLE_ENFORCE_EQ(
+        name2id.count(t),
+        true,
+        common::errors::NotFound("param [%s] MUST in name2id map", t));
+
+    pir::Value ptr = op_->operand_source(op_yaml_info.InputName2Id().at(t));
+    if (!IsInvalid(ptr)) {
+      if (op_yaml_info.GetInputType(op_yaml_info.InputName2Id().at(t)) ==
+          "pir::VectorType<paddle::dialect::DenseTensorType>") {
+        vec_input_name2id_map_[t] = vec_input_index;
+        vec_input_index++;
+        vec_input_ptrs_.emplace_back();
+        // NOTE(YuanRisheng): In dygraph mode, we can not distinguish Tensor and
+        // vector<Tensor> when user inputs None, so dygraph mode appends one
+        // un-initialized Tensor to CustomOpKernelContext. To be compatible with
+        // dygraph mode, `custom_vec_in` also emplace_back one un-initialized
+        // tensor here.
+        std::vector<paddle::Tensor> custom_vec_in;
+        custom_vec_in.emplace_back(paddle::Tensor());
+        custom_kernel_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
+      } else {
+        input_name2id_map_[t] = input_index;
+        input_index++;
+        input_ptrs_.emplace_back(nullptr);
+        custom_kernel_ctx_.EmplaceBackInput(paddle::Tensor());
+      }
+      VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t;
+      continue;
+    }
+    auto in_var_name = value_exec_info_.GetVarName(ptr);
+
+    VLOG(6) << "ctx->EmplaceBackInput: " << t << "\t" << in_var_name;
+
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
+                            common::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", in_var_name));
+    auto var = inner_scope->FindVar(in_var_name);
+    if (var->IsType<phi::DenseTensor>()) {
+      auto dense_tensor_in = var->GetMutable<phi::DenseTensor>();
+
+      std::shared_ptr<phi::DenseTensor> tensor_in(
+          dense_tensor_in, [](phi::DenseTensor* ptr) {
+            VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+          });
+      input_name2id_map_[t] = input_index;
+      input_index++;
+      input_ptrs_.push_back(dense_tensor_in);
+      paddle::Tensor custom_in;
+      custom_in.set_impl(tensor_in);
+      custom_kernel_ctx_.EmplaceBackInput(std::move(custom_in));
+    } else if (var->IsType<VariableRefArray>()) {
+      std::vector<phi::DenseTensor*> vec_input_ptrs;
+      std::vector<paddle::Tensor> vec_custom_in;
+      auto& variable_array = var->Get<VariableRefArray>();
+      for (size_t i = 0; i < variable_array.size(); ++i) {
+        if (variable_array[i]->IsType<phi::DenseTensor>()) {
+          phi::DenseTensor* dense_tensor_in = const_cast<phi::DenseTensor*>(
+              &(variable_array[i]->Get<phi::DenseTensor>()));
+          std::shared_ptr<phi::DenseTensor> tensor_in(
+              dense_tensor_in, [](phi::DenseTensor* ptr) {
+                VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+              });
+          vec_input_ptrs.push_back(dense_tensor_in);
+          paddle::Tensor custom_in;
+          custom_in.set_impl(tensor_in);
+          vec_custom_in.push_back(std::move(custom_in));
+        } else {
+          PADDLE_THROW(common::errors::Unimplemented(
+              "Only support Vector<DenseTensor> and vector<SelectedRows> now, "
+              "not support vector<%d>.",
+              variable_array[i]->Type()));
+        }
+      }
+      vec_input_name2id_map_[t] = vec_input_index;
+      vec_input_index++;
+      vec_input_ptrs_.push_back(vec_input_ptrs);
+      custom_kernel_ctx_.EmplaceBackInputs(vec_custom_in);
+    } else {
+      PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d] ",
+                                                 var->Type()));
+    }
+  }
+
+  // EmplaceBackAttributes
+  const std::vector<std::string>& vec_attr_params =
+      op_yaml_info.AttrParams(true);
+  for (auto& t : vec_attr_params) {
+    PADDLE_ENFORCE_NE(attr_map.find(t),
+                      attr_map.end(),
+                      common::errors::NotFound(
+                          "Not found %s in attr_map, it maybe need mapping "
+                          "it in OpTranslator.",
+                          t));
+    auto& attr_type_name = op_yaml_info.AttrTypeName(t);
+    if (attr_type_name == "pir::Int32Attribute") {
+      custom_attrs_.push_back(
+          attr_map[t].dyn_cast<pir::Int32Attribute>().data());
+      custom_kernel_ctx_.EmplaceBackAttr(
+          attr_map[t].dyn_cast<pir::Int32Attribute>().data());
+    } else if (attr_type_name == "pir::Int64Attribute") {
+      custom_attrs_.push_back(
+          attr_map[t].dyn_cast<pir::Int64Attribute>().data());
+      custom_kernel_ctx_.EmplaceBackAttr(
+          attr_map[t].dyn_cast<pir::Int64Attribute>().data());
+    } else {
+      PADDLE_THROW(common::errors::Unimplemented("attr type not support [%s] ",
+                                                 attr_type_name));
+    }
+    VLOG(6) << "ctx->EmplaceBackAttr: " << t;
+  }
+
+  // EmplaceBackOutputs
+  VLOG(8) << "ctx->EmplaceBackOutput: ";
+  for (size_t i = 0; i < op_->num_results(); ++i) {
+    pir::Value out_ptr = op_->result(i);
+    auto out_name = op_yaml_info.OutputNames()[i];
+    VLOG(0) << "out_name: " << out_name;
+    VLOG(0) << "!IsInvalid(out_ptr)" << !IsInvalid(out_ptr);
+    if (!IsInvalid(out_ptr)) {
+      PADDLE_ENFORCE(
+          paddle::framework::detail::IsOptionalVar(out_name) &&
+              !inplace_id_map.empty(),
+          common::errors::InvalidArgument(
+              "Custom operator couldn't find custom output for name %s. If "
+              "you "
+              "are using inplace optional inputs & outputs, please check "
+              "your "
+              "InplaceMap and `Outputs` again and make sure %s is wrapped by "
+              "`paddle::Optional`",
+              out_name,
+              out_name));
+      VLOG(3) << "Custom Operator: BuildContext - inplace optional outputs : "
+              << out_name << " is None.";
+      custom_kernel_ctx_.EmplaceBackOutput(paddle::Tensor());
+
+      VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
+      continue;
+    }
+    VLOG(0) << "WHere am I?";
+    if (out_ptr.type().isa<paddle::dialect::AllocatedDenseTensorType>()) {
+      VLOG(0) << "WHere am I?  1111111111";
+      auto dense_tensor_out =
+          inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
+              ->GetMutable<phi::DenseTensor>();
+      cache_out_ptrs_.push_back(dense_tensor_out);
+      std::shared_ptr<phi::DenseTensor> tensor_out(
+          dense_tensor_out, [](phi::DenseTensor* ptr) {
+            VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+          });
+      paddle::Tensor custom_out;
+      // only a copy of the output tensor can be put into the context here
+      custom_out.set_impl(tensor_out);
+
+      custom_kernel_ctx_.EmplaceBackOutput(std::move(custom_out));
+      VLOG(8) << "ctx->EmplaceBackOutput DenseTensor: "
+              << value_exec_info_.GetVarName(out_ptr);
+    } else if (out_ptr.type().isa<pir::VectorType>()) {
+      VLOG(0) << "WHere am I?  222222222222";
+      std::vector<paddle::Tensor> vec_custom_out;
+      auto& variable_array =
+          inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
+              ->Get<VariableRefArray>();
+      std::vector<paddle::Tensor> custom_vec_out;
+      PADDLE_ENFORCE(
+          !inplace_id_map.empty() || (i == 0UL && op_->num_results() == 1UL),
+          common::errors::PreconditionNotMet(
+              "If custom operator's outputs contains `paddle::Vec()` type "
+              "without setting InplaceMap, it only can hold one output."));
+      for (size_t j = 0; j < variable_array.size(); ++j) {
+        if (variable_array[j]->IsType<phi::DenseTensor>()) {
+          auto dense_tensor_out = const_cast<phi::DenseTensor*>(
+              &(variable_array[j]->Get<phi::DenseTensor>()));
+          cache_out_ptrs_.emplace_back(dense_tensor_out);
+          std::shared_ptr<phi::DenseTensor> tensor_out(
+              dense_tensor_out, [](phi::DenseTensor* ptr) {
+                VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+              });
+          paddle::Tensor custom_out;
+          custom_out.set_impl(tensor_out);
+          custom_vec_out.push_back(std::move(custom_out));
+        } else {
+          PADDLE_THROW(common::errors::Unimplemented(
+              "Only support Vector<DenseTensor> now, "
+              "not support vector<%d>.",
+              variable_array[j]->Type()));
+        }
+      }
+      VLOG(8) << "ctx->EmplaceBackOutput VariableRefArray: "
+              << value_exec_info_.GetVarName(out_ptr);
+      custom_kernel_ctx_.EmplaceBackOutputs(custom_vec_out);
+    } else {
+      PADDLE_THROW(common::errors::Unimplemented(
+          "only support DenseTensor and vector "));
+    }
+  }
+
+  auto& op_inputs = OpMetaInfoHelper::GetInputs(*custom_op_meta_);
+  auto& op_outputs = OpMetaInfoHelper::GetOutputs(*custom_op_meta_);
+
+  // handle inplace map
+  custom_kernel_ctx_.UpdatePlainOutputs(op_inputs, op_outputs, op_inplace_map);
+  VLOG(6) << "Done build custom context";
+}
+
+CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
+    size_t id,
+    const phi::Place& place,
+    pir::Operation* op,
+    const ValueExecutionInfo& value_exec_info)
+    : InstructionBase(id, place),
+      input_name2id_map_(),
+      vec_input_name2id_map_(),
+      input_shapes_(),
+      vec_input_shapes_(),
+      custom_attrs_(),
+      input_dtypes_(),
+      vec_input_dtypes_(),
+      input_ptrs_(),
+      vec_input_ptrs_(),
+      cache_out_ptrs_(),
+      value_exec_info_(value_exec_info) {
+  std::cout << "CustomPyOpFuncInstruction::CustomPyOpFuncInstruction"
+            << std::endl;
+
+  // auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
+  auto op_attributes = op->attributes();
+
+  for (const auto& attr : op_attributes) {
+    VLOG(6) << "111111attr name: " << attr.first
+            << " attr type: " << attr.second;
+  }
+
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  custom_op_name_ = op_name;
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  VLOG(6) << "construct custom kernel instruction for: " << op_name;
+
+  VLOG(6) << "finish process dist attributes";
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      common::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", op_name));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(op_name),
+      paddle::dialect::IsLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+  const auto& op_meta =
+      paddle::framework::detail::GetPythonOperatorInfoByPirName(op_name);
+  custom_op_meta_ = &op_meta;  // TODO: drop custom_op_meta_ later? Little use.
+  // infershape_func_ = OpMetaInfoHelper::GetInferShapeFn(op_meta);
+  // inferdtype_func_ = OpMetaInfoHelper::GetInferDtypeFn(op_meta);
+
+  // kernel_func_ = OpMetaInfoHelper::GetKernelFn(op_meta);
+  py_func_ptr_ = &(OpMetaInfoHelper::GetPythonOperatorFunction(op_meta));
+  py_func_infer_meta_ptr_ =
+      &(OpMetaInfoHelper::GetPythonOperatorInferMetaFunction(op_meta));
+
+  // VLOG(6) << "infershape_func_: " << infershape_func_;
+  // VLOG(6) << "inferdtype_func_: " << inferdtype_func_;
+  // VLOG(6) << "kernel_func_: " << kernel_func_;
+
+  BuildCustomContext(yaml_info_parser);
+  VLOG(6) << "finish process custom context";
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+  SetDeviceContext(
+      ParseDeviceContext(op,
+                         phi::DeviceContextPool::Instance().Get(
+                             phi::TransToPhiPlace(kernel_key.backend())),
+                         place,
+                         GetExecutionStream(),
+                         GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(op_meta);
+  for (auto const& pair : op_inplace_map) {
+    pir::Value input_value =
+        op->operand_source(yaml_info_parser.InputName2Id().at(pair.first));
+    pir::Value output_value =
+        op->result(yaml_info_parser.OutputName2Id().at(pair.second));
+    if (IsInvalid(output_value) && IsInvalid(input_value)) {
+      this->AddInplace(value_exec_info_.GetVarByValue(input_value),
+                       value_exec_info_.GetVarByValue(output_value));
+    }
+  }
+
+  InitInputsOutputsIds(op, value_exec_info_);
+  VLOG(6) << "finish process inputs outputs index";
+
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+}
+
+using IrTensor = paddle::dialect::IrTensor;
+
+void CustomPyOpFuncInstruction::UpdateOutputMeta() {
+  VLOG(0) << "enter CustomPyOpFuncInstruction::UpdateOutputMeta()";
+
+  std::vector<IrTensor> vec_dense_inputs;
+  for (size_t i = 0; i < this->op_->operands().size(); ++i) {
+    vec_dense_inputs.emplace_back(IrTensor());
+    vec_dense_inputs.back().SetDims(phi::make_ddim(input_shapes_[i]));
+    vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
+  }
+
+  std::vector<IrTensor*> vec_ptr_inputs_;
+  for (auto& x : vec_dense_inputs) {
+    vec_ptr_inputs_.push_back(&x);
+  }
+
+  VLOG(0) << "CustomPyOpFuncInstruction finish vec_dense_inputs";
+
+  std::vector<IrTensor*> output = (*py_func_infer_meta_ptr_)(vec_ptr_inputs_);
+
+  VLOG(0) << "CustomPyOpFuncInstruction finish "
+             "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
+
+  for (size_t i = 0; i < cache_out_ptrs_.size(); ++i) {
+    auto out_in_scope = cache_out_ptrs_.at(i);
+    // update dims and dtype
+    phi::DenseTensorMeta* out_meta =
+        phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
+    out_meta->dims = output[i]->dims();
+    out_meta->dtype = output[i]->dtype();
+    out_meta->strides = out_meta->calc_strides(out_meta->dims);
+  }
+
+  VLOG(0) << "CustomPyOpFuncInstruction finish out_meta";
+}
+
+void CustomPyOpFuncInstruction::BuildShapeDtype() {
+  input_shapes_.clear();
+  input_dtypes_.clear();
+  vec_input_shapes_.clear();
+  vec_input_dtypes_.clear();
+  for (auto in_tensor : input_ptrs_) {
+    if (in_tensor) {
+      input_shapes_.push_back(phi::vectorize(in_tensor->dims()));
+      input_dtypes_.push_back(in_tensor->dtype());
+    } else {
+      input_shapes_.emplace_back();
+      input_dtypes_.emplace_back();
+    }
+  }
+  for (auto in_tensors : vec_input_ptrs_) {
+    std::vector<std::vector<int64_t>> input_shapes;
+    std::vector<phi::DataType> input_dtypes;
+    if (in_tensors.size() > 0) {
+      for (auto in_tensor : in_tensors) {
+        input_shapes.push_back(phi::vectorize(in_tensor->dims()));
+        input_dtypes.push_back(in_tensor->dtype());
+      }
+    }
+    vec_input_shapes_.push_back(input_shapes);
+    vec_input_dtypes_.push_back(input_dtypes);
+  }
+}
+
+void CustomPyOpFuncInstruction::Run() {
+  if (FLAGS_check_cuda_error) [[unlikely]] {
+    CUDAErrorCheck("CustomPyOpFuncInstruction " + custom_op_name_ + " begin");
+  }
+
+  VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
+  BuildShapeDtype();
+
+  UpdateOutputMeta();
+  for (auto& pair : this->InplaceInfo()) {
+    ShareVarBuffer(pair.first, pair.second);
+  }
+
+  // auto& vec_dense_inputs = custom_kernel_ctx_;
+
+  VLOG(6) << "Run custom op " << custom_op_name_ << " kernel.";
+  // check kernel_func_ is nullptr
+  // PADDLE_ENFORCE_NOT_NULL(kernel_func_,
+  //                         common::errors::InvalidArgument(
+  //                             "Custom kernel function is nullptr."));
+  PADDLE_ENFORCE_NOT_NULL(
+      py_func_ptr_,
+      common::errors::InvalidArgument("Custom kernel function is nullptr."));
+
+  // Assumes here that there are only two parameters
+  std::vector<Tensor*> vec_dense_inputs;
+  size_t num = op_->num_operands();
+  VLOG(0) << "Op num_operands: " << num;
+  for (size_t i = 0; i < num; ++i) {
+    vec_dense_inputs.push_back(&custom_kernel_ctx_.InputAt(i));
+  }
+
+  // VLOG(0) << "vec_dense_inputs[0]: " << vec_dense_inputs[0];
+  // VLOG(0) << "vec_dense_inputs[1]: " << vec_dense_inputs[1];
+
+  auto out = (*py_func_ptr_)(vec_dense_inputs);
+
+  std::vector<Tensor> out_tensor_vector;
+  for (auto& x : out) {
+    out_tensor_vector.emplace_back(*x);
+  }
+
+  custom_kernel_ctx_.ValidateAndAssignOutputs(
+      out_tensor_vector);  // logic lifted out of the macro
+  if (FLAGS_check_cuda_error) [[unlikely]] {
+    CUDAErrorCheck("CustomPyOpFuncInstruction " + custom_op_name_ + " finish");
+  }
+}
+}  // namespace paddle::framework
diff --git a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h
new file mode 100644
index 00000000000000..70cbeb6fe4e1b5
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h
@@ -0,0 +1,87 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
+#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
+#include "paddle/phi/api/ext/op_meta_info.h"
+
+namespace pir {
+class Operation;
+}  // namespace pir
+
+namespace paddle {
+namespace framework {
+class Scope;
+class CustomPyOpFuncInstruction : public InstructionBase {
+ public:
+  CustomPyOpFuncInstruction(size_t id,
+                            const phi::Place& place,
+                            ::pir::Operation* op,
+                            const ValueExecutionInfo& value_exec_info);
+
+  ::pir::Operation* Operation() const override { return op_; }
+  // Calls the function behind py_func_ptr_ and assigns its outputs.
+  void Run() override;
+
+  const std::string& Name() const override { return custom_op_name_; }
+
+  void clear();
+
+ private:
+  void BuildCustomContext(
+      const paddle::dialect::OpYamlInfoParser& op_yaml_info);
+
+  void BuildShapeDtype();
+
+  void UpdateOutputMeta();
+
+  paddle::CustomOpKernelContext custom_kernel_ctx_;
+  paddle::KernelFunc kernel_func_ = nullptr;
+  // py_func_ptr_ is the callable Run() invokes; Run() rejects a null pointer.
+  const paddle::PythonOperatorFunctionType* py_func_ptr_ = nullptr;
+  const paddle::PythonOperatorInferMetaFunctionType* py_func_infer_meta_ptr_ =
+      nullptr;
+
+  // key: input name; value: index into input_shapes_ / vec_input_shapes_
+  std::unordered_map<std::string, int> input_name2id_map_;
+  std::unordered_map<std::string, int> vec_input_name2id_map_;
+
+  // used for running infershape
+  std::vector<std::vector<int64_t>> input_shapes_;
+  std::vector<std::vector<std::vector<int64_t>>> vec_input_shapes_;
+  std::vector<paddle::any> custom_attrs_;
+
+  // used for running inferdtype
+  std::vector<DataType> input_dtypes_;
+  std::vector<std::vector<DataType>> vec_input_dtypes_;
+
+  // used to compute input shapes and dtypes at runtime
+  std::vector<phi::DenseTensor*> input_ptrs_;
+  std::vector<std::vector<phi::DenseTensor*>> vec_input_ptrs_;
+
+  // used to update the outputs
+  std::vector<phi::DenseTensor*> cache_out_ptrs_;
+
+  std::string custom_op_name_;
+
+  ::pir::Operation* op_{nullptr};  // not owned
+
+  const paddle::OpMetaInfo* custom_op_meta_;   // not owned
+  const ValueExecutionInfo& value_exec_info_;  // not owned
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 8c85ad0d63657e..f31a3e2dd96070 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -62,6 +62,7 @@
 #include "paddle/fluid/framework/new_executor/instruction/control_flow/yield_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/cuda_graph_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
@@ -958,6 +959,10 @@ void PirInterpreter::BuildInstruction() {
       vec_instruction_base_.emplace_back(
           std::make_unique<CustomKernelInstruction>(
               op_idx++, place_, &op, *(value_exe_info_.get())));
+    } else if (op.dialect()->name() == "custom_py_func") {
+      vec_instruction_base_.emplace_back(
+          std::make_unique<CustomPyOpFuncInstruction>(
+              op_idx++, place_, &op, *(value_exe_info_.get())));
     } else if (paddle::dialect::IsCustomEngineOp(&op)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
       vec_instruction_base_.emplace_back(
@@ -2038,7 +2043,7 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) {
     exception_holder_.Catch(std::current_exception());
   } catch (std::exception& ex) {
     LOG(WARNING) << instr_node->Name() << " raises an exception "
-                 << common::demangle(typeid(ex).name());
+                 << common::demangle(typeid(ex).name()) << ": " << ex.what();
     exception_holder_.Catch(std::current_exception());
   } catch (...) {
     LOG(WARNING) << instr_node->Name() << " raises an unknown exception";
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
index 4fc0f322bc9c76..490b31e09959f6 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
@@ -192,6 +192,50 @@ pir::OpPrintFn CustomKernelDialect::PrintOperation(
     printer.PrintOpReturnType(op);
   };
 }
+
+CustomPyFuncDialect::CustomPyFuncDialect(pir::IrContext *context)
+    : pir::Dialect(name(), context, pir::TypeId::get<CustomPyFuncDialect>()) {
+  initialize();  // registers CustomPyFuncOp with this dialect
+}
+
+void CustomPyFuncDialect::initialize() {
+  RegisterOps<dialect::CustomPyFuncOp>();  // the only op registered here
+}
+
+void CustomPyFuncDialect::PrintType(pir::Type type, std::ostream &os) const {
+  PrintKernelType(type, os);  // delegates to the PrintKernelType helper
+}
+
+void CustomPyFuncDialect::PrintAttribute(pir::Attribute attr,
+                                         std::ostream &os) const {
+  PrintKernelAttribute(attr, os);  // delegates to PrintKernelAttribute
+}
+
+pir::OpPrintFn CustomPyFuncDialect::PrintOperation(
+    const pir::Operation &op) const {
+  // Prints results, quoted kernel name, operands, attributes and types.
+  return [](const pir::Operation &op, pir::IrPrinter &printer) {
+    auto &os = printer.os;
+    printer.PrintOpResult(op);
+    os << " =";
+    std::string kernel_name = op.dyn_cast<CustomPyFuncOp>().kernel_name();
+    // Read "is_inplace" only when it is a bool (avoids a null deref).
+    const auto &attrs = op.attributes();
+    const auto it = attrs.find("is_inplace");
+    if (it != attrs.end() && it->second.isa<pir::BoolAttribute>() &&
+        it->second.dyn_cast<pir::BoolAttribute>().data()) {
+      kernel_name += "_";
+    }
+    os << " \"" << kernel_name << "(custom_py_func)\"";
+    printer.PrintOpOperands(op);
+    printer.PrintAttributeMap(op);
+    os << " :";
+    printer.PrintOperandsType(op);
+    os << " -> ";
+    printer.PrintOpReturnType(op);
+  };
+}
+
 #ifdef PADDLE_WITH_DNNL
 OneDNNKernelDialect::OneDNNKernelDialect(pir::IrContext *context)
     : pir::Dialect(name(), context, pir::TypeId::get<OneDNNKernelDialect>()) {
@@ -258,6 +302,7 @@ pir::OpPrintFn OneDNNKernelDialect::PrintOperation(
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelDialect)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncDialect)
 #ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
 #endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
index 128a8490c93d42..8891ef7618001f 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
@@ -53,6 +53,23 @@ class CustomKernelDialect : public pir::Dialect {
   void initialize();
 };
 
+class CustomPyFuncDialect : public pir::Dialect {
+ public:
+  explicit CustomPyFuncDialect(pir::IrContext* context);
+  // Name passed to the pir::Dialect base when the dialect is constructed.
+  static const char* name() { return "custom_py_func"; }
+
+  void PrintType(pir::Type type, std::ostream& os) const override;
+
+  void PrintAttribute(pir::Attribute attr, std::ostream& os) const override;
+
+  pir::OpPrintFn PrintOperation(
+      const pir::Operation& op) const override;  // NOLINT
+
+ private:
+  void initialize();  // registers CustomPyFuncOp
+};
+
 #ifdef PADDLE_WITH_DNNL
 class OneDNNKernelDialect : public pir::Dialect {
  public:
@@ -77,6 +94,7 @@ class OneDNNKernelDialect : public pir::Dialect {
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelDialect)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncDialect)
 #ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
 #endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
index cfdf5747e76fa6..ecf159c61da154 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
@@ -139,6 +139,46 @@ phi::KernelKey CustomKernelOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
 
+const char* CustomPyFuncOp::attributes_name[attributes_num] = {  // NOLINT
+    "op_name",
+    "kernel_name",
+    "kernel_key"};  // each one is checked by VerifySig()
+
+void CustomPyFuncOp::VerifySig() {
+  VLOG(4) << "Verifying inputs, outputs and attributes for: CustomPyFuncOp.";
+  auto& attributes = this->attributes();
+  // Each attribute below must be present and have the expected type.
+  PADDLE_ENFORCE(attributes.count("op_name") > 0 &&
+                     attributes.at("op_name").isa<pir::StrAttribute>(),
+                 common::errors::PreconditionNotMet(
+                     "Type of attribute: op_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_name") > 0 &&
+                     attributes.at("kernel_name").isa<pir::StrAttribute>(),
+                 common::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_key") > 0 &&
+                     attributes.at("kernel_key").isa<KernelAttribute>(),
+                 common::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_key is not right."));
+}
+
+std::string CustomPyFuncOp::op_name() {  // text of the "op_name" attribute
+  return attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+}
+
+std::string CustomPyFuncOp::kernel_name() {
+  // Text of the "kernel_name" attribute; at() throws if the attribute is
+  // missing, and VerifySig() is where its presence and type are checked.
+  const auto& name_attr = attributes().at("kernel_name");
+  return name_attr.dyn_cast<pir::StrAttribute>().AsString();
+}
+
+phi::KernelKey CustomPyFuncOp::kernel_key() {  // data of "kernel_key"
+  return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
+}
+
 #ifdef PADDLE_WITH_DNNL
 const char* OneDNNPhiKernelOp::attributes_name[attributes_num] = {  // NOLINT
     "op_name",
@@ -264,6 +304,7 @@ phi::KernelKey OneDNNLegacyKernelOp::kernel_key() {
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelOp)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncOp)
 #ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
index 2e5b2dded6ec8a..1e3ebac6260ebe 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
@@ -56,6 +56,18 @@ class CustomKernelOp : public pir::Op<CustomKernelOp> {
   void VerifySig();
 };
 
+class CustomPyFuncOp : public pir::Op<CustomPyFuncOp> {
+ public:
+  using Op::Op;
+  static const char *name() { return "custom_py_func"; }
+  static constexpr uint32_t attributes_num = 3;  // size of attributes_name
+  static const char *attributes_name[attributes_num];
+  std::string op_name();        // "op_name" attribute as a string
+  std::string kernel_name();    // "kernel_name" attribute as a string
+  phi::KernelKey kernel_key();  // data of the "kernel_key" attribute
+  void VerifySig();             // enforces presence/type of those three
+};
+
 #ifdef PADDLE_WITH_DNNL
 class OneDNNPhiKernelOp : public pir::Op<OneDNNPhiKernelOp> {
  public:
@@ -100,6 +112,7 @@ class OneDNNLegacyKernelOp : public pir::Op<OneDNNLegacyKernelOp> {
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelOp)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncOp)
 #ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
index 784d9b7e050f6c..db81173c8ac9d2 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
@@ -622,7 +622,7 @@ struct CustomOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
   CustomOpInfoInterfaceModel() : OpYamlInfoInterface::Concept(GetPirOpInfo) {}
 };
 
-struct CustomPyOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
+struct PythonOperatorInfoInterfaceModel : public OpYamlInfoInterface::Concept {
   static OpInfoTuple GetPirOpInfo(const std::string& pir_op_name) {
     const auto& op_meta =
         paddle::framework::detail::GetPythonOperatorInfoByPirName(pir_op_name);
@@ -700,7 +700,8 @@ struct CustomPyOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
         inputs_info, attributes_info, outputs_info, run_time_info, "");
   }
 
-  CustomPyOpInfoInterfaceModel() : OpYamlInfoInterface::Concept(GetPirOpInfo) {}
+  PythonOperatorInfoInterfaceModel()
+      : OpYamlInfoInterface::Concept(GetPirOpInfo) {}
 };
 
 struct CustomOpVjpInterfaceModel : public VjpInterface::Concept {
@@ -1273,7 +1274,7 @@ void PythonOperatorDialect::RegisterPythonOperator(
   std::set<pir::InterfaceValue> interface_values;
   pir::InterfaceValue op_info_interface =
       pir::InterfaceValue::Get<OpYamlInfoInterface,
-                               CustomPyOpInfoInterfaceModel>();
+                               PythonOperatorInfoInterfaceModel>();
   interface_values.insert(std::move(op_info_interface));
 
   // TODO(DrRyanHuang): Currently, we do not support vjp for customPyOp.
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index de172d07a4cb57..9a2af3f2632f82 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -2505,6 +2505,105 @@ void HandleForCustomOp(
   block->push_back(op);
 }
 
+void HandleForCustomPyOp(
+    pir::IrContext* ctx,
+    pir::Operation* op_item,
+    const phi::KernelKey& kernel_key,
+    const phi::Place place,
+    const OpYamlInfoParser* op_info_parser,
+    std::unordered_map<pir::Operation*, pir::Operation*>* map_op_pair,
+    std::unordered_map<pir::Value, pir::Value>* map_value_pair,
+    pir::Block* block) {
+  // Lowers `op_item` into a CustomPyFuncOp appended to `block`, and records
+  // the old->new mapping of the op and of each of its results.
+  // `op_info_parser` is not read by this function.
+
+  // Result types: one entry per result of `op_item`.
+  std::vector<pir::Type> op_output_types;
+  for (size_t out_idx = 0; out_idx < op_item->num_results(); ++out_idx) {
+    phi::Place out_place = phi::TransToPhiPlace(kernel_key.backend());
+    PushBackOutputTypes(ctx,
+                        op_item,
+                        op_item->result(out_idx).type(),
+                        out_place,
+                        kernel_key,
+                        &op_output_types);
+  }
+
+  // Operands: each one is replaced by its counterpart from `map_value_pair`.
+  std::vector<pir::Value> vec_inputs;
+  for (size_t i = 0; i < op_item->num_operands(); ++i) {
+    auto cur_in = op_item->operand_source(i);
+    if (!cur_in) {
+      vec_inputs.emplace_back();  // keep the slot, with an empty value
+      continue;
+    }
+    PADDLE_ENFORCE_EQ(
+        map_value_pair->count(cur_in),
+        true,
+        common::errors::PreconditionNotMet(
+            "[%d]'s input of [%s] op MUST in map pair", i, op_item->name()));
+
+    auto new_in = map_value_pair->at(cur_in);
+    auto new_in_type = new_in.type();
+    if (new_in_type.isa<AllocatedDenseTensorType>()) {
+      auto src_type = new_in_type.dyn_cast<AllocatedDenseTensorType>();
+      auto in_place = src_type.place();
+      // need trans from GPU_PINNED to GPU, refer to PR#41972
+      // NOTE(review): the condition reads `place` (the function argument),
+      // not `in_place`; confirm which of the two is intended.
+      if (phi::AllocationType::GPUPINNED == place.GetType()) {
+        // build memcopy op
+        auto out_place = phi::TransToPhiPlace(phi::Backend::GPU);
+        auto out_type = AllocatedDenseTensorType::get(ctx,
+                                                      out_place,
+                                                      src_type.dtype(),
+                                                      src_type.dims(),
+                                                      src_type.data_layout(),
+                                                      src_type.lod(),
+                                                      src_type.offset());
+        new_in = AddPlaceTransferOp(
+            new_in, out_type, in_place, out_place, kernel_key, block);
+      }
+    }
+
+    vec_inputs.push_back(new_in);
+  }
+
+  // Attributes: insert/emplace never overwrite, so the three kernel entries
+  // below take precedence over same-named attributes of `op_item`.
+  std::unordered_map<std::string, pir::Attribute> op_attribute{
+      {"op_name", pir::StrAttribute::get(ctx, op_item->name())},
+      {"kernel_name", pir::StrAttribute::get(ctx, op_item->name())},
+      {"kernel_key", KernelAttribute::get(ctx, kernel_key)}};
+
+  const auto& src_attrs = op_item->attributes();
+  op_attribute.insert(src_attrs.begin(), src_attrs.end());
+  if (op_item->HasTrait<InplaceTrait>()) {
+    op_attribute.emplace("is_inplace", pir::BoolAttribute::get(ctx, true));
+  }
+  op_attribute.emplace("origin_id",
+                       pir::Int64Attribute::get(ctx, op_item->id()));
+
+  VLOG(6) << "Lower custom pyop: " << op_item->name()
+          << " to : " << CustomPyFuncOp::name();
+
+  pir::OpInfo custom_py_func_op_info =
+      ctx->GetRegisteredOpInfo(CustomPyFuncOp::name());
+
+  pir::Operation* op = pir::Operation::Create(
+      vec_inputs, op_attribute, op_output_types, custom_py_func_op_info);
+  // "origin_id" is set a second time here, now from the new op's own id.
+  op->set_attribute("origin_id", pir::Int64Attribute::get(ctx, op->id()));
+
+  // Record the old->new op/result mappings, then append the new op.
+  (*map_op_pair)[op_item] = op;
+  for (size_t res_idx = 0; res_idx < op_item->num_results(); ++res_idx) {
+    (*map_value_pair)[op_item->result(res_idx)] = op->result(res_idx);
+  }
+  block->push_back(op);
+}
+
 void HandleForTensorRTOp(
     pir::IrContext* ctx,
     pir::Operation* op_item,
@@ -3588,7 +3687,7 @@ void ProcessBlock(
     auto kernel_name = GetKernelName(op_info_parser.get(), op_item);
     auto kernel_key = GetKernelKey(
         op_item, place, kernel_name, *map_value_pair, op_info_parser.get());
-    VLOG(6) << "kernel type " << kernel_key;
+    VLOG(0) << "kernel type " << kernel_key;
 
     if (paddle::dialect::IsCustomOp(op_item)) {
       HandleForCustomOp(ctx,
@@ -3602,6 +3701,18 @@ void ProcessBlock(
       continue;
     }
 
+    if (paddle::dialect::IsCustomPyOp(op_item)) {
+      HandleForCustomPyOp(ctx,
+                          op_item,
+                          kernel_key,
+                          place,
+                          op_info_parser.get(),
+                          map_op_pair,
+                          map_value_pair,
+                          new_block);
+      continue;
+    }
+
     if (paddle::dialect::IsTensorRTOp(op_item)) {
       HandleForTensorRTOp(ctx,
                           op_item,
@@ -3714,6 +3825,7 @@ std::unique_ptr<pir::Program> PdOpLowerToKernelPass(pir::Program* prog,
   ctx->GetOrRegisterDialect<OperatorDialect>();
   ctx->GetOrRegisterDialect<KernelDialect>();
   ctx->GetOrRegisterDialect<CustomKernelDialect>();
+  ctx->GetOrRegisterDialect<CustomPyFuncDialect>();
 
 #ifdef PADDLE_WITH_DNNL
   ctx->GetOrRegisterDialect<OneDNNOperatorDialect>();
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 05be08144f9d0f..5b9ea5dcb9987a 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -3587,4 +3587,186 @@ void Check_PIR_not_support_out(PyObject* kwargs) {
   }
 }
 
+/**
+ * Converts a Python list of str into a C++ vector<string>.
+ * @param py_list the Python list object to read
+ * @param cpp_vec the C++ vector that receives the converted strings
+ * @return 0 on success; -1 with a Python exception set on failure (cpp_vec is
+ *         cleared when an element is not a convertible string)
+ */
+int py_list_to_vector_string(PyObject* py_list,
+                             std::vector<std::string>* cpp_vec) {
+  if (!PyList_Check(py_list)) {
+    PyErr_SetString(PyExc_TypeError, "Expected a list for string conversion.");
+    return -1;
+  }
+
+  const Py_ssize_t item_count = PyList_Size(py_list);
+  cpp_vec->reserve(item_count);
+
+  // Shared failure path: drop whatever is in the vector and report -1.
+  const auto fail = [cpp_vec]() {
+    cpp_vec->clear();
+    return -1;
+  };
+
+  for (Py_ssize_t idx = 0; idx < item_count; ++idx) {
+    // PyList_GetItem returns a borrowed reference, so no DECREF is needed.
+    PyObject* element = PyList_GetItem(py_list, idx);
+    if (!PyUnicode_Check(element)) {
+      PyErr_SetString(PyExc_TypeError, "List element must be a string.");
+      return fail();
+    }
+
+    // PyUnicode_AsUTF8AndSize sets a Python exception itself when it fails.
+    Py_ssize_t utf8_len = 0;
+    const char* utf8 = PyUnicode_AsUTF8AndSize(element, &utf8_len);
+    if (utf8 == nullptr) {
+      return fail();
+    }
+
+    cpp_vec->emplace_back(utf8, utf8_len);
+  }
+  return 0;
+}
+
+/**
+ * Converts a Python dict[str, str] into a C++ unordered_map<string, string>.
+ * @param py_dict the Python dict object to read
+ * @param cpp_map the C++ unordered_map that receives the converted entries
+ * @return 0 on success, -1 on failure (e.g. a key or value is not a string)
+ */
+int py_dict_to_unordered_map_string(
+    PyObject* py_dict, std::unordered_map<std::string, std::string>* cpp_map) {
+  if (!PyDict_Check(py_dict)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Expected a dictionary for map conversion.");
+    return -1;
+  }
+
+  // Copies a Python str into `out` as UTF-8. On failure a Python exception is
+  // set (using `type_error` when `obj` is not a str) and false is returned.
+  const auto to_utf8 = [](PyObject* obj,
+                          const char* type_error,
+                          std::string* out) {
+    if (!PyUnicode_Check(obj)) {
+      PyErr_SetString(PyExc_TypeError, type_error);
+      return false;
+    }
+    Py_ssize_t utf8_len = 0;
+    const char* utf8 = PyUnicode_AsUTF8AndSize(obj, &utf8_len);
+    if (utf8 == nullptr) {
+      return false;
+    }
+    out->assign(utf8, utf8_len);
+    return true;
+  };
+
+  PyObject* entry_key = nullptr;
+  PyObject* entry_value = nullptr;
+  Py_ssize_t cursor = 0;  // iteration state owned by PyDict_Next
+
+  while (PyDict_Next(py_dict, &cursor, &entry_key, &entry_value)) {
+    std::string cpp_key;
+    std::string cpp_value;
+    const bool converted =
+        to_utf8(entry_key, "Dictionary key must be a string.", &cpp_key) &&
+        to_utf8(entry_value, "Dictionary value must be a string.", &cpp_value);
+    if (!converted) {
+      cpp_map->clear();
+      return -1;
+    }
+    // emplace keeps an existing entry if the key is already in the map.
+    cpp_map->emplace(std::move(cpp_key), std::move(cpp_value));
+  }
+  return 0;
+}
+
+/**
+ * Copies the UTF-8 contents of a Python str into a C++ std::string.
+ * @param py_str the Python str object to read
+ * @param cpp_str the C++ string that receives the text
+ * @return 0 on success, -1 on failure; every failure path leaves a Python
+ *         exception set
+ */
+int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str) {
+  // Reject null arguments. An exception is set here too, so that a caller
+  // which propagates the failure to Python never does so without one.
+  if (!py_str || !cpp_str) {
+    PyErr_SetString(PyExc_ValueError, "Invalid input parameters");
+    return -1;
+  }
+
+  // Only str objects are accepted.
+  if (!PyUnicode_Check(py_str)) {
+    PyErr_SetString(PyExc_TypeError, "Expected a string object");
+    return -1;
+  }
+
+  // Borrow the UTF-8 buffer owned by the str object instead of creating a
+  // temporary bytes object; nothing has to be released on any path.
+  Py_ssize_t length = 0;
+  const char* c_str = PyUnicode_AsUTF8AndSize(py_str, &length);
+  if (c_str == nullptr) {
+    // The conversion failed and has already set a Python exception.
+    return -1;
+  }
+
+  // std::string may throw (e.g. std::bad_alloc); translate that into a Python
+  // exception rather than letting it cross the C API boundary.
+  try {
+    cpp_str->assign(c_str, length);
+  } catch (const std::exception&) {
+    PyErr_SetString(PyExc_RuntimeError, "Failed to create std::string");
+    return -1;
+  }
+
+  return 0;
+}
+
+int parse_attrs_dict(PyObject* py_attrs_dict,
+                     std::unordered_map<std::string, uintptr_t>* attrs) {
+  // Reads the callables stored under "infer_meta_fn_ptr" and "fn_ptr" in
+  // `py_attrs_dict` and records their addresses in `attrs`. Returns 0 on
+  // success, or -1 with a Python exception set on failure.
+  if (py_attrs_dict == nullptr || attrs == nullptr) {
+    PyErr_SetString(PyExc_ValueError, "Invalid input parameters");
+    return -1;
+  }
+  if (!PyDict_Check(py_attrs_dict)) {
+    PyErr_SetString(PyExc_TypeError, "Expected a dictionary object");
+    return -1;
+  }
+
+  // Both lookups return borrowed references (nullptr when the key is absent).
+  PyObject* const infer_meta_fn =
+      PyDict_GetItemString(py_attrs_dict, "infer_meta_fn_ptr");
+  PyObject* const kernel_fn = PyDict_GetItemString(py_attrs_dict, "fn_ptr");
+  const bool has_both_keys = infer_meta_fn != nullptr && kernel_fn != nullptr;
+  if (!has_both_keys) {
+    PyErr_SetString(PyExc_KeyError,
+                    "Missing required keys 'infer_meta_fn_ptr' or 'fn_ptr'");
+    return -1;
+  }
+
+  const bool both_callable =
+      PyCallable_Check(infer_meta_fn) && PyCallable_Check(kernel_fn);
+  if (!both_callable) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "Expected callable objects for 'infer_meta_fn_ptr' and 'fn_ptr'");
+    return -1;
+  }
+
+  // Take a strong reference to each callable so it stays alive while only
+  // its address is held on the C++ side.
+  // NOTE(review): nothing in this function releases these references; confirm
+  // that the owner of `attrs` is expected to do so.
+  Py_INCREF(infer_meta_fn);
+  Py_INCREF(kernel_fn);
+  (*attrs)["infer_meta_fn_ptr"] = reinterpret_cast<uintptr_t>(infer_meta_fn);
+  (*attrs)["fn_ptr"] = reinterpret_cast<uintptr_t>(kernel_fn);
+  return 0;
+}
+
 }  // namespace paddle::pybind
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 0a88d00d50e5ca..77e25af4879b5c 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -587,6 +587,14 @@ GetPredefinedOutTupleTensorFromKwargs_7(PyObject* kwargs);
 
 void Check_PIR_not_support_out(PyObject* kwargs);
 
+int py_list_to_vector_string(PyObject* py_list,
+                             std::vector<std::string>* cpp_vec);
+int py_dict_to_unordered_map_string(
+    PyObject* py_dict, std::unordered_map<std::string, std::string>* cpp_map);
+
+int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str);
+int parse_attrs_dict(PyObject* py_attrs_dict,
+                     std::unordered_map<std::string, uintptr_t>* attrs);
 /*----------------------for arg parse-----------------------------*/
 paddle::Tensor& GetTensorFromArgsOrKWArgs(
     const std::string& op_type,
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index e75526e429384d..d9ba54bca5c1a2 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <functional>
 
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/custom_operator_utils.h"
@@ -35,6 +36,7 @@
 #include "paddle/pir/include/core/attribute.h"
 #include "paddle/pir/include/core/builtin_op.h"
 
+#include "paddle/fluid/framework/python_operator.h"
 namespace paddle {
 
 namespace pybind {
@@ -990,6 +992,446 @@ static PyObject *run_custom_op(PyObject *self,
   }
 }
 
+// Streams `vec` as "[a, b, c]"; run_custom_pyop uses it to print name lists.
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const std::vector<T> &vec) {
+  os << "[";
+  const char *separator = "";
+  for (const auto &element : vec) {
+    os << separator << element;
+    separator = ", ";
+  }
+  os << "]";
+  return os;
+}
+
+using IrTensor = paddle::dialect::IrTensor;
+
+// Builds a C++ callable that forwards to the Python callable whose PyObject
+// address is `py_func_ptr`. Inputs become positional arguments; a tuple or
+// list result yields one output per item (None -> nullptr), anything else a
+// single output. Python errors are rethrown as std::runtime_error.
+template <typename T>
+auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
+  static_assert(std::is_same_v<T, Tensor> || std::is_same_v<T, IrTensor>,
+                "T must be either Tensor or paddle::dialect::IrTensor");
+  using FuncInputType = std::vector<std::shared_ptr<T>>;
+  using FuncOutputType = std::vector<std::shared_ptr<T>>;
+
+  return [=](const FuncInputType &inputs) -> FuncOutputType {
+    py::gil_scoped_acquire acquire;  // hold the GIL while touching Python
+    const py::handle func_handle(reinterpret_cast<PyObject *>(py_func_ptr));
+    if (!func_handle.ptr()) {
+      throw std::runtime_error("Python function pointer is null for op: " +
+                               op_name);
+    }
+    py::function py_func = py::reinterpret_borrow<py::function>(func_handle);
+    py::tuple py_args(inputs.size());
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      py_args[i] = py::cast(inputs[i]);
+    }
+
+    py::object result;
+    try {
+      result = py_func(*py_args);  // unpack the tuple into positional args
+    } catch (const py::error_already_set &e) {
+      throw std::runtime_error("Execution of customPythonOp (" + op_name +
+                               ") failed: " + e.what());
+    }
+
+    auto cast_to_shared = [&](const py::handle &h) -> std::shared_ptr<T> {
+      if (h.is_none()) return nullptr;
+      try {
+        return h.cast<std::shared_ptr<T>>();
+      } catch (const py::cast_error &) {
+        throw std::runtime_error("Output of customPythonOp (" + op_name +
+                                 ") is not of expected type.");
+      }
+    };
+
+    FuncOutputType outputs;
+    if (py::isinstance<py::tuple>(result) || py::isinstance<py::list>(result)) {
+      // Lists (e.g. from MetaTensorWrapper) are copied into a tuple here.
+      const py::tuple items(result);
+      outputs.reserve(items.size());
+      for (const auto &item : items) {
+        outputs.push_back(cast_to_shared(item));
+      }
+    } else {
+      outputs.push_back(cast_to_shared(result));
+    }
+    return outputs;
+  };
+}
+
+// Builds the PIR operation for a Python-defined custom operator.
+//
+// Reads `name`, `input_names`, `output_names`, `attrs` (the callables
+// `infer_meta_fn_ptr` and `fn_ptr`) and `inplace_map` from kwargs; the
+// positional args are the operator inputs in registration order, with None
+// marking an absent optional input. The operator is registered on first
+// use, and output types come from the Python infer-meta function.
+// May throw; run_custom_pyop turns exceptions into Python errors.
+static PyObject *RunCustomPyOpImpl(PyObject *args, PyObject *kwargs) {
+  VLOG(6) << "Call run_custom_pyop";
+
+  if (kwargs == nullptr) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "kwargs cannot be NULL. Please add inputs/outputs/attr/inplace_map!");
+    return nullptr;
+  }
+
+  PyObject *py_op_name = PyDict_GetItemString(kwargs, "name");
+  PyObject *py_input_names = PyDict_GetItemString(kwargs, "input_names");
+  PyObject *py_output_names = PyDict_GetItemString(kwargs, "output_names");
+  PyObject *py_attrs_dict = PyDict_GetItemString(kwargs, "attrs");
+  PyObject *py_inplace_dict = PyDict_GetItemString(kwargs, "inplace_map");
+
+  if (!py_op_name || !py_input_names || !py_output_names || !py_attrs_dict ||
+      !py_inplace_dict) {
+    // No C++ exception is in flight here; setting the Python error suffices.
+    PyErr_SetString(
+        PyExc_KeyError,
+        "Required key (inputs/outputs/attr/inplace_map) missing from kwargs.");
+    return nullptr;
+  }
+
+  std::string op_name;
+  std::vector<std::string> inputs_vec;
+  std::vector<std::string> outputs_vec;
+  std::unordered_map<std::string, uintptr_t> attrs_map;
+  std::unordered_map<std::string, std::string> op_inplace_map;
+
+  if (py_str_to_cpp_str(py_op_name, &op_name) == -1 ||
+      py_list_to_vector_string(py_input_names, &inputs_vec) == -1 ||
+      py_list_to_vector_string(py_output_names, &outputs_vec) == -1 ||
+      parse_attrs_dict(py_attrs_dict, &attrs_map) == -1 ||
+      py_dict_to_unordered_map_string(py_inplace_dict, &op_inplace_map) == -1) {
+    // A helper may already have set a more specific Python error; keep it.
+    if (!PyErr_Occurred()) {
+      PyErr_SetString(PyExc_KeyError,
+                      "inputs/outputs/attr/inplace_map is Empty!");
+    }
+    return nullptr;
+  }
+
+  VLOG(6) << "Custom PyOp [" << op_name << "] inputs: " << inputs_vec
+          << ", outputs: " << outputs_vec
+          << ", infer_meta_fn_ptr: " << attrs_map["infer_meta_fn_ptr"]
+          << ", fn_ptr: " << attrs_map["fn_ptr"];
+
+  const auto &meta_info_map = OpMetaInfoMap::Instance().GetMap();
+
+  auto py_func = CreatePyFuncRunner<Tensor>(attrs_map["fn_ptr"], op_name);
+  auto infer_meta_py_func =
+      CreatePyFuncRunner<IrTensor>(attrs_map["infer_meta_fn_ptr"], op_name);
+
+  if (meta_info_map.find(op_name) == meta_info_map.end()) {
+    VLOG(6) << "Registering custom PyOp on first use: " << op_name;
+    // infer_meta_py_func is copied, not moved: it is called again below.
+    paddle::framework::RegisterPythonOperator(
+        op_name,
+        std::move(inputs_vec),
+        std::move(outputs_vec),
+        {"infer_meta_fn_ptr: int64_t", "fn_ptr: int64_t"},
+        std::move(op_inplace_map),
+        std::move(py_func),
+        infer_meta_py_func);
+  }
+
+  // NOTE(original author): "not sure why this cannot run?" -- needs follow-up.
+  PADDLE_ENFORCE_NE(meta_info_map.find(op_name),
+                    meta_info_map.end(),
+                    common::errors::NotFound(
+                        "Can't find %s in Eager OpMetaInfoMap which should be "
+                        "created by LoadOpMetaInfoAndRegisterOp, please make "
+                        "sure you registered your op first and try again. ",
+                        op_name));
+
+  const auto &vec_map = meta_info_map.at(op_name);
+  const auto &inputs = paddle::OpMetaInfoHelper::GetInputs(vec_map[0]);
+  const auto &outputs = paddle::OpMetaInfoHelper::GetOutputs(vec_map[0]);
+  const auto &inplace_map = paddle::OpMetaInfoHelper::GetInplaceMap(vec_map[0]);
+  const auto &inplace_reverse_map =
+      paddle::OpMetaInfoHelper::GetInplaceReverseMap(vec_map[0]);
+
+  std::string pir_op_name =
+      paddle::framework::kPythonOperatorDialectPrefix + op_name;
+  if (!inplace_map.empty()) {
+    pir_op_name += "_";
+  }
+  pir::IrContext *ctx = pir::IrContext::Instance();
+  pir::OpInfo pir_info = ctx->GetRegisteredOpInfo(pir_op_name);
+  pir::OperationArgument argument(pir_info);
+  std::vector<pir::Value> argument_inputs;
+  std::vector<pir::Type> argument_outputs;
+
+  std::vector<std::vector<int64_t>> input_shapes;
+  std::vector<DataType> input_dtypes;
+  std::unordered_map<std::string, int> input_name2id_map;
+  std::vector<std::vector<std::vector<int64_t>>> vec_input_shapes;
+  std::vector<std::vector<DataType>> vec_input_dtypes;
+  std::unordered_map<std::string, int> vec_input_name2id_map;
+  std::vector<paddle::any> custom_attrs;
+  int input_index = 0;
+  int vec_input_index = 0;
+
+  std::vector<std::shared_ptr<IrTensor>> inputs_ptr_vector;
+
+  // PyTuple_GET_ITEM performs no bounds check, so validate the count first.
+  PADDLE_ENFORCE_GE(
+      static_cast<size_t>(PyTuple_Size(args)),
+      inputs.size(),
+      common::errors::InvalidArgument(
+          "Custom PyOp %s expects %d inputs but received %d.",
+          op_name,
+          inputs.size(),
+          PyTuple_Size(args)));
+
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    const auto &input = inputs.at(i);
+    PyObject *obj = PyTuple_GET_ITEM(args, i);
+    // Emplace Py_None from python, this means optional inputs passed to C++,
+    // use one un-initialized tensor to indicate both Tensor and
+    // vector<Tensor> inputs.
+    if (obj == Py_None) {
+      VLOG(6) << "Add un-initialized tensor because the optional input is None";
+      if (paddle::framework::detail::IsDuplicableVar(input)) {
+        std::vector<std::vector<int64_t>> vec_input_shape;
+        std::vector<DataType> vec_input_dtype;
+        vec_input_shapes.emplace_back(vec_input_shape);
+        vec_input_dtypes.emplace_back(vec_input_dtype);
+        vec_input_name2id_map[inputs[i]] = vec_input_index;
+        vec_input_index++;
+      } else {
+        std::vector<int64_t> input_shape;
+        DataType input_dtype = DataType::UNDEFINED;
+        input_shapes.emplace_back(input_shape);
+        input_dtypes.emplace_back(input_dtype);
+        input_name2id_map[inputs[i]] = input_index;
+        input_index++;
+      }
+      argument_inputs.emplace_back();
+      continue;
+    }
+    if (paddle::framework::detail::IsDuplicableVar(input)) {
+      // TODO: vector<Tensor> inputs are not wired up yet. Fail loudly rather
+      // than silently building the op with this operand missing.
+      PADDLE_THROW(common::errors::Unimplemented(
+          "Custom PyOp %s: vector input %s is not supported yet.",
+          op_name,
+          input));
+    } else {
+      input_name2id_map[inputs[i]] = input_index;
+      input_index++;
+      pir::Value input_value =
+          CastPyArg2Value(obj, op_name, i, false);  // NOLINT
+      paddle::dialect::DenseTensorType input_tensor =
+          input_value.type().dyn_cast<paddle::dialect::DenseTensorType>();
+      // Record shape/dtype so input_shapes stays aligned with
+      // input_name2id_map; the inplace-output lookup below indexes it by id.
+      input_shapes.push_back(phi::vectorize(input_tensor.dims()));
+      input_dtypes.push_back(
+          paddle::dialect::TransToPhiDataType(input_tensor.dtype()));
+      argument_inputs.push_back(input_value);
+
+      inputs_ptr_vector.push_back(std::make_shared<IrTensor>(
+          paddle::dialect::TransToPhiDataType(input_tensor.dtype()),
+          input_tensor.dims(),
+          input_tensor.data_layout(),
+          phi::LegacyLoD{}));
+    }
+  }
+  argument.AddInputs(argument_inputs);
+
+  custom_attrs.push_back(attrs_map["infer_meta_fn_ptr"]);
+  custom_attrs.push_back(attrs_map["fn_ptr"]);
+  argument.AddAttribute(
+      "infer_meta_fn_ptr",
+      pir::Int64Attribute::get(pir::IrContext::Instance(),
+                               attrs_map["infer_meta_fn_ptr"]));
+  argument.AddAttribute("fn_ptr",
+                        pir::Int64Attribute::get(pir::IrContext::Instance(),
+                                                 attrs_map["fn_ptr"]));
+
+  // Run the Python infer-meta function: one IrTensor per declared output.
+  std::vector<std::shared_ptr<IrTensor>> process_result =
+      infer_meta_py_func(inputs_ptr_vector);
+  PADDLE_ENFORCE_EQ(
+      process_result.size(),
+      outputs.size(),
+      common::errors::InvalidArgument(
+          "%d and %d not equal!", process_result.size(), outputs.size()));
+
+  dialect::ProcessMeshAttribute op_mesh;
+  bool run_auto_parallel = false;
+  std::vector<pir::Attribute> dist_result_attrs;
+  phi::distributed::SpmdInfo spmd_info;
+  if (dialect::HasDistInput(argument_inputs, &op_mesh)) {
+    VLOG(7) << "Custom Op: " << op_name << " InferSPMD";
+    run_auto_parallel = true;
+    spmd_info = paddle::framework::RunInferSpmd(
+        vec_map[0], op_name, op_mesh, argument_inputs, custom_attrs);
+  }
+
+  size_t all_values_num = 0;
+  // output name -> value num (that output should hold)
+  std::unordered_map<std::string, size_t> output_name2value_num;
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    const auto &output = outputs.at(i);
+    if (paddle::framework::detail::IsDuplicableVar(output)) {
+      PADDLE_ENFORCE_NE(
+          inplace_reverse_map.find(output),
+          inplace_reverse_map.end(),
+          common::errors::InvalidArgument(
+              "Only support vector output that is set for inplace, Please use "
+              "`SetInplaceMap` in your output when registry custom operator."));
+      const auto &input = inplace_reverse_map.at(output);
+      auto index = vec_input_name2id_map[input];
+      auto &vec_input_shape = vec_input_shapes.at(index);
+      output_name2value_num[output] = vec_input_shape.size();
+    } else {
+      if (inplace_reverse_map.find(output) != inplace_reverse_map.end()) {
+        const auto &input = inplace_reverse_map.at(output);
+        auto index = input_name2id_map[input];
+        // input_shapes[index] is dim of tensor, if the dim doesn't have
+        // element, it must be a optional tensor that is None in custom operator
+        output_name2value_num[output] = input_shapes.at(index).empty() ? 0 : 1;
+      } else {
+        ++(output_name2value_num[output]);
+      }
+    }
+    all_values_num += output_name2value_num[output];
+  }
+  VLOG(6) << "all_values_num: " << all_values_num;
+
+  if (run_auto_parallel) {
+    PADDLE_ENFORCE_EQ(
+        spmd_info.second.size(),
+        all_values_num,
+        common::errors::InvalidArgument(
+            "The number of output dist_attr after running custom operator's "
+            "InferSPMD is wrong, "
+            "expected contains %d Tensors' dist_attr, but actually contains %d "
+            "Tensors' dist_attr",
+            all_values_num,
+            spmd_info.second.size()));
+  }
+
+  size_t value_index = 0;
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    const auto &output = outputs.at(i);
+    auto value_num = output_name2value_num[output];
+    if (value_num == 0) {
+      // Optional value condition
+      pir::Type out_type;
+      argument_outputs.push_back(out_type);
+      continue;
+    }
+    if (paddle::framework::detail::IsDuplicableVar(output)) {
+      std::vector<pir::Type> out_types;
+      std::vector<pir::Attribute> dist_attrs;
+      // TODO: vector outputs are not wired up yet. Once they are, build one
+      // DenseTensorType (dist type under auto parallel) per element here,
+      // append it to out_types/dist_attrs and advance value_index.
+      pir::Type out_vector_type =
+          pir::VectorType::get(pir::IrContext::Instance(), out_types);
+      argument_outputs.push_back(out_vector_type);
+      if (run_auto_parallel) {
+        dist_result_attrs.push_back(
+            pir::ArrayAttribute::get(pir::IrContext::Instance(), dist_attrs));
+      }
+    } else {
+      // process_result holds one entry per declared output, so it is indexed
+      // by output position; a None returned from Python arrives as nullptr.
+      const auto &meta_out = process_result.at(i);
+      PADDLE_ENFORCE_NOT_NULL(
+          meta_out,
+          common::errors::InvalidArgument(
+              "The infer_meta function of %s returned None for output %s.",
+              op_name,
+              output));
+      const auto &dense_out = *meta_out;
+      auto out_type = paddle::dialect::DenseTensorType::get(
+          pir::IrContext::Instance(),
+          paddle::dialect::TransToIrDataType(dense_out.dtype()),
+          dense_out.dims(),
+          dense_out.layout(),
+          dense_out.lod(),
+          dense_out.offset());
+      if (run_auto_parallel) {
+        auto dist_attr = dialect::CvtToPirAttr(spmd_info.second[value_index]);
+        argument_outputs.push_back(
+            dialect::CvtToPirDistType(out_type, dist_attr));
+        dist_result_attrs.push_back(dist_attr);
+      } else {
+        argument_outputs.push_back(out_type);
+      }
+      value_index++;
+    }
+  }
+
+  // construct operator_dist_attr
+  if (run_auto_parallel) {
+    std::vector<pir::Attribute> dist_operand_attrs;
+    for (auto &arg_dist : spmd_info.first) {
+      dist_operand_attrs.push_back(dialect::CvtToPirAttr(arg_dist));
+    }
+    auto op_dist_attr = dialect::OperationDistAttribute::get(
+        ctx, op_mesh, dist_operand_attrs, dist_result_attrs);
+    std::ostringstream print_stream;
+    print_stream << op_dist_attr;
+    VLOG(7) << "Custom Op: " << op_name << " InferSPMD Operator dist attr"
+            << print_stream.str();
+    argument.AddAttribute(kAttrOpDistAttr, op_dist_attr);
+  }
+
+  argument.AddOutputs(argument_outputs.begin(), argument_outputs.end());
+  ::pir::PassStopGradientsDefaultly(argument);
+  CallStackRecorder callstack_recorder("_run_custom_pyop");
+  callstack_recorder.Record();
+  std::vector<pir::Value> op_results;
+  pir::Operation *op =
+      paddle::dialect::ApiBuilder::Instance().GetBuilder()->Build(
+          std::move(argument));
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    const auto &output = outputs.at(i);
+    if (paddle::framework::detail::IsDuplicableVar(output)) {
+      if (op->result(i).type().dyn_cast<pir::VectorType>()) {
+        auto split_op = paddle::dialect::ApiBuilder::Instance()
+                            .GetBuilder()
+                            ->Build<pir::SplitOp>(op->result(i));
+        auto split_outputs = split_op.outputs();
+        op_results.insert(
+            op_results.end(), split_outputs.begin(), split_outputs.end());
+      }
+    } else {
+      op_results.push_back(op->result(i));
+    }
+  }
+  callstack_recorder.AttachToOps();
+  return ToPyObject(op_results);
+}
+
+// Python-facing entry point, exposed as `_run_custom_pyop`.
+//
+// The PADDLE_ENFORCE checks and the Python callbacks used by the
+// implementation can throw. A C++ exception must not unwind through the
+// CPython frames that call this function, so every exception is caught
+// here and handed to ThrowExceptionToPython (assumed to raise the matching
+// Python error), and nullptr is returned to signal the failure.
+static PyObject *run_custom_pyop(PyObject *self,
+                                 PyObject *args,
+                                 PyObject *kwargs) {
+  try {
+    return RunCustomPyOpImpl(args, kwargs);
+  } catch (...) {
+    ThrowExceptionToPython(std::current_exception());
+    return nullptr;
+  }
+}
+
 static PyObject *builtin_combine_op(PyObject *self,
                                     PyObject *args,
                                     PyObject *kwargs) {
@@ -1294,6 +1736,10 @@ static PyMethodDef ManualOpsAPI[] = {
      (PyCFunction)(void (*)(void))run_custom_op,
      METH_VARARGS | METH_KEYWORDS,
      "C++ interface function for run_custom_op."},
+    {"_run_custom_pyop",
+     (PyCFunction)(void (*)(void))run_custom_pyop,
+     METH_VARARGS | METH_KEYWORDS,
+     "C++ interface function for run_custom_pyop."},
     {"builtin_combine",
      (PyCFunction)(void (*)(void))builtin_combine_op,
      METH_VARARGS | METH_KEYWORDS,
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index 9f247a9c9e295c..3a3aea33964c77 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -996,10 +996,10 @@ using InferSpmdFunc = phi::distributed::SpmdInfo (*)(
     const std::vector<CustomSpmdInferAttrArg>& attrs);
 
 using PythonOperatorFunctionType =
-    std::function<std::vector<Tensor>(std::vector<Tensor>&)>;
+    std::function<std::vector<Tensor*>(std::vector<Tensor*>&)>;
 using IrTensor = paddle::dialect::IrTensor;
 using PythonOperatorInferMetaFunctionType =
-    std::function<std::vector<IrTensor>(const std::vector<IrTensor>&)>;
+    std::function<std::vector<IrTensor*>(const std::vector<IrTensor*>&)>;
 
 class PADDLE_API OpMetaInfo {
  public:
diff --git a/paddle/pir/include/core/operation.h b/paddle/pir/include/core/operation.h
index cce23e35ec8067..462e3a758fa495 100644
--- a/paddle/pir/include/core/operation.h
+++ b/paddle/pir/include/core/operation.h
@@ -75,7 +75,7 @@ class alignas(8) Operation final : public DoubleLevelContainer<Operation> {
       const std::vector<pir::Value> &inputs,
       const AttributeMap &attributes,
       const std::vector<pir::Type> &output_types,
-      pir::OpInfo op_info,
+      const pir::OpInfo &op_info,
       size_t num_regions = 0,
       const std::vector<Block *> &successors = {},
       bool verify = true);
diff --git a/paddle/pir/src/core/operation.cc b/paddle/pir/src/core/operation.cc
index 3b073760700c5c..05fcd737835c1f 100644
--- a/paddle/pir/src/core/operation.cc
+++ b/paddle/pir/src/core/operation.cc
@@ -54,7 +54,7 @@ Operation *Operation::Create(OperationArgument &&argument) {
 Operation *Operation::Create(const std::vector<Value> &inputs,
                              const AttributeMap &attributes,
                              const std::vector<Type> &output_types,
-                             pir::OpInfo op_info,
+                             const pir::OpInfo &op_info,
                              size_t num_regions,
                              const std::vector<Block *> &successors,
                              bool verify) {
diff --git a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
index d1703fd9000e39..548f10f478763a 100644
--- a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
+++ b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
@@ -756,7 +756,7 @@ def fallback_symbolic_to_constant(args, kwargs, err):
                     # TODO(zrr1999): maybe we can continue to fallback to all args are constant.
                     raise BreakGraphError(
                         InferMetaBreak(
-                            f"InferMeta encountered {type(err)}, but all args are not symbolic."
+                            f"InferMeta encountered {type(err)}, but all args are not symbolic.\n\n, {err}"
                         )
                     )
 
@@ -782,7 +782,7 @@ def fallback_symbolic_to_constant(args, kwargs, err):
                 ):
                     raise BreakGraphError(
                         InferMetaBreak(
-                            f"InferMeta encountered {type(err)}, but all args are not symbolic."
+                            f"InferMeta encountered {type(err)}, but all args are not symbolic.\n\n, {err}"
                         )
                     )
 
diff --git a/python/paddle/static/custom_pyop.py b/python/paddle/static/custom_pyop.py
new file mode 100644
index 00000000000000..3c6881bb4ea430
--- /dev/null
+++ b/python/paddle/static/custom_pyop.py
@@ -0,0 +1,293 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import inspect
+import types
+from collections.abc import Sequence
+from functools import partial, wraps
+from typing import (
+    Any,
+    Callable,
+    ParamSpec,
+    TypeVar,
+    overload,
+)
+
+import paddle
+from paddle import _C_ops
+from paddle.static.meta_tensor import MetaTensorWrapper
+
+HAS_ARGS_OR_KWARGS: int = inspect.CO_VARARGS | inspect.CO_VARKEYWORDS
+
+
+P1 = ParamSpec("P1")
+R1 = TypeVar("R1")
+P2 = ParamSpec("P2")
+R2 = TypeVar("R2")
+
+
+class MissingArgument:
+    """Marker for a required parameter (one without a default) of ``fn``."""
+    def __init__(self, fn: Callable[P1, R1], name: str):
+        self.fn, self.name = fn, name
+
+    def __repr__(self):
+        return f"<Required parameter '{self.name}' for function {self.fn.__name__}>"
+
+
+def extract_default(fn: Callable[P1, R1], parameter: inspect.Parameter):
+    """Default of ``parameter``; a MissingArgument marker when it has none."""
+    if parameter.kind is inspect.Parameter.VAR_POSITIONAL:
+        return ()  # *args defaults to an empty tuple
+    if parameter.kind is inspect.Parameter.VAR_KEYWORD:
+        return {}  # **kwargs defaults to an empty dict
+    missing = parameter.default is inspect.Parameter.empty
+    return MissingArgument(fn, parameter.name) if missing else parameter.default
+
+
+def get_fn_defaults_params(fn: Callable[P1, R1]) -> tuple:
+    """Defaults of ``fn`` from its first defaulted parameter on, else ()."""
+    defaults = [
+        extract_default(fn, param)
+        for param in inspect.signature(fn).parameters.values()
+    ]
+    for i, default in enumerate(defaults):
+        if not isinstance(default, MissingArgument):
+            return tuple(defaults[i:])
+    return ()  # no parameter has a default
+
+
+def eliminate_positional_only(fn: Callable[P1, R1]) -> Callable[P1, R1]:
+    """Rebuild ``fn`` so every parameter is a plain positional-or-keyword one.
+
+    Positional-only, keyword-only, ``*args`` and ``**kwargs`` parameters all
+    become regular parameters of the new function, so each of them can be
+    passed by keyword (``bind_constants`` relies on this when it binds the
+    constant arguments through ``functools.partial``).
+
+    Raises:
+        ValueError: if the running Python has no ``co_posonlyargcount``
+            (Python < 3.8), where this rewrite is not supported.
+    """
+    code = fn.__code__
+    if not hasattr(code, "co_posonlyargcount"):
+        raise ValueError(
+            "eliminate_positional_only requires Python 3.8 or newer"
+        )
+
+    has_varargs = bool(code.co_flags & inspect.CO_VARARGS)
+    has_varkeywords = bool(code.co_flags & inspect.CO_VARKEYWORDS)
+    # ``co_varnames`` lists positional, keyword-only, *args and **kwargs names
+    # in that order, so widening ``co_argcount`` makes them all regular
+    # arguments; the *args/**kwargs flags must be cleared to match.
+    new_code = code.replace(
+        co_argcount=(
+            code.co_argcount
+            + code.co_kwonlyargcount
+            + has_varargs
+            + has_varkeywords
+        ),
+        co_posonlyargcount=0,
+        co_kwonlyargcount=0,
+        co_flags=code.co_flags & ~HAS_ARGS_OR_KWARGS,
+    )
+
+    # NOTE(review): defaults are collected in signature order, which differs
+    # from ``co_varnames`` order when *args precedes keyword-only parameters.
+    new_fn = types.FunctionType(
+        new_code,
+        fn.__globals__,
+        fn.__name__,
+        get_fn_defaults_params(fn),
+        fn.__closure__,
+    )
+    new_fn.__doc__ = fn.__doc__
+    new_fn.__annotations__ = fn.__annotations__
+    new_fn.__kwdefaults__ = None
+    return new_fn
+
+
+def bind_constants(fn, infer_meta, input_names, *args, **kwargs):
+    """Split the bound arguments of ``fn`` into pir.Value inputs and constants,
+    pre-binding the constants to both callables (``input_names`` is unused)."""
+
+    bound = inspect.signature(fn).bind(*args, **kwargs)
+    bound.apply_defaults()
+    # NOTE: dicts keep insertion order, so signature order is preserved.
+    value_params = {
+        name: arg
+        for name, arg in bound.arguments.items()
+        if isinstance(arg, paddle.pir.Value)
+    }
+    constants = {
+        name: arg
+        for name, arg in bound.arguments.items()
+        if name not in value_params
+    }
+    fn_rebuilt = eliminate_positional_only(fn)
+    infer_meta_rebuilt = eliminate_positional_only(infer_meta)
+    return (
+        list(value_params),
+        partial(fn_rebuilt, **constants),
+        partial(infer_meta_rebuilt, **constants),
+        list(value_params.values()),
+        constants,
+    )
+
+
+def run_in_dynamic_mode(fn):
+    """Wrap ``fn`` to run inside ``paddle.base.dygraph.base.guard()``."""
+    def dynamic_mode_fn(*call_args, **call_kwargs):
+        with paddle.base.dygraph.base.guard():
+            try:
+                return fn(*call_args, **call_kwargs)
+            except Exception as error:
+                print(error)  # echo the failure, then let it propagate
+                raise
+    return dynamic_mode_fn
+
+
+from collections.abc import Mapping
+
+
+def custom_hash(obj):
+    """Hash ``obj`` even if it, or something nested in it, is unhashable.
+
+    Lists, sets and dicts are hashed from their elements; sets and dicts
+    ignore element order. Other unhashable objects fall back to ``id``.
+    """
+    if isinstance(obj, (int, float, str, bool, bytes)):
+        return hash(obj)
+
+    # Hashable containers; a tuple with an unhashable item falls through.
+    if isinstance(obj, (tuple, frozenset)):
+        try:
+            return hash(obj)
+        except TypeError:
+            pass
+
+    # Sets iterate in an insertion-dependent order, so their element hashes
+    # are sorted first; equal sets then always produce the same hash.
+    if isinstance(obj, set):
+        return hash((0, *sorted(custom_hash(item) for item in obj)))
+
+    # Ordered sequences such as list: 0 prefix vs. the 1 prefix of mappings.
+    if isinstance(obj, Sequence):
+        return hash((0, *(custom_hash(item) for item in obj)))
+
+    # Mappings such as dict: order-insensitive over (key, value) hash pairs.
+    if isinstance(obj, Mapping):
+        pairs = ((custom_hash(k), custom_hash(v)) for k, v in obj.items())
+        return hash((1, *sorted(pairs)))
+
+    try:
+        return hash(obj)
+    except TypeError:
+        return id(obj)
+
+
+@overload
+def register_op(
+    fn: Callable[P1, R1],
+    /,
+    *,
+    name: str | None = None,
+    infer_meta: Callable[..., Any] | None = None,
+    input_names: list[str] | None = None,
+    output_names: list[str] | None = None,
+    inplace_map: dict[str, str] | None = None,
+) -> Callable[P1, R1]: ...
+
+
+@overload
+def register_op(
+    fn: None = None,
+    /,
+    *,
+    name: str | None = None,
+    infer_meta: Callable[..., Any] | None = None,
+    input_names: list[str] | None = None,
+    output_names: list[str] | None = None,
+    inplace_map: dict[str, str] | None = None,
+) -> Callable[[Callable[P1, R1]], Callable[P1, R1]]: ...
+
+
+def register_op(
+    fn: Callable[P1, R1] | None = None,
+    /,
+    *,
+    name: str | None = None,
+    infer_meta: Callable[..., Any] | None = None,
+    input_names: list[str] | None = None,
+    output_names: list[str] | None = None,
+    inplace_map: dict[str, str] | None = None,
+):
+    """Decorator that registers ``fn`` as a custom Python operator.
+
+    In dynamic mode the function runs as is. In static mode the call is
+    lowered through ``_C_ops._run_custom_pyop``, which needs ``infer_meta``,
+    ``input_names`` and ``output_names``.
+    """
+
+    def _register_op(real_fn: Callable[P1, R1]) -> Callable[P1, R1]:
+        op_name = name or real_fn.__name__
+
+        @paddle.jit.marker.unified
+        @wraps(real_fn)
+        def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
+            if paddle.in_dynamic_mode():
+                return real_fn(*args, **kwargs)
+            if None in (infer_meta, input_names, output_names):
+                raise ValueError(
+                    f"register_op({op_name}): infer_meta, input_names and "
+                    "output_names are required in static mode"
+                )
+
+            (
+                mutable_arg_names,
+                bound_constants_fn,
+                bound_constants_infer_meta,
+                args,
+                const_params,
+            ) = bind_constants(
+                real_fn, infer_meta, input_names, *args, **kwargs
+            )
+            assert len(mutable_arg_names) == len(input_names), (
+                f"{mutable_arg_names=} != {input_names=}"
+            )
+
+            # Each distinct set of constants gets its own operator name.
+            const_params_hash = custom_hash(const_params)
+            out = _C_ops._run_custom_pyop(
+                *args,  # TODO: decide whether kwargs must be forwarded too
+                name=f"{op_name}_{const_params_hash}",
+                input_names=input_names,
+                output_names=output_names,
+                attrs={
+                    "infer_meta_fn_ptr": MetaTensorWrapper(
+                        bound_constants_infer_meta
+                    ),
+                    "fn_ptr": run_in_dynamic_mode(bound_constants_fn),
+                },
+                inplace_map=inplace_map or {},
+            )
+            return out[0] if len(output_names) == 1 else out
+
+        return wrapped_fn
+
+    # ``@register_op(...)`` gives fn=None; bare ``@register_op`` passes fn.
+    if fn is None:
+        return _register_op
+    return _register_op(fn)
diff --git a/python/paddle/static/meta_tensor.py b/python/paddle/static/meta_tensor.py
index 1c96015deef308..24c7124d5a7cb0 100644
--- a/python/paddle/static/meta_tensor.py
+++ b/python/paddle/static/meta_tensor.py
@@ -12,15 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from functools import wraps
+
 from ..base.libpaddle import IrMetaTensor, IrTensor
 
 
 class MetaTensor:
-    def __init__(self, shape=[], dtype="float32"):
-        self.ir_tensor = IrTensor()
+    def __init__(self, *, shape=None, dtype=None, ir_tensor=None):
+        self.ir_tensor = IrTensor() if ir_tensor is None else ir_tensor
         self.ir_meta_tensor = IrMetaTensor(self.ir_tensor)
-        self.ir_meta_tensor.set_shape(shape)
-        self.ir_meta_tensor.set_dtype(dtype)
+        if shape is not None:
+            self.ir_meta_tensor.set_shape(shape)
+        if dtype is not None:
+            self.ir_meta_tensor.set_dtype(dtype)
 
     def set_shape(self, shape):
         self.ir_meta_tensor.set_shape(shape)
@@ -41,3 +45,22 @@ def __eq__(self, other):
             self.ir_meta_tensor.dtype == other.ir_meta_tensor.dtype
             and self.ir_meta_tensor.shape == other.ir_meta_tensor.shape
         )
+
+
+def MetaTensorWrapper(fn):
+    """Adapt ``fn`` (MetaTensor in/out) to take and return raw IrTensor."""
+
+    @wraps(fn)
+    def wrapper(*args, **kwargs):
+        def lift(v):  # IrTensor -> MetaTensor; anything else passes through
+            return MetaTensor(ir_tensor=v) if isinstance(v, IrTensor) else v
+
+        new_kwargs = {key: lift(value) for key, value in kwargs.items()}
+        outputs = fn(*(lift(arg) for arg in args), **new_kwargs)
+        if isinstance(outputs, (list, tuple)):
+            # Return a tuple: the C++ runner (CreatePyFuncRunner) only unpacks
+            # py::tuple and would try to cast a list as one single tensor.
+            return tuple(output.ir_tensor for output in outputs)
+        return outputs.ir_tensor
+
+    return wrapper

From e14ac1f0bc661c67e8721c84d2a3945546a53ac2 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Mon, 15 Dec 2025 17:16:43 +0800
Subject: [PATCH 04/33] recover

---
 .../custom_pyop_func_instruction.cc           | 31 +++-----
 .../fluid/pybind/manual_static_op_function.h  | 73 ++++++++-----------
 paddle/phi/api/ext/op_meta_info.h             |  4 +-
 3 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
index 5d3fb17d49b948..e9033831c2460c 100644
--- a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
@@ -380,26 +380,20 @@ CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
   VLOG(6) << "finish process no need buffer";
 }
 
-using IrTensor = paddle::dialect::IrTensor;
-
 void CustomPyOpFuncInstruction::UpdateOutputMeta() {
   VLOG(0) << "enter CustomPyOpFuncInstruction::UpdateOutputMeta()";
 
-  std::vector<IrTensor> vec_dense_inputs;
+  std::vector<paddle::dialect::IrTensor> vec_dense_inputs;
   for (size_t i = 0; i < this->op_->operands().size(); ++i) {
-    vec_dense_inputs.emplace_back(IrTensor());
+    vec_dense_inputs.emplace_back(paddle::dialect::IrTensor());
     vec_dense_inputs.back().SetDims(phi::make_ddim(input_shapes_[i]));
     vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
   }
 
-  std::vector<IrTensor*> vec_ptr_inputs_;
-  for (auto& x : vec_dense_inputs) {
-    vec_ptr_inputs_.push_back(&x);
-  }
-
   VLOG(0) << "CustomPyOpFuncInstruction finish vec_dense_inputs";
 
-  std::vector<IrTensor*> output = (*py_func_infer_meta_ptr_)(vec_ptr_inputs_);
+  std::vector<paddle::dialect::IrTensor> output =
+      (*py_func_infer_meta_ptr_)(vec_dense_inputs);
 
   VLOG(0) << "CustomPyOpFuncInstruction finish "
              "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
@@ -409,8 +403,8 @@ void CustomPyOpFuncInstruction::UpdateOutputMeta() {
     // update dims and dtype
     phi::DenseTensorMeta* out_meta =
         phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
-    out_meta->dims = output[i]->dims();
-    out_meta->dtype = output[i]->dtype();
+    out_meta->dims = output[i].dims();
+    out_meta->dtype = output[i].dtype();
     out_meta->strides = out_meta->calc_strides(out_meta->dims);
   }
 
@@ -470,25 +464,18 @@ void CustomPyOpFuncInstruction::Run() {
       common::errors::InvalidArgument("Custom kernel function is nullptr."));
 
   // 这里假设只有俩参数
-  std::vector<Tensor*> vec_dense_inputs;
+  std::vector<Tensor> vec_dense_inputs;
   size_t num = op_->num_operands();
   VLOG(0) << "Op num_operands: " << num;
   for (size_t i = 0; i < num; ++i) {
-    vec_dense_inputs.push_back(&custom_kernel_ctx_.InputAt(i));
+    vec_dense_inputs.push_back(custom_kernel_ctx_.InputAt(i));
   }
 
   // VLOG(0) << "vec_dense_inputs[0]: " << vec_dense_inputs[0];
   // VLOG(0) << "vec_dense_inputs[1]: " << vec_dense_inputs[1];
 
   auto out = (*py_func_ptr_)(vec_dense_inputs);
-
-  std::vector<Tensor> out_tensor_vector;
-  for (auto& x : out) {
-    out_tensor_vector.emplace_back(*x);
-  }
-
-  custom_kernel_ctx_.ValidateAndAssignOutputs(
-      out_tensor_vector);  // 从宏里面扒出来
+  custom_kernel_ctx_.ValidateAndAssignOutputs(out);  // 从宏里面扒出来
   if (FLAGS_check_cuda_error) [[unlikely]] {
     CUDAErrorCheck("CustomPyOpFuncInstruction " + custom_op_name_ + " finish");
   }
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index d9ba54bca5c1a2..cb5c34be494a47 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1011,55 +1011,46 @@ template <typename T>
 auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
   static_assert(std::is_same_v<T, Tensor> || std::is_same_v<T, IrTensor>,
                 "T must be either Tensor or paddle::dialect::IrTensor");
-  using FuncInputType = std::vector<std::shared_ptr<T>>;
-  using FuncOutputType = std::vector<std::shared_ptr<T>>;
-
-  // 返回 Lambda
-  return [=](const FuncInputType &inputs) -> FuncOutputType {
-    py::gil_scoped_acquire acquire;  // 获取 GIL
-
-    py::handle func_handle =
-        py::handle(reinterpret_cast<PyObject *>(py_func_ptr));
-    if (!func_handle.ptr()) {
-      throw std::runtime_error("Python function pointer is null for op: " +
-                               op_name);
-    }
-    py::function py_func = py::reinterpret_borrow<py::function>(func_handle);
+
+  using FuncInputType = std::conditional_t<std::is_same_v<T, IrTensor>,
+                                           const std::vector<IrTensor>,
+                                           std::vector<Tensor>>;
+
+  using FuncOutputType = std::conditional_t<std::is_same_v<T, IrTensor>,
+                                            std::vector<IrTensor>,
+                                            std::vector<Tensor>>;
+
+  return [=](FuncInputType &inputs) -> FuncOutputType {
+    py::gil_scoped_acquire acquire;
+    PyObject *py_func = reinterpret_cast<PyObject *>(py_func_ptr);
 
     py::tuple py_args(inputs.size());
-    for (size_t i = 0; i < inputs.size(); ++i) {
-      py_args[i] = py::cast(inputs[i]);
+    size_t index = 0;
+    for (auto &tensor : inputs) {
+      py_args[index++] = py::cast(tensor);
     }
+    Py_INCREF(py_func);
+    PyObject *raw_result = PyObject_CallObject(py_func, py_args.ptr());
+    Py_DECREF(py_func);
 
-    py::object result;
-    try {
-      result = py_func(*py_args);  // 解包调用
-    } catch (const py::error_already_set &e) {
-      throw std::runtime_error("Execution of customPythonOp (" + op_name +
-                               ") failed: " + e.what());
-    }
+    PADDLE_ENFORCE_NOT_NULL(
+        raw_result,
+        common::errors::Fatal(
+            "Execution of the customPythonOp (%s) failed. Please review your "
+            "code, and you may use breakpoint() for debugging.",
+            op_name));
 
-    FuncOutputType outputs;
-    auto cast_to_shared = [&](const py::handle &h) -> std::shared_ptr<T> {
-      if (h.is_none()) return nullptr;
-      try {
-        return h.cast<std::shared_ptr<T>>();
-      } catch (const py::cast_error &) {
-        throw std::runtime_error("Output of customPythonOp (" + op_name +
-                                 ") is not of expected type.");
-      }
-    };
+    py::object result = py::reinterpret_steal<py::object>(raw_result);
+    std::vector<T> outputs;
 
     if (py::isinstance<py::tuple>(result)) {
       py::tuple tuple_result = py::cast<py::tuple>(result);
-      outputs.reserve(tuple_result.size());
       for (const auto &item : tuple_result) {
-        outputs.push_back(cast_to_shared(item));
+        outputs.push_back(py::cast<T>(item));
       }
     } else {
-      outputs.push_back(cast_to_shared(result));
+      outputs.push_back(py::cast<T>(result));
     }
-
     return outputs;
   };
 }
@@ -1176,6 +1167,7 @@ static PyObject *run_custom_pyop(PyObject *self,
   int vec_input_index = 0;
 
   std::vector<std::shared_ptr<IrTensor>> inputs_ptr_vector;
+  std::vector<IrTensor> vec_dense_inputs;
 
   for (size_t i = 0; i < inputs.size(); ++i) {
     const auto &input = inputs.at(i);
@@ -1237,7 +1229,7 @@ static PyObject *run_custom_pyop(PyObject *self,
       //     paddle::dialect::TransToPhiDataType(input_tensor.dtype()));
       argument_inputs.push_back(input_value);
 
-      inputs_ptr_vector.push_back(std::make_shared<IrTensor>(
+      vec_dense_inputs.push_back(paddle::dialect::IrTensor(
           paddle::dialect::TransToPhiDataType(input_tensor.dtype()),
           input_tensor.dims(),
           input_tensor.data_layout(),
@@ -1258,8 +1250,7 @@ static PyObject *run_custom_pyop(PyObject *self,
                                                  attrs_map["fn_ptr"]));
 
   // 做 infer_meta
-  std::vector<std::shared_ptr<IrTensor>> process_result =
-      infer_meta_py_func(inputs_ptr_vector);
+  std::vector<IrTensor> process_result = infer_meta_py_func(vec_dense_inputs);
   PADDLE_ENFORCE_EQ(
       process_result.size(),
       outputs.size(),
@@ -1366,7 +1357,7 @@ static PyObject *run_custom_pyop(PyObject *self,
             pir::ArrayAttribute::get(pir::IrContext::Instance(), dist_attrs));
       }
     } else {
-      auto dense_out = *(process_result[value_index]);
+      auto dense_out = process_result[value_index];
       auto out_type = paddle::dialect::DenseTensorType::get(
           pir::IrContext::Instance(),
           paddle::dialect::TransToIrDataType(dense_out.dtype()),
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index 3a3aea33964c77..9f247a9c9e295c 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -996,10 +996,10 @@ using InferSpmdFunc = phi::distributed::SpmdInfo (*)(
     const std::vector<CustomSpmdInferAttrArg>& attrs);
 
 using PythonOperatorFunctionType =
-    std::function<std::vector<Tensor*>(std::vector<Tensor*>&)>;
+    std::function<std::vector<Tensor>(std::vector<Tensor>&)>;
 using IrTensor = paddle::dialect::IrTensor;
 using PythonOperatorInferMetaFunctionType =
-    std::function<std::vector<IrTensor*>(const std::vector<IrTensor*>&)>;
+    std::function<std::vector<IrTensor>(const std::vector<IrTensor>&)>;
 
 class PADDLE_API OpMetaInfo {
  public:

From a3101ed5882166f74930b2c4f60d29aad606748a Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Mon, 15 Dec 2025 20:26:05 +0800
Subject: [PATCH 05/33] rename some files and functions

---
 ... python_operation_function_instruction.cc} | 221 ++++++++----------
 ...> python_operation_function_instruction.h} |  13 +-
 .../framework/new_executor/pir_interpreter.cc |   4 +-
 .../pir/transforms/pd_op_to_kernel_pass.cc    |   1 -
 paddle/phi/api/ext/op_meta_info.h             |   4 +-
 paddle/phi/api/lib/op_meta_info.cc            |   8 +-
 6 files changed, 113 insertions(+), 138 deletions(-)
 rename paddle/fluid/framework/new_executor/instruction/{custom_pyop_func_instruction.cc => python_operation_function_instruction.cc} (68%)
 rename paddle/fluid/framework/new_executor/instruction/{custom_pyop_func_instruction.h => python_operation_function_instruction.h} (86%)

diff --git a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
similarity index 68%
rename from paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
rename to paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
index e9033831c2460c..2db89da0fb08c2 100644
--- a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h"
 #include "paddle/fluid/framework/custom_operator_utils.h"
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
@@ -27,12 +27,12 @@ COMMON_DECLARE_bool(check_cuda_error);
 
 namespace paddle::framework {
 
-void CustomPyOpFuncInstruction::BuildCustomContext(
+void PythonOperationFunctionInstruction::BuildCustomContext(
     const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
   PADDLE_ENFORCE_NOT_NULL(
       custom_op_meta_,
       common::errors::PreconditionNotMet(
-          "CustomPyOpFuncInstruction: custom_op_meta_ is null"));
+          "PythonOperationFunctionInstruction: custom_op_meta_ is null"));
 
   auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
   VLOG(6) << "op_inplace_map.size(): " << op_inplace_map.size();
@@ -58,8 +58,6 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
   }
 
   Scope* inner_scope = value_exec_info_.GetScope();
-  VLOG(6) << "Build custom python op infermeta param inner_scope["
-          << inner_scope << "]";
 
   auto attr_map = op_->attributes();
 
@@ -81,24 +79,25 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
     if (!IsInvalid(ptr)) {
       if (op_yaml_info.GetInputType(op_yaml_info.InputName2Id().at(t)) ==
           "pir::VectorType<paddle::dialect::DenseTensorType>") {
-        vec_input_name2id_map_[t] = vec_input_index;
-        vec_input_index++;
-        vec_input_ptrs_.emplace_back();
-        // NOTE(YuanRisheng): In dygraph mode, we can not distinguish Tensor and
-        // vector<Tensor> when user inputs None, so dygraph mode appends one
-        // un-initialized Tensor to CustomOpKernelContext. To be compatible with
-        // dygraph mode, `custom_vec_in` also emplace_back one un-initialized
-        // tensor here.
-        std::vector<paddle::Tensor> custom_vec_in;
-        custom_vec_in.emplace_back(paddle::Tensor());
-        custom_kernel_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
+        // vec_input_name2id_map_[t] = vec_input_index;
+        // vec_input_index++;
+        // vec_input_ptrs_.emplace_back();
+        // // NOTE(YuanRisheng): In dygraph mode, we can not distinguish Tensor
+        // and
+        // // vector<Tensor> when user inputs None, so dygraph mode appends one
+        // // un-initialized Tensor to CustomOpKernelContext. To be compatible
+        // with
+        // // dygraph mode, `custom_vec_in` also emplace_back one un-initialized
+        // // tensor here.
+        // std::vector<paddle::Tensor> custom_vec_in;
+        // custom_vec_in.emplace_back(paddle::Tensor());
+        // python_operator_function_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
       } else {
         input_name2id_map_[t] = input_index;
         input_index++;
         input_ptrs_.emplace_back(nullptr);
-        custom_kernel_ctx_.EmplaceBackInput(paddle::Tensor());
+        python_operator_function_ctx_.EmplaceBackInput(paddle::Tensor());
       }
-      VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t;
       continue;
     }
     auto in_var_name = value_exec_info_.GetVarName(ptr);
@@ -121,34 +120,33 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
       input_ptrs_.push_back(dense_tensor_in);
       paddle::Tensor custom_in;
       custom_in.set_impl(tensor_in);
-      custom_kernel_ctx_.EmplaceBackInput(std::move(custom_in));
+      python_operator_function_ctx_.EmplaceBackInput(std::move(custom_in));
     } else if (var->IsType<VariableRefArray>()) {
-      std::vector<phi::DenseTensor*> vec_input_ptrs;
-      std::vector<paddle::Tensor> vec_custom_in;
-      auto& variable_array = var->Get<VariableRefArray>();
-      for (size_t i = 0; i < variable_array.size(); ++i) {
-        if (variable_array[i]->IsType<phi::DenseTensor>()) {
-          phi::DenseTensor* dense_tensor_in = const_cast<phi::DenseTensor*>(
-              &(variable_array[i]->Get<phi::DenseTensor>()));
-          std::shared_ptr<phi::DenseTensor> tensor_in(
-              dense_tensor_in, [](phi::DenseTensor* ptr) {
-                VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
-              });
-          vec_input_ptrs.push_back(dense_tensor_in);
-          paddle::Tensor custom_in;
-          custom_in.set_impl(tensor_in);
-          vec_custom_in.push_back(std::move(custom_in));
-        } else {
-          PADDLE_THROW(common::errors::Unimplemented(
-              "Only support Vector<DenseTensor> and vector<SelectedRows> now, "
-              "not support vector<%d>.",
-              variable_array[i]->Type()));
-        }
-      }
-      vec_input_name2id_map_[t] = vec_input_index;
-      vec_input_index++;
-      vec_input_ptrs_.push_back(vec_input_ptrs);
-      custom_kernel_ctx_.EmplaceBackInputs(vec_custom_in);
+      // std::vector<phi::DenseTensor*> vec_input_ptrs;
+      // std::vector<paddle::Tensor> vec_custom_in;
+      // auto& variable_array = var->Get<VariableRefArray>();
+      // for (size_t i = 0; i < variable_array.size(); ++i) {
+      //   if (variable_array[i]->IsType<phi::DenseTensor>()) {
+      //     phi::DenseTensor* dense_tensor_in = const_cast<phi::DenseTensor*>(
+      //         &(variable_array[i]->Get<phi::DenseTensor>()));
+      //     std::shared_ptr<phi::DenseTensor> tensor_in(
+      //         dense_tensor_in, [](phi::DenseTensor* ptr) {
+      //           VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+      //         });
+      //     vec_input_ptrs.push_back(dense_tensor_in);
+      //     paddle::Tensor custom_in;
+      //     custom_in.set_impl(tensor_in);
+      //     vec_custom_in.push_back(std::move(custom_in));
+      //   } else {
+      //     PADDLE_THROW(common::errors::Unimplemented(
+      //         "Only support Vector<DenseTensor> and vector<SelectedRows> now,
+      //         " "not support vector<%d>.", variable_array[i]->Type()));
+      //   }
+      // }
+      // vec_input_name2id_map_[t] = vec_input_index;
+      // vec_input_index++;
+      // vec_input_ptrs_.push_back(vec_input_ptrs);
+      // python_operator_function_ctx_.EmplaceBackInputs(vec_custom_in);
     } else {
       PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d] ",
                                                  var->Type()));
@@ -169,12 +167,12 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
     if (attr_type_name == "pir::Int32Attribute") {
       custom_attrs_.push_back(
           attr_map[t].dyn_cast<pir::Int32Attribute>().data());
-      custom_kernel_ctx_.EmplaceBackAttr(
+      python_operator_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int32Attribute>().data());
     } else if (attr_type_name == "pir::Int64Attribute") {
       custom_attrs_.push_back(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
-      custom_kernel_ctx_.EmplaceBackAttr(
+      python_operator_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
     } else {
       PADDLE_THROW(common::errors::Unimplemented("attr type not support [%s] ",
@@ -205,14 +203,12 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
               out_name));
       VLOG(3) << "Custom Operator: BuildContext - inplace optional outputs : "
               << out_name << " is None.";
-      custom_kernel_ctx_.EmplaceBackOutput(paddle::Tensor());
+      python_operator_function_ctx_.EmplaceBackOutput(paddle::Tensor());
 
       VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
       continue;
     }
-    VLOG(0) << "WHere am I?";
     if (out_ptr.type().isa<paddle::dialect::AllocatedDenseTensorType>()) {
-      VLOG(0) << "WHere am I?  1111111111";
       auto dense_tensor_out =
           inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
               ->GetMutable<phi::DenseTensor>();
@@ -225,43 +221,43 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
       // here only can copy the output tensor into context
       custom_out.set_impl(tensor_out);
 
-      custom_kernel_ctx_.EmplaceBackOutput(std::move(custom_out));
+      python_operator_function_ctx_.EmplaceBackOutput(std::move(custom_out));
       VLOG(8) << "ctx->EmplaceBackOutput DenseTensor: "
               << value_exec_info_.GetVarName(out_ptr);
     } else if (out_ptr.type().isa<pir::VectorType>()) {
-      VLOG(0) << "WHere am I?  222222222222";
-      std::vector<paddle::Tensor> vec_custom_out;
-      auto& variable_array =
-          inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
-              ->Get<VariableRefArray>();
-      std::vector<paddle::Tensor> custom_vec_out;
-      PADDLE_ENFORCE(
-          !inplace_id_map.empty() || (i == 0UL && op_->num_results() == 1UL),
-          common::errors::PreconditionNotMet(
-              "If custom operator's outputs contains `paddle::Vec()` type "
-              "without setting InplaceMap, it only can hold one output."));
-      for (size_t j = 0; j < variable_array.size(); ++j) {
-        if (variable_array[j]->IsType<phi::DenseTensor>()) {
-          auto dense_tensor_out = const_cast<phi::DenseTensor*>(
-              &(variable_array[j]->Get<phi::DenseTensor>()));
-          cache_out_ptrs_.emplace_back(dense_tensor_out);
-          std::shared_ptr<phi::DenseTensor> tensor_out(
-              dense_tensor_out, [](phi::DenseTensor* ptr) {
-                VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
-              });
-          paddle::Tensor custom_out;
-          custom_out.set_impl(tensor_out);
-          custom_vec_out.push_back(std::move(custom_out));
-        } else {
-          PADDLE_THROW(common::errors::Unimplemented(
-              "Only support Vector<DenseTensor> now, "
-              "not support vector<%d>.",
-              variable_array[j]->Type()));
-        }
-      }
-      VLOG(8) << "ctx->EmplaceBackOutput VariableRefArray: "
-              << value_exec_info_.GetVarName(out_ptr);
-      custom_kernel_ctx_.EmplaceBackOutputs(custom_vec_out);
+      // VLOG(0) << "WHere am I?  222222222222";
+      // std::vector<paddle::Tensor> vec_custom_out;
+      // auto& variable_array =
+      //     inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
+      //         ->Get<VariableRefArray>();
+      // std::vector<paddle::Tensor> custom_vec_out;
+      // PADDLE_ENFORCE(
+      //     !inplace_id_map.empty() || (i == 0UL && op_->num_results() == 1UL),
+      //     common::errors::PreconditionNotMet(
+      //         "If custom operator's outputs contains `paddle::Vec()` type "
+      //         "without setting InplaceMap, it only can hold one output."));
+      // for (size_t j = 0; j < variable_array.size(); ++j) {
+      //   if (variable_array[j]->IsType<phi::DenseTensor>()) {
+      //     auto dense_tensor_out = const_cast<phi::DenseTensor*>(
+      //         &(variable_array[j]->Get<phi::DenseTensor>()));
+      //     cache_out_ptrs_.emplace_back(dense_tensor_out);
+      //     std::shared_ptr<phi::DenseTensor> tensor_out(
+      //         dense_tensor_out, [](phi::DenseTensor* ptr) {
+      //           VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
+      //         });
+      //     paddle::Tensor custom_out;
+      //     custom_out.set_impl(tensor_out);
+      //     custom_vec_out.push_back(std::move(custom_out));
+      //   } else {
+      //     PADDLE_THROW(common::errors::Unimplemented(
+      //         "Only support Vector<DenseTensor> now, "
+      //         "not support vector<%d>.",
+      //         variable_array[j]->Type()));
+      //   }
+      // }
+      // VLOG(8) << "ctx->EmplaceBackOutput VariableRefArray: "
+      //         << value_exec_info_.GetVarName(out_ptr);
+      // python_operator_function_ctx_.EmplaceBackOutputs(custom_vec_out);
     } else {
       PADDLE_THROW(common::errors::Unimplemented(
           "only support DenseTensor and vector "));
@@ -272,11 +268,11 @@ void CustomPyOpFuncInstruction::BuildCustomContext(
   auto& op_outputs = OpMetaInfoHelper::GetOutputs(*custom_op_meta_);
 
   // handle inplace map
-  custom_kernel_ctx_.UpdatePlainOutputs(op_inputs, op_outputs, op_inplace_map);
-  VLOG(6) << "Done build custom context";
+  python_operator_function_ctx_.UpdatePlainOutputs(
+      op_inputs, op_outputs, op_inplace_map);
 }
 
-CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
+PythonOperationFunctionInstruction::PythonOperationFunctionInstruction(
     size_t id,
     const phi::Place& place,
     pir::Operation* op,
@@ -293,17 +289,12 @@ CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
       vec_input_ptrs_(),
       cache_out_ptrs_(),
       value_exec_info_(value_exec_info) {
-  std::cout << "CustomPyOpFuncInstruction::CustomPyOpFuncInstruction"
+  std::cout << "PythonOperationFunctionInstruction::"
+               "PythonOperationFunctionInstruction"
             << std::endl;
 
   // auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
   auto op_attributes = op->attributes();
-
-  for (const auto& attr : op_attributes) {
-    VLOG(6) << "111111attr name: " << attr.first
-            << " attr type: " << attr.second;
-  }
-
   auto op_name =
       op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
   custom_op_name_ = op_name;
@@ -330,18 +321,11 @@ CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
   const auto& op_meta =
       paddle::framework::detail::GetPythonOperatorInfoByPirName(op_name);
   custom_op_meta_ = &op_meta;  // 后面把这个 custom_op_meta_ 删了吧啊？没啥用
-  // infershape_func_ = OpMetaInfoHelper::GetInferShapeFn(op_meta);
-  // inferdtype_func_ = OpMetaInfoHelper::GetInferDtypeFn(op_meta);
 
-  // kernel_func_ = OpMetaInfoHelper::GetKernelFn(op_meta);
   py_func_ptr_ = &(OpMetaInfoHelper::GetPythonOperatorFunction(op_meta));
   py_func_infer_meta_ptr_ =
       &(OpMetaInfoHelper::GetPythonOperatorInferMetaFunction(op_meta));
 
-  // VLOG(6) << "infershape_func_: " << infershape_func_;
-  // VLOG(6) << "inferdtype_func_: " << inferdtype_func_;
-  // VLOG(6) << "kernel_func_: " << kernel_func_;
-
   BuildCustomContext(yaml_info_parser);
   VLOG(6) << "finish process custom context";
   auto kernel_key = op_attributes.at("kernel_key")
@@ -380,8 +364,8 @@ CustomPyOpFuncInstruction::CustomPyOpFuncInstruction(
   VLOG(6) << "finish process no need buffer";
 }
 
-void CustomPyOpFuncInstruction::UpdateOutputMeta() {
-  VLOG(0) << "enter CustomPyOpFuncInstruction::UpdateOutputMeta()";
+void PythonOperationFunctionInstruction::UpdateOutputMeta() {
+  VLOG(0) << "enter PythonOperationFunctionInstruction::UpdateOutputMeta()";
 
   std::vector<paddle::dialect::IrTensor> vec_dense_inputs;
   for (size_t i = 0; i < this->op_->operands().size(); ++i) {
@@ -390,12 +374,12 @@ void CustomPyOpFuncInstruction::UpdateOutputMeta() {
     vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
   }
 
-  VLOG(0) << "CustomPyOpFuncInstruction finish vec_dense_inputs";
+  VLOG(0) << "PythonOperationFunctionInstruction finish vec_dense_inputs";
 
   std::vector<paddle::dialect::IrTensor> output =
       (*py_func_infer_meta_ptr_)(vec_dense_inputs);
 
-  VLOG(0) << "CustomPyOpFuncInstruction finish "
+  VLOG(0) << "PythonOperationFunctionInstruction finish "
              "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
 
   for (size_t i = 0; i < cache_out_ptrs_.size(); ++i) {
@@ -408,10 +392,10 @@ void CustomPyOpFuncInstruction::UpdateOutputMeta() {
     out_meta->strides = out_meta->calc_strides(out_meta->dims);
   }
 
-  VLOG(0) << "CustomPyOpFuncInstruction finish out_meta";
+  VLOG(0) << "PythonOperationFunctionInstruction finish out_meta";
 }
 
-void CustomPyOpFuncInstruction::BuildShapeDtype() {
+void PythonOperationFunctionInstruction::BuildShapeDtype() {
   input_shapes_.clear();
   input_dtypes_.clear();
   vec_input_shapes_.clear();
@@ -439,9 +423,10 @@ void CustomPyOpFuncInstruction::BuildShapeDtype() {
   }
 }
 
-void CustomPyOpFuncInstruction::Run() {
+void PythonOperationFunctionInstruction::Run() {
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("CustomPyOpFuncInstruction " + custom_op_name_ + " begin");
+    CUDAErrorCheck("PythonOperationFunctionInstruction " + custom_op_name_ +
+                   " begin");
   }
 
   VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
@@ -452,13 +437,6 @@ void CustomPyOpFuncInstruction::Run() {
     ShareVarBuffer(pair.first, pair.second);
   }
 
-  // auto& vec_dense_inputs = custom_kernel_ctx_;
-
-  VLOG(6) << "Run custom op " << custom_op_name_ << " kernel.";
-  // check kernel_func_ is nullptr
-  // PADDLE_ENFORCE_NOT_NULL(kernel_func_,
-  //                         common::errors::InvalidArgument(
-  //                             "Custom kernel function is nullptr."));
   PADDLE_ENFORCE_NOT_NULL(
       py_func_ptr_,
       common::errors::InvalidArgument("Custom kernel function is nullptr."));
@@ -468,16 +446,15 @@ void CustomPyOpFuncInstruction::Run() {
   size_t num = op_->num_operands();
   VLOG(0) << "Op num_operands: " << num;
   for (size_t i = 0; i < num; ++i) {
-    vec_dense_inputs.push_back(custom_kernel_ctx_.InputAt(i));
+    vec_dense_inputs.push_back(python_operator_function_ctx_.InputAt(i));
   }
 
-  // VLOG(0) << "vec_dense_inputs[0]: " << vec_dense_inputs[0];
-  // VLOG(0) << "vec_dense_inputs[1]: " << vec_dense_inputs[1];
-
   auto out = (*py_func_ptr_)(vec_dense_inputs);
-  custom_kernel_ctx_.ValidateAndAssignOutputs(out);  // 从宏里面扒出来
+  python_operator_function_ctx_.ValidateAndAssignOutputs(
+      out);  // 从宏里面扒出来
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("CustomPyOpFuncInstruction " + custom_op_name_ + " finish");
+    CUDAErrorCheck("PythonOperationFunctionInstruction " + custom_op_name_ +
+                   " finish");
   }
 }
 }  // namespace paddle::framework
diff --git a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h
similarity index 86%
rename from paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h
rename to paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h
index 70cbeb6fe4e1b5..e46e98bb38223a 100644
--- a/paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h
@@ -25,12 +25,12 @@ class Operation;
 namespace paddle {
 namespace framework {
 class Scope;
-class CustomPyOpFuncInstruction : public InstructionBase {
+class PythonOperationFunctionInstruction : public InstructionBase {
  public:
-  CustomPyOpFuncInstruction(size_t id,
-                            const phi::Place& place,
-                            ::pir::Operation* op,
-                            const ValueExecutionInfo& value_exec_info);
+  PythonOperationFunctionInstruction(size_t id,
+                                     const phi::Place& place,
+                                     ::pir::Operation* op,
+                                     const ValueExecutionInfo& value_exec_info);
 
   ::pir::Operation* Operation() const override { return op_; }
 
@@ -45,10 +45,9 @@ class CustomPyOpFuncInstruction : public InstructionBase {
       const paddle::dialect::OpYamlInfoParser& op_yaml_info);
 
   void BuildShapeDtype();
-
   void UpdateOutputMeta();
 
-  paddle::CustomOpKernelContext custom_kernel_ctx_;
+  paddle::CustomOpKernelContext python_operator_function_ctx_;
   paddle::KernelFunc kernel_func_ = nullptr;
 
   const paddle::PythonOperatorFunctionType* py_func_ptr_ = nullptr;
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index f31a3e2dd96070..4e0c63c0d6ac60 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -62,10 +62,10 @@
 #include "paddle/fluid/framework/new_executor/instruction/control_flow/yield_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/cuda_graph_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.h"
-#include "paddle/fluid/framework/new_executor/instruction/custom_pyop_func_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/tensorrt_engine_instruction.h"
 #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
 #include "paddle/fluid/pir/dialect/kernel/ir/kernel_attribute.h"
@@ -961,7 +961,7 @@ void PirInterpreter::BuildInstruction() {
               op_idx++, place_, &op, *(value_exe_info_.get())));
     } else if (op.dialect()->name() == "custom_py_func") {
       vec_instruction_base_.emplace_back(
-          std::make_unique<CustomPyOpFuncInstruction>(
+          std::make_unique<PythonOperationFunctionInstruction>(
               op_idx++, place_, &op, *(value_exe_info_.get())));
     } else if (paddle::dialect::IsCustomEngineOp(&op)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 9a2af3f2632f82..11dec59c8c4600 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -3687,7 +3687,6 @@ void ProcessBlock(
     auto kernel_name = GetKernelName(op_info_parser.get(), op_item);
     auto kernel_key = GetKernelKey(
         op_item, place, kernel_name, *map_value_pair, op_info_parser.get());
-    VLOG(0) << "kernel type " << kernel_key;
 
     if (paddle::dialect::IsCustomOp(op_item)) {
       HandleForCustomOp(ctx,
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index 9f247a9c9e295c..f5036c6a785d40 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -1032,8 +1032,8 @@ class PADDLE_API OpMetaInfo {
   OpMetaInfo& SetInferSpmdFn(InferSpmdFunc&& func);
 
   // PythonOperator
-  OpMetaInfo& SetCustomPyOpFunction(PythonOperatorFunctionType&& func);
-  OpMetaInfo& SetCustomPyOpInferMetaFunction(
+  OpMetaInfo& SetPythonOperatorFunction(PythonOperatorFunctionType&& func);
+  OpMetaInfo& SetPythonOperatorInferMetaFunction(
       PythonOperatorInferMetaFunctionType&& func);
 
   bool IsGradOp() const;
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 00832c24d71e4e..e48fbb1a4d9922 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -450,13 +450,13 @@ OpMetaInfo& OpMetaInfo::SetInferSpmdFn(InferSpmdFunc&& func) {
   infer_spmd_fn_ = std::forward<InferSpmdFunc>(func);
   return *this;
 }
-OpMetaInfo& OpMetaInfo::SetCustomPyOpFunction(
+OpMetaInfo& OpMetaInfo::SetPythonOperatorFunction(
     PythonOperatorFunctionType&& func) {
   pyop_func_ = std::forward<PythonOperatorFunctionType>(func);
   return *this;
 }
 
-OpMetaInfo& OpMetaInfo::SetCustomPyOpInferMetaFunction(
+OpMetaInfo& OpMetaInfo::SetPythonOperatorInferMetaFunction(
     PythonOperatorInferMetaFunctionType&& func) {
   pyop_func_infer_meta_ =
       std::forward<PythonOperatorInferMetaFunctionType>(func);
@@ -704,14 +704,14 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferSpmdFn(InferSpmdFunc func) {
 
 OpMetaInfoBuilder& OpMetaInfoBuilder::SetPythonOperatorFunction(
     PythonOperatorFunctionType func) {
-  info_ptr_->SetCustomPyOpFunction(
+  info_ptr_->SetPythonOperatorFunction(
       std::forward<PythonOperatorFunctionType>(func));
   return *this;
 }
 
 OpMetaInfoBuilder& OpMetaInfoBuilder::SetPythonOperatorInferMetaFunction(
     PythonOperatorInferMetaFunctionType func) {
-  info_ptr_->SetCustomPyOpInferMetaFunction(
+  info_ptr_->SetPythonOperatorInferMetaFunction(
       std::forward<PythonOperatorInferMetaFunctionType>(func));
   return *this;
 }

From 4ca76577bfbefccd9b5829f175e4636e1cfabe49 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Mon, 15 Dec 2025 23:16:30 +0800
Subject: [PATCH 06/33] use NativeMetaTensor instead of IrTensor

---
 .../python_operation_function_instruction.cc  | 51 ++++++-----
 paddle/fluid/pybind/CMakeLists.txt            |  1 +
 .../fluid/pybind/manual_static_op_function.h  | 45 +++++----
 paddle/fluid/pybind/native_meta_tensor.cc     | 83 +++++++++++++++++
 paddle/fluid/pybind/native_meta_tensor.h      | 22 +++++
 paddle/fluid/pybind/pybind.cc                 |  2 +
 paddle/phi/api/ext/native_meta_tensor.h       | 36 ++++++++
 paddle/phi/api/ext/op_meta_info.h             |  6 +-
 paddle/phi/api/lib/CMakeLists.txt             |  1 +
 paddle/phi/api/lib/native_meta_tensor.cc      | 22 +++++
 python/paddle/static/custom_pyop.py           | 18 ++--
 python/paddle/static/meta_tensor.py           | 91 ++++++++++---------
 12 files changed, 281 insertions(+), 97 deletions(-)
 create mode 100644 paddle/fluid/pybind/native_meta_tensor.cc
 create mode 100644 paddle/fluid/pybind/native_meta_tensor.h
 create mode 100644 paddle/phi/api/ext/native_meta_tensor.h
 create mode 100644 paddle/phi/api/lib/native_meta_tensor.cc

diff --git a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
index 2db89da0fb08c2..8071dd42ce691a 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
@@ -17,6 +17,7 @@
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
 #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 #include "paddle/pir/include/core/builtin_attribute.h"
@@ -364,36 +365,36 @@ PythonOperationFunctionInstruction::PythonOperationFunctionInstruction(
   VLOG(6) << "finish process no need buffer";
 }
 
-void PythonOperationFunctionInstruction::UpdateOutputMeta() {
-  VLOG(0) << "enter PythonOperationFunctionInstruction::UpdateOutputMeta()";
+// void PythonOperationFunctionInstruction::UpdateOutputMeta() {
+//   VLOG(0) << "enter PythonOperationFunctionInstruction::UpdateOutputMeta()";
 
-  std::vector<paddle::dialect::IrTensor> vec_dense_inputs;
-  for (size_t i = 0; i < this->op_->operands().size(); ++i) {
-    vec_dense_inputs.emplace_back(paddle::dialect::IrTensor());
-    vec_dense_inputs.back().SetDims(phi::make_ddim(input_shapes_[i]));
-    vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
-  }
+//   std::vector<paddle::dialect::IrTensor> vec_dense_inputs;
+//   for (size_t i = 0; i < this->op_->operands().size(); ++i) {
+//     vec_dense_inputs.emplace_back(paddle::dialect::IrTensor());
+//     vec_dense_inputs.back().SetDims(phi::make_ddim(input_shapes_[i]));
+//     vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
+//   }
 
-  VLOG(0) << "PythonOperationFunctionInstruction finish vec_dense_inputs";
+//   VLOG(0) << "PythonOperationFunctionInstruction finish vec_dense_inputs";
 
-  std::vector<paddle::dialect::IrTensor> output =
-      (*py_func_infer_meta_ptr_)(vec_dense_inputs);
+//   std::vector<paddle::dialect::IrTensor> output =
+//       (*py_func_infer_meta_ptr_)(vec_dense_inputs);
 
-  VLOG(0) << "PythonOperationFunctionInstruction finish "
-             "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
+//   VLOG(0) << "PythonOperationFunctionInstruction finish "
+//              "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
 
-  for (size_t i = 0; i < cache_out_ptrs_.size(); ++i) {
-    auto out_in_scope = cache_out_ptrs_.at(i);
-    // update dims and dtype
-    phi::DenseTensorMeta* out_meta =
-        phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
-    out_meta->dims = output[i].dims();
-    out_meta->dtype = output[i].dtype();
-    out_meta->strides = out_meta->calc_strides(out_meta->dims);
-  }
+//   for (size_t i = 0; i < cache_out_ptrs_.size(); ++i) {
+//     auto out_in_scope = cache_out_ptrs_.at(i);
+//     // update dims and dtype
+//     phi::DenseTensorMeta* out_meta =
+//         phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
+//     out_meta->dims = output[i].dims();
+//     out_meta->dtype = output[i].dtype();
+//     out_meta->strides = out_meta->calc_strides(out_meta->dims);
+//   }
 
-  VLOG(0) << "PythonOperationFunctionInstruction finish out_meta";
-}
+//   VLOG(0) << "PythonOperationFunctionInstruction finish out_meta";
+// }
 
 void PythonOperationFunctionInstruction::BuildShapeDtype() {
   input_shapes_.clear();
@@ -432,7 +433,7 @@ void PythonOperationFunctionInstruction::Run() {
   VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
   BuildShapeDtype();
 
-  UpdateOutputMeta();
+  // UpdateOutputMeta();
   for (auto& pair : this->InplaceInfo()) {
     ShareVarBuffer(pair.first, pair.second);
   }
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 3e99ab2744ef10..d9bd49e164204f 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -115,6 +115,7 @@ set(PYBIND_SRCS
     graph.cc
     ir_tensor.cc
     ir_meta_tensor.cc
+    native_meta_tensor.cc
     reader_py.cc
     protobuf.cc
     exception.cc
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index cb5c34be494a47..34b585e358bb41 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -30,6 +30,7 @@
 #include "paddle/fluid/pybind/op_callstack_utils.h"
 #include "paddle/fluid/pybind/op_function_common.h"
 #include "paddle/fluid/pybind/static_op_function.h"
+#include "paddle/phi/api/ext/native_meta_tensor.h"
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/infermeta/spmd_rules/rules.h"
@@ -1009,16 +1010,20 @@ using IrTensor = paddle::dialect::IrTensor;
 
 template <typename T>
 auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
-  static_assert(std::is_same_v<T, Tensor> || std::is_same_v<T, IrTensor>,
-                "T must be either Tensor or paddle::dialect::IrTensor");
+  static_assert(
+      std::is_same_v<T, Tensor> || std::is_same_v<T, phi::NativeMetaTensor>,
+      "T must be either Tensor or phi::NativeMetaTensor");
 
-  using FuncInputType = std::conditional_t<std::is_same_v<T, IrTensor>,
-                                           const std::vector<IrTensor>,
-                                           std::vector<Tensor>>;
+  using FuncInputType =
+      std::conditional_t<std::is_same_v<T, phi::NativeMetaTensor>,
+                         const std::vector<phi::NativeMetaTensor>,
+                         std::vector<Tensor>>;
 
-  using FuncOutputType = std::conditional_t<std::is_same_v<T, IrTensor>,
-                                            std::vector<IrTensor>,
-                                            std::vector<Tensor>>;
+  // using FuncOutputType = std::conditional_t<std::is_same_v<T,
+  // phi::NativeMetaTensor>,
+  //                                           std::vector<phi::NativeMetaTensor>,
+  //                                           std::vector<Tensor>>;
+  using FuncOutputType = std::vector<T>;
 
   return [=](FuncInputType &inputs) -> FuncOutputType {
     py::gil_scoped_acquire acquire;
@@ -1110,8 +1115,8 @@ static PyObject *run_custom_pyop(PyObject *self,
   const auto &meta_info_map = OpMetaInfoMap::Instance().GetMap();
 
   auto py_func = CreatePyFuncRunner<Tensor>(attrs_map["fn_ptr"], op_name);
-  auto infer_meta_py_func =
-      CreatePyFuncRunner<IrTensor>(attrs_map["infer_meta_fn_ptr"], op_name);
+  auto infer_meta_py_func = CreatePyFuncRunner<phi::NativeMetaTensor>(
+      attrs_map["infer_meta_fn_ptr"], op_name);
 
   if (meta_info_map.find(op_name) == meta_info_map.end()) {
     std::cout << "We need to register this op first! " << op_name << std::endl;
@@ -1166,8 +1171,9 @@ static PyObject *run_custom_pyop(PyObject *self,
   int input_index = 0;
   int vec_input_index = 0;
 
-  std::vector<std::shared_ptr<IrTensor>> inputs_ptr_vector;
-  std::vector<IrTensor> vec_dense_inputs;
+  // std::vector<IrTensor> vec_dense_inputs;
+  std::vector<phi::NativeMetaTensor> inputs_meta;
+  inputs_meta.reserve(inputs.size());
 
   for (size_t i = 0; i < inputs.size(); ++i) {
     const auto &input = inputs.at(i);
@@ -1229,11 +1235,9 @@ static PyObject *run_custom_pyop(PyObject *self,
       //     paddle::dialect::TransToPhiDataType(input_tensor.dtype()));
       argument_inputs.push_back(input_value);
 
-      vec_dense_inputs.push_back(paddle::dialect::IrTensor(
+      inputs_meta.push_back(phi::NativeMetaTensor(
           paddle::dialect::TransToPhiDataType(input_tensor.dtype()),
-          input_tensor.dims(),
-          input_tensor.data_layout(),
-          {}));
+          input_tensor.dims()));
     }
   }
   argument.AddInputs(argument_inputs);
@@ -1250,7 +1254,14 @@ static PyObject *run_custom_pyop(PyObject *self,
                                                  attrs_map["fn_ptr"]));
 
   // 做 infer_meta
-  std::vector<IrTensor> process_result = infer_meta_py_func(vec_dense_inputs);
+  std::vector<phi::NativeMetaTensor> outputs_meta =
+      infer_meta_py_func(inputs_meta);
+  std::vector<IrTensor> process_result;
+  process_result.reserve(outputs.size());
+  for (auto &out_meta : outputs_meta) {
+    process_result.push_back(
+        IrTensor(out_meta.dtype(), out_meta.dims(), phi::DataLayout::NCHW, {}));
+  }
   PADDLE_ENFORCE_EQ(
       process_result.size(),
       outputs.size(),
diff --git a/paddle/fluid/pybind/native_meta_tensor.cc b/paddle/fluid/pybind/native_meta_tensor.cc
new file mode 100644
index 00000000000000..8bf21b3d6e06f8
--- /dev/null
+++ b/paddle/fluid/pybind/native_meta_tensor.cc
@@ -0,0 +1,83 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "paddle/phi/api/ext/native_meta_tensor.h"
+#include "paddle/fluid/pybind/native_meta_tensor.h"
+#include "pybind11/functional.h"
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+
+namespace paddle::pybind {
+
+void BindNativeMetaTensor(py::module* m) {  // registers NativeMetaTensor on *m
+  py::class_<phi::NativeMetaTensor>(*m, "NativeMetaTensor")
+      .def(py::init<>())
+      .def(py::init<const phi::NativeMetaTensor&>())
+      .def(
+          "set_shape",
+          [](phi::NativeMetaTensor& self, const std::vector<int64_t>& dims) {
+            phi::DDim ddim = phi::make_ddim(dims);
+            self.set_dims(ddim);
+          },
+          "Set tensor dimensions")
+      .def(
+          "set_dtype",
+          [](phi::NativeMetaTensor& self, const std::string& dtype_str) {
+            self.set_dtype(phi::StringToDataType(dtype_str));
+          },
+          "Set tensor data type from string")
+      .def(
+          "set_dtype",
+          [](phi::NativeMetaTensor& self, const phi::DataType& dtype) {
+            self.set_dtype(dtype);
+          },
+          "Set tensor data type from DataType object")
+      .def_property_readonly(
+          "dtype",
+          [](const phi::NativeMetaTensor& self) -> phi::DataType {
+            return self.dtype();
+          },
+          "Get tensor data type")
+      .def_property_readonly(
+          "shape",
+          [](const phi::NativeMetaTensor& self) -> std::vector<int64_t> {
+            const phi::DDim& dims = self.dims();
+            return common::vectorize<int64_t>(dims);
+          },
+          "Get tensor shape")
+      .def("__eq__",
+           [](const phi::NativeMetaTensor& self,
+              const phi::NativeMetaTensor& other) {
+             return self.dtype() == other.dtype() &&
+                    self.dims() == other.dims();
+           },
+           py::is_operator())  // mismatched operand -> NotImplemented
+      .def("__repr__", [](const phi::NativeMetaTensor& self) {
+        const phi::DDim& dims = self.dims();
+        std::ostringstream shape_ss;
+        shape_ss << "[";
+        for (int i = 0; i < dims.size(); ++i) {
+          if (i > 0) {
+            shape_ss << ", ";
+          }
+          shape_ss << dims[i];
+        }
+        shape_ss << "]";
+        return "NativeMetaTensor(shape=" + shape_ss.str() +
+               ", dtype=" + phi::DataTypeToString(self.dtype()) + ")";
+      });
+}
+}  // namespace paddle::pybind
diff --git a/paddle/fluid/pybind/native_meta_tensor.h b/paddle/fluid/pybind/native_meta_tensor.h
new file mode 100644
index 00000000000000..afcd1e7ecffe5e
--- /dev/null
+++ b/paddle/fluid/pybind/native_meta_tensor.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+
+#include "pybind11/pybind11.h"
+
+namespace py = pybind11;
+namespace paddle {
+namespace pybind {
+
+void BindNativeMetaTensor(py::module* m);  // registers NativeMetaTensor on *m
+
+}  // namespace pybind
+}  // namespace paddle
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 461fdda7b2e715..910ad32b4fcfaf 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -129,6 +129,7 @@ limitations under the License. */
 #include "paddle/fluid/pybind/ir_tensor.h"
 #include "paddle/fluid/pybind/jit.h"
 #include "paddle/fluid/pybind/metrics_py.h"
+#include "paddle/fluid/pybind/native_meta_tensor.h"
 #include "paddle/fluid/pybind/pir.h"
 #include "paddle/fluid/pybind/pybind_variant_caster.h"
 #include "paddle/fluid/pybind/python_callable_registry.h"
@@ -1575,6 +1576,7 @@ PYBIND11_MODULE(libpaddle, m) {
   BindCustomDevicePy(&m);
   BindIrTensor(&m);
   BindIrMetaTensor(&m);
+  BindNativeMetaTensor(&m);
   BindEagerUtils(m.ptr());
   BindOpFunctionCommon(m.ptr());
 
diff --git a/paddle/phi/api/ext/native_meta_tensor.h b/paddle/phi/api/ext/native_meta_tensor.h
new file mode 100644
index 00000000000000..60ff1506f5039b
--- /dev/null
+++ b/paddle/phi/api/ext/native_meta_tensor.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/common/macros.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/core/ddim.h"
+
+namespace phi {
+class PADDLE_API NativeMetaTensor {  // holds only a dtype and dims
+ public:
+  NativeMetaTensor() = default;
+  NativeMetaTensor(phi::DataType dtype, phi::DDim dims)
+      : dims_(dims), dtype_(dtype) {}  // same order as member declarations
+  DDim dims() const;
+  DataType dtype() const;
+  void set_dims(const DDim& dims);
+  void set_dtype(DataType dtype);
+
+ private:
+  phi::DDim dims_;
+  phi::DataType dtype_{phi::DataType::FLOAT32};  // dtype when none is given
+};
+}  // namespace phi
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index f5036c6a785d40..106831c588a876 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -22,7 +22,7 @@ limitations under the License. */
 
 #include "paddle/common/exception.h"
 #include "paddle/common/macros.h"
-#include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
+#include "paddle/phi/api/ext/native_meta_tensor.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/distributed/type_defs.h"
 #include "paddle/utils/any.h"
@@ -997,9 +997,9 @@ using InferSpmdFunc = phi::distributed::SpmdInfo (*)(
 
 using PythonOperatorFunctionType =
     std::function<std::vector<Tensor>(std::vector<Tensor>&)>;
-using IrTensor = paddle::dialect::IrTensor;
 using PythonOperatorInferMetaFunctionType =
-    std::function<std::vector<IrTensor>(const std::vector<IrTensor>&)>;
+    std::function<std::vector<phi::NativeMetaTensor>(
+        const std::vector<phi::NativeMetaTensor>&)>;
 
 class PADDLE_API OpMetaInfo {
  public:
diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt
index 00edcf3cba5059..cb7d5e8b20fd73 100644
--- a/paddle/phi/api/lib/CMakeLists.txt
+++ b/paddle/phi/api/lib/CMakeLists.txt
@@ -335,6 +335,7 @@ collect_srcs(
   SRCS
   tensor.cc
   op_meta_info.cc
+  native_meta_tensor.cc
   context_pool.cc
   tensor_utils.cc
   kernel_dispatch.cc
diff --git a/paddle/phi/api/lib/native_meta_tensor.cc b/paddle/phi/api/lib/native_meta_tensor.cc
new file mode 100644
index 00000000000000..32d4a2b22c5a7d
--- /dev/null
+++ b/paddle/phi/api/lib/native_meta_tensor.cc
@@ -0,0 +1,22 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/api/ext/native_meta_tensor.h"
+
+namespace phi {
+DDim NativeMetaTensor::dims() const { return dims_; }  // returns a copy
+DataType NativeMetaTensor::dtype() const { return dtype_; }
+void NativeMetaTensor::set_dims(const DDim& dims) { dims_ = dims; }  // copy
+void NativeMetaTensor::set_dtype(DataType dtype) { dtype_ = dtype; }
+}  // namespace phi
diff --git a/python/paddle/static/custom_pyop.py b/python/paddle/static/custom_pyop.py
index 3c6881bb4ea430..93bec47eb099c2 100644
--- a/python/paddle/static/custom_pyop.py
+++ b/python/paddle/static/custom_pyop.py
@@ -17,17 +17,12 @@
 import types
 from collections.abc import Sequence
 from functools import partial, wraps
-from typing import (
-    Any,
-    Callable,
-    ParamSpec,
-    TypeVar,
-    overload,
-)
+from typing import Any, Callable, ParamSpec, TypeVar, overload
 
 import paddle
 from paddle import _C_ops
-from paddle.static.meta_tensor import MetaTensorWrapper
+
+# from paddle.static.meta_tensor import MetaTensorWrapper
 
 HAS_ARGS_OR_KWARGS: int = inspect.CO_VARARGS | inspect.CO_VARKEYWORDS
 
@@ -273,9 +268,10 @@ def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
                 input_names=input_names,
                 output_names=output_names,
                 attrs={
-                    "infer_meta_fn_ptr": MetaTensorWrapper(
-                        bound_constants_infer_meta
-                    ),
+                    "infer_meta_fn_ptr": bound_constants_infer_meta,
+                    # MetaTensorWrapper(
+                    #     bound_constants_infer_meta
+                    # ),
                     "fn_ptr": run_in_dynamic_mode(bound_constants_fn),
                 },
                 inplace_map=inplace_map or {},
diff --git a/python/paddle/static/meta_tensor.py b/python/paddle/static/meta_tensor.py
index 24c7124d5a7cb0..33262030403c3e 100644
--- a/python/paddle/static/meta_tensor.py
+++ b/python/paddle/static/meta_tensor.py
@@ -12,55 +12,64 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from functools import wraps
 
-from ..base.libpaddle import IrMetaTensor, IrTensor
+from ..base.libpaddle import NativeMetaTensor
 
+# class MetaTensor:
+#     def __init__(self, *, shape=None, dtype=None):
+#         self.native_meta_tensor = NativeMetaTensor()
+#         if shape is not None:
+#             self.native_meta_tensor.set_shape(shape)
+#         if dtype is not None:
+#             self.native_meta_tensor.set_dtype(dtype)
 
-class MetaTensor:
-    def __init__(self, *, shape=None, dtype=None, ir_tensor=None):
-        self.ir_tensor = IrTensor() if ir_tensor is None else ir_tensor
-        self.ir_meta_tensor = IrMetaTensor(self.ir_tensor)
-        if shape is not None:
-            self.ir_meta_tensor.set_shape(shape)
-        if dtype is not None:
-            self.ir_meta_tensor.set_dtype(dtype)
+#     def set_shape(self, shape):
+#         self.native_meta_tensor.set_shape(shape)
 
-    def set_shape(self, shape):
-        self.ir_meta_tensor.set_shape(shape)
+#     @property
+#     def shape(self):
+#         return self.native_meta_tensor.shape
 
-    @property
-    def shape(self):
-        return self.ir_meta_tensor.shape
+#     def set_dtype(self, dtype):
+#         self.native_meta_tensor.set_dtype(dtype)
 
-    def set_dtype(self, dtype):
-        self.ir_meta_tensor.set_dtype(dtype)
+#     @property
+#     def dtype(self):
+#         return self.native_meta_tensor.dtype
 
-    @property
-    def dtype(self):
-        return self.ir_meta_tensor.dtype
+#     def __eq__(self, other):
+#         return (
+#             self.native_meta_tensor.dtype == other.native_meta_tensor.dtype
+#             and self.native_meta_tensor.shape == other.native_meta_tensor.shape
+#         )
 
-    def __eq__(self, other):
-        return (
-            self.ir_meta_tensor.dtype == other.ir_meta_tensor.dtype
-            and self.ir_meta_tensor.shape == other.ir_meta_tensor.shape
-        )
 
+MetaTensor = NativeMetaTensor
 
-def MetaTensorWrapper(fn):
-    @wraps(fn)
-    def wrapper(*args, **kwargs):
-        # IrTensor -> MetaTensor
-        new_args = list(args)
-        for i, arg in enumerate(args):
-            if isinstance(arg, IrTensor):
-                new_args[i] = MetaTensor(ir_tensor=arg)
-        for key, value in kwargs.items():
-            if isinstance(value, IrTensor):
-                kwargs[key] = MetaTensor(ir_tensor=value)
-        outputs = fn(*new_args, **kwargs)
-        if isinstance(outputs, (list, tuple)):
-            return [output.ir_tensor for output in outputs]
-        return outputs.ir_tensor
+# def map_type(fn, type_, structure):
+#     map_fn = lambda v: fn(v) if isinstance(v, type_) else v
+#     return map_structure(map_fn, structure)
 
-    return wrapper
+# def wrap_infer_meta(fn):
+#     @wraps(fn)
+#     def infer_meta(*args, **kwargs):
+#         args, kwargs = map_type((args, kwargs))
+
+
+# def MetaTensorWrapper(fn):
+#     @wraps(fn)
+#     def wrapper(*args, **kwargs):
+#         # IrTensor -> MetaTensor
+#         new_args = list(args)
+#         for i, arg in enumerate(args):
+#             if isinstance(arg, IrTensor):
+#                 new_args[i] = MetaTensor(ir_tensor=arg)
+#         for key, value in kwargs.items():
+#             if isinstance(value, IrTensor):
+#                 kwargs[key] = MetaTensor(ir_tensor=value)
+#         outputs = fn(*new_args, **kwargs)
+#         if isinstance(outputs, (list, tuple)):
+#             return [output.ir_tensor for output in outputs]
+#         return outputs.ir_tensor
+
+#     return wrapper

From d9268674ad916375533a885f21491ebe23711649 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 10:21:14 +0800
Subject: [PATCH 07/33] export `paddle/phi/core/ddim.h`

---
 cmake/inference_lib.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index c28ba5cc320591..783ad47f33717a 100755
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -392,6 +392,10 @@ copy(
   inference_lib_dist
   SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/enforce.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
+copy(  # ddim.h is included by paddle/phi/api/ext/native_meta_tensor.h
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/ddim.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
 copy(
   inference_lib_dist
   SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/string/*.h

From b7231388d5ce482d3b40d8e610e1660db05747c4 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 14:50:55 +0800
Subject: [PATCH 08/33] rm MetaTensor

---
 .../fluid/pybind/manual_static_op_function.h  | 22 +++---
 paddle/fluid/pybind/native_meta_tensor.cc     | 26 +++++++
 python/paddle/static/__init__.py              |  2 +-
 python/paddle/static/meta_tensor.py           | 75 -------------------
 4 files changed, 38 insertions(+), 87 deletions(-)
 delete mode 100644 python/paddle/static/meta_tensor.py

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 34b585e358bb41..93316a7a68ba11 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1019,10 +1019,6 @@ auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
                          const std::vector<phi::NativeMetaTensor>,
                          std::vector<Tensor>>;
 
-  // using FuncOutputType = std::conditional_t<std::is_same_v<T,
-  // phi::NativeMetaTensor>,
-  //                                           std::vector<phi::NativeMetaTensor>,
-  //                                           std::vector<Tensor>>;
   using FuncOutputType = std::vector<T>;
 
   return [=](FuncInputType &inputs) -> FuncOutputType {
@@ -1038,12 +1034,14 @@ auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
     PyObject *raw_result = PyObject_CallObject(py_func, py_args.ptr());
     Py_DECREF(py_func);
 
-    PADDLE_ENFORCE_NOT_NULL(
-        raw_result,
-        common::errors::Fatal(
-            "Execution of the customPythonOp (%s) failed. Please review your "
-            "code, and you may use breakpoint() for debugging.",
-            op_name));
+    if (raw_result == nullptr) {
+      PyErr_Print();
+      PADDLE_THROW(
+          common::errors::Fatal("Execution of the customPythonOp (%s) failed.\n"
+                                "Please review your code, and you may use "
+                                "breakpoint() for debugging.",
+                                op_name));
+    }
 
     py::object result = py::reinterpret_steal<py::object>(raw_result);
     std::vector<T> outputs;
@@ -1253,9 +1251,11 @@ static PyObject *run_custom_pyop(PyObject *self,
                         pir::Int64Attribute::get(pir::IrContext::Instance(),
                                                  attrs_map["fn_ptr"]));
 
-  // 做 infer_meta
+  // Run infer meta
+  VLOG(4) << "Start to run infer meta for " << op_name;
   std::vector<phi::NativeMetaTensor> outputs_meta =
       infer_meta_py_func(inputs_meta);
+  VLOG(4) << "End to run infer meta for " << op_name;
   std::vector<IrTensor> process_result;
   process_result.reserve(outputs.size());
   for (auto &out_meta : outputs_meta) {
diff --git a/paddle/fluid/pybind/native_meta_tensor.cc b/paddle/fluid/pybind/native_meta_tensor.cc
index 8bf21b3d6e06f8..8f891e015d0d2c 100644
--- a/paddle/fluid/pybind/native_meta_tensor.cc
+++ b/paddle/fluid/pybind/native_meta_tensor.cc
@@ -16,6 +16,7 @@
 
 #include "paddle/phi/api/ext/native_meta_tensor.h"
 #include "paddle/fluid/pybind/native_meta_tensor.h"
+#include "paddle/utils/pybind.h"
 #include "pybind11/functional.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
@@ -26,6 +27,31 @@ void BindNativeMetaTensor(py::module* m) {
   py::class_<phi::NativeMetaTensor>(*m, "NativeMetaTensor")
       .def(py::init<>())
       .def(py::init<const phi::NativeMetaTensor&>())
+      .def(py::init([](const py::object& dtype, const py::object& shape) {
+             phi::DataType dt = phi::DataType::FLOAT32;
+             if (!dtype.is_none()) {
+               dt = dtype.cast<phi::DataType>();
+             }
+             std::vector<int64_t> dims;
+             if (py::isinstance<py::list>(shape) ||
+                 py::isinstance<py::tuple>(shape)) {
+               dims = shape.cast<std::vector<int64_t>>();
+             } else if (!shape.is_none()) {  // None leaves dims empty
+               PADDLE_THROW(common::errors::InvalidArgument(
+                   "The shape argument must be a list or tuple of integers "
+                   "or None, but got %s.",
+                   py::str(shape)));
+             }
+             return phi::NativeMetaTensor(dt, phi::make_ddim(dims));
+           }),
+           py::arg("dtype") = py::none(),
+           py::arg("shape") = py::list())
+      .def(
+          "copy",
+          [](const phi::NativeMetaTensor& self) {
+            return phi::NativeMetaTensor(self);
+          },
+          "Create a deep copy of this tensor")
       .def(
           "set_shape",
           [](phi::NativeMetaTensor& self, const std::vector<int64_t>& dims) {
diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py
index b77f30cf86bf88..b625b0b80c13a8 100644
--- a/python/paddle/static/__init__.py
+++ b/python/paddle/static/__init__.py
@@ -38,6 +38,7 @@
     set_ipu_shard,
     xpu_places,
 )
+from ..base.libpaddle import NativeMetaTensor as MetaTensor  # noqa: F401
 from ..base.param_attr import WeightNormParamAttr
 from ..tensor.creation import create_global_var, create_parameter
 from . import amp, nn  # noqa: F401
@@ -64,7 +65,6 @@
     serialize_program,
     set_program_state,
 )
-from .meta_tensor import MetaTensor  # noqa: F401
 from .nn.common import ExponentialMovingAverage, py_func
 from .nn.control_flow import Print
 from .nn.metric import accuracy, auc, ctr_metric_bundle
diff --git a/python/paddle/static/meta_tensor.py b/python/paddle/static/meta_tensor.py
deleted file mode 100644
index 33262030403c3e..00000000000000
--- a/python/paddle/static/meta_tensor.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from ..base.libpaddle import NativeMetaTensor
-
-# class MetaTensor:
-#     def __init__(self, *, shape=None, dtype=None):
-#         self.native_meta_tensor = NativeMetaTensor()
-#         if shape is not None:
-#             self.native_meta_tensor.set_shape(shape)
-#         if dtype is not None:
-#             self.native_meta_tensor.set_dtype(dtype)
-
-#     def set_shape(self, shape):
-#         self.native_meta_tensor.set_shape(shape)
-
-#     @property
-#     def shape(self):
-#         return self.native_meta_tensor.shape
-
-#     def set_dtype(self, dtype):
-#         self.native_meta_tensor.set_dtype(dtype)
-
-#     @property
-#     def dtype(self):
-#         return self.native_meta_tensor.dtype
-
-#     def __eq__(self, other):
-#         return (
-#             self.native_meta_tensor.dtype == other.native_meta_tensor.dtype
-#             and self.native_meta_tensor.shape == other.native_meta_tensor.shape
-#         )
-
-
-MetaTensor = NativeMetaTensor
-
-# def map_type(fn, type_, structure):
-#     map_fn = lambda v: fn(v) if isinstance(v, type_) else v
-#     return map_structure(map_fn, structure)
-
-# def wrap_infer_meta(fn):
-#     @wraps(fn)
-#     def infer_meta(*args, **kwargs):
-#         args, kwargs = map_type((args, kwargs))
-
-
-# def MetaTensorWrapper(fn):
-#     @wraps(fn)
-#     def wrapper(*args, **kwargs):
-#         # IrTensor -> MetaTensor
-#         new_args = list(args)
-#         for i, arg in enumerate(args):
-#             if isinstance(arg, IrTensor):
-#                 new_args[i] = MetaTensor(ir_tensor=arg)
-#         for key, value in kwargs.items():
-#             if isinstance(value, IrTensor):
-#                 kwargs[key] = MetaTensor(ir_tensor=value)
-#         outputs = fn(*new_args, **kwargs)
-#         if isinstance(outputs, (list, tuple)):
-#             return [output.ir_tensor for output in outputs]
-#         return outputs.ir_tensor
-
-#     return wrapper

From 88893b55b12bf318135d12e84a5943051becb331 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 15:42:19 +0800
Subject: [PATCH 09/33] `PythonOperationFunctionInstruction` ->
 `PythonFunctionInstruction`

---
 ...tion.cc => python_function_instruction.cc} | 53 ++++---------------
 ...uction.h => python_function_instruction.h} | 10 ++--
 .../framework/new_executor/pir_interpreter.cc |  4 +-
 .../pir/dialect/kernel/ir/kernel_dialect.cc   | 18 +++----
 .../pir/dialect/kernel/ir/kernel_dialect.h    |  6 +--
 .../pir/transforms/pd_op_to_kernel_pass.cc    |  2 +-
 paddle/phi/api/ext/op_meta_info.h             |  2 +-
 paddle/phi/api/lib/op_meta_info.cc            |  2 +-
 8 files changed, 32 insertions(+), 65 deletions(-)
 rename paddle/fluid/framework/new_executor/instruction/{python_operation_function_instruction.cc => python_function_instruction.cc} (89%)
 rename paddle/fluid/framework/new_executor/instruction/{python_operation_function_instruction.h => python_function_instruction.h} (89%)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
similarity index 89%
rename from paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
rename to paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 8071dd42ce691a..5381e178271554 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/python_function_instruction.h"
 #include "paddle/fluid/framework/custom_operator_utils.h"
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
@@ -28,12 +28,12 @@ COMMON_DECLARE_bool(check_cuda_error);
 
 namespace paddle::framework {
 
-void PythonOperationFunctionInstruction::BuildCustomContext(
+void PythonFunctionInstruction::BuildCustomContext(
     const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
   PADDLE_ENFORCE_NOT_NULL(
       custom_op_meta_,
       common::errors::PreconditionNotMet(
-          "PythonOperationFunctionInstruction: custom_op_meta_ is null"));
+          "PythonFunctionInstruction: custom_op_meta_ is null"));
 
   auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
   VLOG(6) << "op_inplace_map.size(): " << op_inplace_map.size();
@@ -273,7 +273,7 @@ void PythonOperationFunctionInstruction::BuildCustomContext(
       op_inputs, op_outputs, op_inplace_map);
 }
 
-PythonOperationFunctionInstruction::PythonOperationFunctionInstruction(
+PythonFunctionInstruction::PythonFunctionInstruction(
     size_t id,
     const phi::Place& place,
     pir::Operation* op,
@@ -290,8 +290,8 @@ PythonOperationFunctionInstruction::PythonOperationFunctionInstruction(
       vec_input_ptrs_(),
       cache_out_ptrs_(),
       value_exec_info_(value_exec_info) {
-  std::cout << "PythonOperationFunctionInstruction::"
-               "PythonOperationFunctionInstruction"
+  std::cout << "PythonFunctionInstruction::"
+               "PythonFunctionInstruction"
             << std::endl;
 
   // auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
@@ -365,38 +365,7 @@ PythonOperationFunctionInstruction::PythonOperationFunctionInstruction(
   VLOG(6) << "finish process no need buffer";
 }
 
-// void PythonOperationFunctionInstruction::UpdateOutputMeta() {
-//   VLOG(0) << "enter PythonOperationFunctionInstruction::UpdateOutputMeta()";
-
-//   std::vector<paddle::dialect::IrTensor> vec_dense_inputs;
-//   for (size_t i = 0; i < this->op_->operands().size(); ++i) {
-//     vec_dense_inputs.emplace_back(paddle::dialect::IrTensor());
-//     vec_dense_inputs.back().SetDims(phi::make_ddim(input_shapes_[i]));
-//     vec_dense_inputs.back().SetDtype(input_dtypes_[i]);
-//   }
-
-//   VLOG(0) << "PythonOperationFunctionInstruction finish vec_dense_inputs";
-
-//   std::vector<paddle::dialect::IrTensor> output =
-//       (*py_func_infer_meta_ptr_)(vec_dense_inputs);
-
-//   VLOG(0) << "PythonOperationFunctionInstruction finish "
-//              "(*py_func_infer_meta_ptr_)(vec_dense_inputs);";
-
-//   for (size_t i = 0; i < cache_out_ptrs_.size(); ++i) {
-//     auto out_in_scope = cache_out_ptrs_.at(i);
-//     // update dims and dtype
-//     phi::DenseTensorMeta* out_meta =
-//         phi::DenseTensorUtils::GetMutableMeta(out_in_scope);
-//     out_meta->dims = output[i].dims();
-//     out_meta->dtype = output[i].dtype();
-//     out_meta->strides = out_meta->calc_strides(out_meta->dims);
-//   }
-
-//   VLOG(0) << "PythonOperationFunctionInstruction finish out_meta";
-// }
-
-void PythonOperationFunctionInstruction::BuildShapeDtype() {
+void PythonFunctionInstruction::BuildShapeDtype() {
   input_shapes_.clear();
   input_dtypes_.clear();
   vec_input_shapes_.clear();
@@ -424,10 +393,9 @@ void PythonOperationFunctionInstruction::BuildShapeDtype() {
   }
 }
 
-void PythonOperationFunctionInstruction::Run() {
+void PythonFunctionInstruction::Run() {
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("PythonOperationFunctionInstruction " + custom_op_name_ +
-                   " begin");
+    CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " begin");
   }
 
   VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
@@ -454,8 +422,7 @@ void PythonOperationFunctionInstruction::Run() {
   python_operator_function_ctx_.ValidateAndAssignOutputs(
       out);  // 从宏里面扒出来
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("PythonOperationFunctionInstruction " + custom_op_name_ +
-                   " finish");
+    CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " finish");
   }
 }
 }  // namespace paddle::framework
diff --git a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
similarity index 89%
rename from paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h
rename to paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
index e46e98bb38223a..8e0eaf2c24547d 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
@@ -25,12 +25,12 @@ class Operation;
 namespace paddle {
 namespace framework {
 class Scope;
-class PythonOperationFunctionInstruction : public InstructionBase {
+class PythonFunctionInstruction : public InstructionBase {
  public:
-  PythonOperationFunctionInstruction(size_t id,
-                                     const phi::Place& place,
-                                     ::pir::Operation* op,
-                                     const ValueExecutionInfo& value_exec_info);
+  PythonFunctionInstruction(size_t id,
+                            const phi::Place& place,
+                            ::pir::Operation* op,
+                            const ValueExecutionInfo& value_exec_info);
 
   ::pir::Operation* Operation() const override { return op_; }
 
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 4e0c63c0d6ac60..a267feadf33606 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -65,7 +65,7 @@
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
-#include "paddle/fluid/framework/new_executor/instruction/python_operation_function_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/python_function_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/tensorrt_engine_instruction.h"
 #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
 #include "paddle/fluid/pir/dialect/kernel/ir/kernel_attribute.h"
@@ -961,7 +961,7 @@ void PirInterpreter::BuildInstruction() {
               op_idx++, place_, &op, *(value_exe_info_.get())));
     } else if (op.dialect()->name() == "custom_py_func") {
       vec_instruction_base_.emplace_back(
-          std::make_unique<PythonOperationFunctionInstruction>(
+          std::make_unique<PythonFunctionInstruction>(
               op_idx++, place_, &op, *(value_exe_info_.get())));
     } else if (paddle::dialect::IsCustomEngineOp(&op)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
index 490b31e09959f6..6b57822dd2128a 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
@@ -193,25 +193,25 @@ pir::OpPrintFn CustomKernelDialect::PrintOperation(
   };
 }
 
-CustomPyFuncDialect::CustomPyFuncDialect(pir::IrContext *context)
-    : pir::Dialect(name(), context, pir::TypeId::get<CustomPyFuncDialect>()) {
+PythonFunctionDialect::PythonFunctionDialect(pir::IrContext *context)
+    : pir::Dialect(name(), context, pir::TypeId::get<PythonFunctionDialect>()) {
   initialize();
 }
 
-void CustomPyFuncDialect::initialize() {
+void PythonFunctionDialect::initialize() {
   RegisterOps<dialect::CustomPyFuncOp>();
 }
 
-void CustomPyFuncDialect::PrintType(pir::Type type, std::ostream &os) const {
+void PythonFunctionDialect::PrintType(pir::Type type, std::ostream &os) const {
   PrintKernelType(type, os);
 }
 
-void CustomPyFuncDialect::PrintAttribute(pir::Attribute attr,
-                                         std::ostream &os) const {
+void PythonFunctionDialect::PrintAttribute(pir::Attribute attr,
+                                           std::ostream &os) const {
   PrintKernelAttribute(attr, os);
 }
 
-pir::OpPrintFn CustomPyFuncDialect::PrintOperation(
+pir::OpPrintFn PythonFunctionDialect::PrintOperation(
     const pir::Operation &op) const {
   return [](const pir::Operation &op, pir::IrPrinter &printer) {
     auto &os = printer.os;
@@ -226,7 +226,7 @@ pir::OpPrintFn CustomPyFuncDialect::PrintOperation(
             .data()) {
       kernel_name = kernel_name + "_";
     }
-    os << " \"" << kernel_name << "(custom_py_func)\"";
+    os << " \"" << kernel_name << "(py_func)\"";
     printer.PrintOpOperands(op);
     printer.PrintAttributeMap(op);
     os << " :";
@@ -302,7 +302,7 @@ pir::OpPrintFn OneDNNKernelDialect::PrintOperation(
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelDialect)
-IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncDialect)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PythonFunctionDialect)
 #ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
 #endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
index 8891ef7618001f..9cb370acfa677c 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
@@ -53,9 +53,9 @@ class CustomKernelDialect : public pir::Dialect {
   void initialize();
 };
 
-class CustomPyFuncDialect : public pir::Dialect {
+class PythonFunctionDialect : public pir::Dialect {
  public:
-  explicit CustomPyFuncDialect(pir::IrContext* context);
+  explicit PythonFunctionDialect(pir::IrContext* context);
 
   static const char* name() { return "custom_py_func"; }
 
@@ -94,7 +94,7 @@ class OneDNNKernelDialect : public pir::Dialect {
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelDialect)
-IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncDialect)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PythonFunctionDialect)
 #ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
 #endif
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 11dec59c8c4600..a769a7b1b0956e 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -3824,7 +3824,7 @@ std::unique_ptr<pir::Program> PdOpLowerToKernelPass(pir::Program* prog,
   ctx->GetOrRegisterDialect<OperatorDialect>();
   ctx->GetOrRegisterDialect<KernelDialect>();
   ctx->GetOrRegisterDialect<CustomKernelDialect>();
-  ctx->GetOrRegisterDialect<CustomPyFuncDialect>();
+  ctx->GetOrRegisterDialect<PythonFunctionDialect>();
 
 #ifdef PADDLE_WITH_DNNL
   ctx->GetOrRegisterDialect<OneDNNOperatorDialect>();
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index 106831c588a876..ab9fb727c53e50 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -1063,7 +1063,7 @@ class PADDLE_API OpMetaInfo {
   InferShapeFunc infer_shape_fn_{nullptr};
   InferDtypeFunc infer_dtype_fn_{nullptr};
   InferSpmdFunc infer_spmd_fn_{nullptr};
-  // 3. custom pyop function
+  // 3. pyop function info
   PythonOperatorFunctionType pyop_func_{nullptr};
   PythonOperatorInferMetaFunctionType pyop_func_infer_meta_{nullptr};
 #ifdef PADDLE_WITH_TENSORRT
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index e48fbb1a4d9922..4eb36cc1174b27 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -531,7 +531,7 @@ const InferSpmdFunc& OpMetaInfoHelper::GetInferSpmdFn(
   return info.infer_spmd_fn_;
 }
 
-// Python Custom Op
+// Python Op
 const PythonOperatorFunctionType& OpMetaInfoHelper::GetPythonOperatorFunction(
     const paddle::OpMetaInfo& info) {
   return info.pyop_func_;

From 2bfd3b766ba60acd80680415335fefb283e17ea8 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Tue, 16 Dec 2025 15:56:35 +0800
Subject: [PATCH 10/33] Update copyright year 2023 -> 2025

---
 .../new_executor/instruction/python_function_instruction.cc     | 2 +-
 .../new_executor/instruction/python_function_instruction.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 5381e178271554..d678305c689ef0 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
index 8e0eaf2c24547d..a679f14d3cbafb 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

From b8320a82fcca87d3b09fcec7c5480c8be7481e5d Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 16:01:03 +0800
Subject: [PATCH 11/33] refine name in instructions

---
 .../python_function_instruction.cc            | 29 +++++++++----------
 .../instruction/python_function_instruction.h |  4 +--
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index d678305c689ef0..54fd933f1a5557 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -28,7 +28,7 @@ COMMON_DECLARE_bool(check_cuda_error);
 
 namespace paddle::framework {
 
-void PythonFunctionInstruction::BuildCustomContext(
+void PythonFunctionInstruction::BuildPythonFunctionContext(
     const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
   PADDLE_ENFORCE_NOT_NULL(
       custom_op_meta_,
@@ -92,12 +92,12 @@ void PythonFunctionInstruction::BuildCustomContext(
         // // tensor here.
         // std::vector<paddle::Tensor> custom_vec_in;
         // custom_vec_in.emplace_back(paddle::Tensor());
-        // python_operator_function_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
+        // python_function_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
       } else {
         input_name2id_map_[t] = input_index;
         input_index++;
         input_ptrs_.emplace_back(nullptr);
-        python_operator_function_ctx_.EmplaceBackInput(paddle::Tensor());
+        python_function_ctx_.EmplaceBackInput(paddle::Tensor());
       }
       continue;
     }
@@ -121,7 +121,7 @@ void PythonFunctionInstruction::BuildCustomContext(
       input_ptrs_.push_back(dense_tensor_in);
       paddle::Tensor custom_in;
       custom_in.set_impl(tensor_in);
-      python_operator_function_ctx_.EmplaceBackInput(std::move(custom_in));
+      python_function_ctx_.EmplaceBackInput(std::move(custom_in));
     } else if (var->IsType<VariableRefArray>()) {
       // std::vector<phi::DenseTensor*> vec_input_ptrs;
       // std::vector<paddle::Tensor> vec_custom_in;
@@ -147,7 +147,7 @@ void PythonFunctionInstruction::BuildCustomContext(
       // vec_input_name2id_map_[t] = vec_input_index;
       // vec_input_index++;
       // vec_input_ptrs_.push_back(vec_input_ptrs);
-      // python_operator_function_ctx_.EmplaceBackInputs(vec_custom_in);
+      // python_function_ctx_.EmplaceBackInputs(vec_custom_in);
     } else {
       PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d] ",
                                                  var->Type()));
@@ -168,12 +168,12 @@ void PythonFunctionInstruction::BuildCustomContext(
     if (attr_type_name == "pir::Int32Attribute") {
       custom_attrs_.push_back(
           attr_map[t].dyn_cast<pir::Int32Attribute>().data());
-      python_operator_function_ctx_.EmplaceBackAttr(
+      python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int32Attribute>().data());
     } else if (attr_type_name == "pir::Int64Attribute") {
       custom_attrs_.push_back(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
-      python_operator_function_ctx_.EmplaceBackAttr(
+      python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
     } else {
       PADDLE_THROW(common::errors::Unimplemented("attr type not support [%s] ",
@@ -204,7 +204,7 @@ void PythonFunctionInstruction::BuildCustomContext(
               out_name));
       VLOG(3) << "Custom Operator: BuildContext - inplace optional outputs : "
               << out_name << " is None.";
-      python_operator_function_ctx_.EmplaceBackOutput(paddle::Tensor());
+      python_function_ctx_.EmplaceBackOutput(paddle::Tensor());
 
       VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
       continue;
@@ -222,7 +222,7 @@ void PythonFunctionInstruction::BuildCustomContext(
       // here only can copy the output tensor into context
       custom_out.set_impl(tensor_out);
 
-      python_operator_function_ctx_.EmplaceBackOutput(std::move(custom_out));
+      python_function_ctx_.EmplaceBackOutput(std::move(custom_out));
       VLOG(8) << "ctx->EmplaceBackOutput DenseTensor: "
               << value_exec_info_.GetVarName(out_ptr);
     } else if (out_ptr.type().isa<pir::VectorType>()) {
@@ -258,7 +258,7 @@ void PythonFunctionInstruction::BuildCustomContext(
       // }
       // VLOG(8) << "ctx->EmplaceBackOutput VariableRefArray: "
       //         << value_exec_info_.GetVarName(out_ptr);
-      // python_operator_function_ctx_.EmplaceBackOutputs(custom_vec_out);
+      // python_function_ctx_.EmplaceBackOutputs(custom_vec_out);
     } else {
       PADDLE_THROW(common::errors::Unimplemented(
           "only support DenseTensor and vector "));
@@ -269,7 +269,7 @@ void PythonFunctionInstruction::BuildCustomContext(
   auto& op_outputs = OpMetaInfoHelper::GetOutputs(*custom_op_meta_);
 
   // handle inplace map
-  python_operator_function_ctx_.UpdatePlainOutputs(
+  python_function_ctx_.UpdatePlainOutputs(
       op_inputs, op_outputs, op_inplace_map);
 }
 
@@ -327,7 +327,7 @@ PythonFunctionInstruction::PythonFunctionInstruction(
   py_func_infer_meta_ptr_ =
       &(OpMetaInfoHelper::GetPythonOperatorInferMetaFunction(op_meta));
 
-  BuildCustomContext(yaml_info_parser);
+  BuildPythonFunctionContext(yaml_info_parser);
   VLOG(6) << "finish process custom context";
   auto kernel_key = op_attributes.at("kernel_key")
                         .dyn_cast<paddle::dialect::KernelAttribute>()
@@ -415,12 +415,11 @@ void PythonFunctionInstruction::Run() {
   size_t num = op_->num_operands();
   VLOG(0) << "Op num_operands: " << num;
   for (size_t i = 0; i < num; ++i) {
-    vec_dense_inputs.push_back(python_operator_function_ctx_.InputAt(i));
+    vec_dense_inputs.push_back(python_function_ctx_.InputAt(i));
   }
 
   auto out = (*py_func_ptr_)(vec_dense_inputs);
-  python_operator_function_ctx_.ValidateAndAssignOutputs(
-      out);  // 从宏里面扒出来
+  python_function_ctx_.ValidateAndAssignOutputs(out);  // 从宏里面扒出来
   if (FLAGS_check_cuda_error) [[unlikely]] {
     CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " finish");
   }
diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
index a679f14d3cbafb..02fd0b0c7068f8 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
@@ -41,13 +41,13 @@ class PythonFunctionInstruction : public InstructionBase {
   void clear();
 
  private:
-  void BuildCustomContext(
+  void BuildPythonFunctionContext(
       const paddle::dialect::OpYamlInfoParser& op_yaml_info);
 
   void BuildShapeDtype();
   void UpdateOutputMeta();
 
-  paddle::CustomOpKernelContext python_operator_function_ctx_;
+  paddle::CustomOpKernelContext python_function_ctx_;
   paddle::KernelFunc kernel_func_ = nullptr;
 
   const paddle::PythonOperatorFunctionType* py_func_ptr_ = nullptr;

From a4432ed9140f4a776663f7317fa204017fa796bb Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 16:23:22 +0800
Subject: [PATCH 12/33] custom py func -> python function

---
 .../framework/new_executor/pir_interpreter.cc      |  2 +-
 .../fluid/pir/dialect/kernel/ir/kernel_dialect.cc  |  6 +++---
 .../fluid/pir/dialect/kernel/ir/kernel_dialect.h   |  2 +-
 paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc    | 14 +++++++-------
 paddle/fluid/pir/dialect/kernel/ir/kernel_op.h     |  6 +++---
 .../fluid/pir/transforms/pd_op_to_kernel_pass.cc   |  8 ++++----
 6 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index a267feadf33606..66663e37904c85 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -959,7 +959,7 @@ void PirInterpreter::BuildInstruction() {
       vec_instruction_base_.emplace_back(
           std::make_unique<CustomKernelInstruction>(
               op_idx++, place_, &op, *(value_exe_info_.get())));
-    } else if (op.dialect()->name() == "custom_py_func") {
+    } else if (op.dialect()->name() == "py_func") {
       vec_instruction_base_.emplace_back(
           std::make_unique<PythonFunctionInstruction>(
               op_idx++, place_, &op, *(value_exe_info_.get())));
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
index 6b57822dd2128a..c915c8597f319e 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
@@ -199,7 +199,7 @@ PythonFunctionDialect::PythonFunctionDialect(pir::IrContext *context)
 }
 
 void PythonFunctionDialect::initialize() {
-  RegisterOps<dialect::CustomPyFuncOp>();
+  RegisterOps<dialect::PythonFunctionOp>();
 }
 
 void PythonFunctionDialect::PrintType(pir::Type type, std::ostream &os) const {
@@ -217,8 +217,8 @@ pir::OpPrintFn PythonFunctionDialect::PrintOperation(
     auto &os = printer.os;
     printer.PrintOpResult(op);
     os << " =";
-    auto custom_py_func = op.dyn_cast<CustomPyFuncOp>();
-    std::string kernel_name = custom_py_func.kernel_name();
+    auto py_func_op = op.dyn_cast<PythonFunctionOp>();
+    std::string kernel_name = py_func_op.kernel_name();
     if (op.attributes().count("is_inplace") != 0 &&
         op.attributes()
             .at("is_inplace")
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
index 9cb370acfa677c..effc343272b759 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
@@ -57,7 +57,7 @@ class PythonFunctionDialect : public pir::Dialect {
  public:
   explicit PythonFunctionDialect(pir::IrContext* context);
 
-  static const char* name() { return "custom_py_func"; }
+  static const char* name() { return "py_func"; }
 
   void PrintType(pir::Type type, std::ostream& os) const override;
 
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
index ecf159c61da154..a473b7710f7fe4 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
@@ -139,13 +139,13 @@ phi::KernelKey CustomKernelOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
 
-const char* CustomPyFuncOp::attributes_name[attributes_num] = {  // NOLINT
+const char* PythonFunctionOp::attributes_name[attributes_num] = {  // NOLINT
     "op_name",
     "kernel_name",
     "kernel_key"};
 
-void CustomPyFuncOp::VerifySig() {
-  VLOG(4) << "Verifying inputs, outputs and attributes for: CustomPyFuncOp.";
+void PythonFunctionOp::VerifySig() {
+  VLOG(4) << "Verifying inputs, outputs and attributes for: PythonFunctionOp.";
   auto& attributes = this->attributes();
 
   PADDLE_ENFORCE(attributes.count("op_name") > 0 &&
@@ -164,18 +164,18 @@ void CustomPyFuncOp::VerifySig() {
                      "Type of attribute: kernel_key is not right."));
 }
 
-std::string CustomPyFuncOp::op_name() {
+std::string PythonFunctionOp::op_name() {
   return attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
 }
 
-std::string CustomPyFuncOp::kernel_name() {
+std::string PythonFunctionOp::kernel_name() {
   return attributes()
       .at("kernel_name")
       .dyn_cast<pir::StrAttribute>()
       .AsString();
 }
 
-phi::KernelKey CustomPyFuncOp::kernel_key() {
+phi::KernelKey PythonFunctionOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
 
@@ -304,7 +304,7 @@ phi::KernelKey OneDNNLegacyKernelOp::kernel_key() {
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelOp)
-IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncOp)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PythonFunctionOp)
 #ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
index 1e3ebac6260ebe..56abb10f16189d 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
@@ -56,10 +56,10 @@ class CustomKernelOp : public pir::Op<CustomKernelOp> {
   void VerifySig();
 };
 
-class CustomPyFuncOp : public pir::Op<CustomPyFuncOp> {
+class PythonFunctionOp : public pir::Op<PythonFunctionOp> {
  public:
   using Op::Op;
-  static const char *name() { return "custom_py_func"; }
+  static const char *name() { return "py_func"; }
   static constexpr uint32_t attributes_num = 3;
   static const char *attributes_name[attributes_num];
   std::string op_name();
@@ -112,7 +112,7 @@ class OneDNNLegacyKernelOp : public pir::Op<OneDNNLegacyKernelOp> {
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomKernelOp)
-IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::CustomPyFuncOp)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PythonFunctionOp)
 #ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index a769a7b1b0956e..87bb93f339cc09 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -2584,14 +2584,14 @@ void HandleForCustomPyOp(
                        pir::Int64Attribute::get(ctx, op_item->id()));
 
   VLOG(6) << "Lower custom pyop: " << op_item->name()
-          << " to : " << CustomPyFuncOp::name();
+          << " to : " << PythonFunctionOp::name();
 
-  pir::OpInfo custom_py_func_op_info =
-      ctx->GetRegisteredOpInfo(CustomPyFuncOp::name());
+  pir::OpInfo py_func_op_info =
+      ctx->GetRegisteredOpInfo(PythonFunctionOp::name());
 
   pir::Operation* op = nullptr;
   op = pir::Operation::Create(
-      vec_inputs, op_attribute, op_output_types, custom_py_func_op_info);
+      vec_inputs, op_attribute, op_output_types, py_func_op_info);
   op->set_attribute("origin_id", pir::Int64Attribute::get(ctx, op->id()));
 
   (*map_op_pair)[op_item] = op;

From 5c7db9d45b2a41febde76b63d663c4d8c0627c54 Mon Sep 17 00:00:00 2001
From: DrRyanHuang <zihaohuang@aliyun.com>
Date: Tue, 16 Dec 2025 18:44:47 +0800
Subject: [PATCH 13/33] use void* and PointerAttribute

---
 .../python_function_instruction.cc            |  5 +++++
 .../pir/dialect/operator/ir/op_dialect.cc     |  8 ++++----
 .../fluid/pir/dialect/operator/utils/utils.cc |  4 +++-
 paddle/fluid/pybind/eager_utils.cc            |  6 +++---
 paddle/fluid/pybind/eager_utils.h             |  2 +-
 .../fluid/pybind/manual_static_op_function.h  | 20 ++++++++++---------
 paddle/phi/api/lib/op_meta_info.cc            |  5 +++--
 python/paddle/static/custom_pyop.py           |  5 ++---
 8 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 54fd933f1a5557..71300877ad3112 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -175,6 +175,11 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
       python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
+    } else if (attr_type_name == "pir::PointerAttribute") {
+      custom_attrs_.push_back(
+          attr_map[t].dyn_cast<pir::PointerAttribute>().data());
+      python_function_ctx_.EmplaceBackAttr(
+          attr_map[t].dyn_cast<pir::PointerAttribute>().data());
     } else {
       PADDLE_THROW(common::errors::Unimplemented("attr type not support [%s] ",
                                                  attr_type_name));
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
index db81173c8ac9d2..b0768cf1a09732 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
@@ -576,7 +576,7 @@ struct CustomOpInfoInterfaceModel : public OpYamlInfoInterface::Concept {
             "Supported data types include `bool`, `int`, `float`, "
             "`int64_t`, `std::string`, `std::vector<int>`, "
             "`std::vector<float>`, `std::vector<int64_t>`, "
-            "`std::vector<std::string>`, Please check whether "
+            "`std::vector<std::string>`, `void*`, Please check whether "
             "the attribute data type and data type string are matched.",
             attr_type_str));
       }
@@ -653,14 +653,14 @@ struct PythonOperatorInfoInterfaceModel : public OpYamlInfoInterface::Concept {
     auto& op_attrs = OpMetaInfoHelper::GetAttrs(op_meta);
     for (const auto& op_attr : op_attrs) {
       auto attr_name_and_type = paddle::ParseAttrStr(op_attr);
-      // PythonOperator only has int64_t attr
+      // PythonOperator only has void* attr
       const std::string& attr_name = attr_name_and_type[0];
       const std::string& attr_type_str = attr_name_and_type[1];
       PADDLE_ENFORCE_EQ(
           attr_type_str,
-          "int64_t",
+          "void*",
           common::errors::InvalidArgument(
-              "PythonOperator only has two int64_t attributes, which "
+              "PythonOperator only has two void* attributes, which "
               "are infer_meta_fn_ptr & fn_ptr."));
       param_names.push_back(attr_name);
       const std::string& attr_pir_type =
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
index 878c4e3a785fd4..b157674aee267d 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.cc
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -494,7 +494,9 @@ const std::unordered_map<std::string, std::string>& CppTypeToAttrTypeMap() {
       {"std::vector<int>", "pir::ArrayAttribute<pir::Int32Attribute>"},
       {"std::vector<float>", "pir::ArrayAttribute<pir::FloatAttribute>"},
       {"std::vector<int64_t>", "pir::ArrayAttribute<pir::Int64Attribute>"},
-      {"std::vector<std::string>", "pir::ArrayAttribute<pir::StrAttribute>"}};
+      {"std::vector<std::string>", "pir::ArrayAttribute<pir::StrAttribute>"},
+      {"void*", "pir::PointerAttribute"},
+  };
   return attr_type_map;
 }
 
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 5b9ea5dcb9987a..6636e740b56f85 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -3725,7 +3725,7 @@ int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str) {
 }
 
 int parse_attrs_dict(PyObject* py_attrs_dict,
-                     std::unordered_map<std::string, uintptr_t>* attrs) {
+                     std::unordered_map<std::string, void*>* attrs) {
   // 检查输入参数是否有效
   if (!py_attrs_dict || !attrs) {
     PyErr_SetString(PyExc_ValueError, "Invalid input parameters");
@@ -3763,8 +3763,8 @@ int parse_attrs_dict(PyObject* py_attrs_dict,
 
   // 将PyObject指针转换为uintptr_t存储
   // 注意：这里存储的是Python函数对象的地址
-  (*attrs)["infer_meta_fn_ptr"] = reinterpret_cast<uintptr_t>(py_infer_meta);
-  (*attrs)["fn_ptr"] = reinterpret_cast<uintptr_t>(py_real_fn);
+  (*attrs)["infer_meta_fn_ptr"] = reinterpret_cast<void*>(py_infer_meta);
+  (*attrs)["fn_ptr"] = reinterpret_cast<void*>(py_real_fn);
 
   return 0;
 }
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 77e25af4879b5c..830a669445ff6a 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -594,7 +594,7 @@ int py_dict_to_unordered_map_string(
 
 int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str);
 int parse_attrs_dict(PyObject* py_attrs_dict,
-                     std::unordered_map<std::string, uintptr_t>* attrs);
+                     std::unordered_map<std::string, void*>* attrs);
 /*----------------------for arg parse-----------------------------*/
 paddle::Tensor& GetTensorFromArgsOrKWArgs(
     const std::string& op_type,
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 93316a7a68ba11..5d5b972c9e466a 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1009,7 +1009,7 @@ std::ostream &operator<<(std::ostream &os, const std::vector<T> &vec) {
 using IrTensor = paddle::dialect::IrTensor;
 
 template <typename T>
-auto CreatePyFuncRunner(int64_t py_func_ptr, const std::string &op_name) {
+auto CreatePyFuncRunner(void *py_func_ptr, const std::string &op_name) {
   static_assert(
       std::is_same_v<T, Tensor> || std::is_same_v<T, phi::NativeMetaTensor>,
       "T must be either Tensor or phi::NativeMetaTensor");
@@ -1088,7 +1088,7 @@ static PyObject *run_custom_pyop(PyObject *self,
   std::string op_name;
   std::vector<std::string> inputs_vec;
   std::vector<std::string> outputs_vec;
-  std::unordered_map<std::string, uintptr_t> attrs_map;
+  std::unordered_map<std::string, void *> attrs_map;
   std::unordered_map<std::string, std::string> op_inplace_map;
 
   if (py_str_to_cpp_str(py_op_name, &op_name) == -1 ||
@@ -1106,9 +1106,11 @@ static PyObject *run_custom_pyop(PyObject *self,
   std::cout << "op_name: " << op_name << std::endl;
   std::cout << "inputs: " << inputs_vec << std::endl;
   std::cout << "outputs: " << outputs_vec << std::endl;
-  std::cout << "attrs[infer_meta_fn_ptr]: " << attrs_map["infer_meta_fn_ptr"]
+  std::cout << "attrs[infer_meta_fn_ptr]: "
+            << reinterpret_cast<uintptr_t>(attrs_map["infer_meta_fn_ptr"])
             << std::endl;
-  std::cout << "attrs[fn_ptr]: " << attrs_map["fn_ptr"] << std::endl;
+  std::cout << "attrs[fn_ptr]: "
+            << reinterpret_cast<uintptr_t>(attrs_map["fn_ptr"]) << std::endl;
 
   const auto &meta_info_map = OpMetaInfoMap::Instance().GetMap();
 
@@ -1123,7 +1125,7 @@ static PyObject *run_custom_pyop(PyObject *self,
         op_name,
         std::move(inputs_vec),
         std::move(outputs_vec),
-        {"infer_meta_fn_ptr: int64_t", "fn_ptr: int64_t"},
+        {"infer_meta_fn_ptr: void*", "fn_ptr: void*"},
         std::move(op_inplace_map),
         std::move(py_func),
         std::move(infer_meta_py_func));
@@ -1245,11 +1247,11 @@ static PyObject *run_custom_pyop(PyObject *self,
   custom_attrs.push_back(attrs_map["fn_ptr"]);
   argument.AddAttribute(
       "infer_meta_fn_ptr",
-      pir::Int64Attribute::get(pir::IrContext::Instance(),
-                               attrs_map["infer_meta_fn_ptr"]));
+      pir::PointerAttribute::get(pir::IrContext::Instance(),
+                                 attrs_map["infer_meta_fn_ptr"]));
   argument.AddAttribute("fn_ptr",
-                        pir::Int64Attribute::get(pir::IrContext::Instance(),
-                                                 attrs_map["fn_ptr"]));
+                        pir::PointerAttribute::get(pir::IrContext::Instance(),
+                                                   attrs_map["fn_ptr"]));
 
   // Run infer meta
   VLOG(4) << "Start to run infer meta for " << op_name;
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 4eb36cc1174b27..1e71a65a0ddbea 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -633,7 +633,8 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
        "std::vector<float>",
        "std::vector<int64_t>",
        "std::vector<std::string>",
-       "std::vector<bool>"});
+       "std::vector<bool>",
+       "void*"});
   for (const auto& attr : attrs) {
     auto attr_type_str = ParseAttrStr(attr)[1];
     if (custom_attrs_type.find(attr_type_str) == custom_attrs_type.end()) {
@@ -642,7 +643,7 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
           "Supported data types include `bool`, `int`, `float`, `double`,  "
           "`int64_t`, `std::string`, `std::vector<int>`, "
           "`std::vector<float>`, `std::vector<int64_t>`, "
-          "`std::vector<std::string>`, `std::vector<bool>`, "
+          "`std::vector<std::string>`, `std::vector<bool>`, `void*`, "
           "Please check whether the attribute data type and "
           "data type string are matched.",
           attr_type_str));
diff --git a/python/paddle/static/custom_pyop.py b/python/paddle/static/custom_pyop.py
index 93bec47eb099c2..a26c76bde42ad0 100644
--- a/python/paddle/static/custom_pyop.py
+++ b/python/paddle/static/custom_pyop.py
@@ -158,6 +158,8 @@ def dynamic_mode_fn(*args, **kwargs):
 
 
 def custom_hash(obj):
+    # TODO: Guard against hash collisions between distinct constants,
+    # e.g. in CPython hash(-1) == hash(-2).
     if isinstance(obj, (int, float, str, bool, bytes)):
         return hash(obj)
 
@@ -269,9 +271,6 @@ def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
                 output_names=output_names,
                 attrs={
                     "infer_meta_fn_ptr": bound_constants_infer_meta,
-                    # MetaTensorWrapper(
-                    #     bound_constants_infer_meta
-                    # ),
                     "fn_ptr": run_in_dynamic_mode(bound_constants_fn),
                 },
                 inplace_map=inplace_map or {},

From 72872482c0908fb83fdf9c46067332f6f2f3d976 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 18:55:37 +0800
Subject: [PATCH 14/33] `HandleForPythonOp`

---
 .../pir/transforms/pd_op_to_kernel_pass.cc     | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 87bb93f339cc09..6c8362aa39a055 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -2505,7 +2505,7 @@ void HandleForCustomOp(
   block->push_back(op);
 }
 
-void HandleForCustomPyOp(
+void HandleForPythonOp(
     pir::IrContext* ctx,
     pir::Operation* op_item,
     const phi::KernelKey& kernel_key,
@@ -3701,14 +3701,14 @@ void ProcessBlock(
     }
 
     if (paddle::dialect::IsCustomPyOp(op_item)) {
-      HandleForCustomPyOp(ctx,
-                          op_item,
-                          kernel_key,
-                          place,
-                          op_info_parser.get(),
-                          map_op_pair,
-                          map_value_pair,
-                          new_block);
+      HandleForPythonOp(ctx,
+                        op_item,
+                        kernel_key,
+                        place,
+                        op_info_parser.get(),
+                        map_op_pair,
+                        map_value_pair,
+                        new_block);
       continue;
     }
 

From cdb9201b7f17cdb5fb7416bc0fa170e7fe308565 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Tue, 16 Dec 2025 18:57:40 +0800
Subject: [PATCH 15/33] refine comment

---
 paddle/phi/api/ext/op_meta_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index ab9fb727c53e50..2ffb85f1df443f 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -1091,7 +1091,7 @@ class OpMetaInfoHelper {
   static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info);
   static const InferSpmdFunc& GetInferSpmdFn(const paddle::OpMetaInfo& info);
 
-  // Python Custom Op
+  // Python Op
   static const PythonOperatorFunctionType& GetPythonOperatorFunction(
       const paddle::OpMetaInfo& info);
   static const PythonOperatorInferMetaFunctionType&

From a7ac4f3cef5934ae8c6bddec5de00544b527ace1 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Wed, 17 Dec 2025 21:51:29 +0800
Subject: [PATCH 16/33] normalize eager_utils

---
 paddle/fluid/pybind/eager_utils.cc            | 190 ++++--------------
 paddle/fluid/pybind/eager_utils.h             |  11 +-
 .../fluid/pybind/manual_static_op_function.h  |  40 ++--
 python/paddle/static/custom_pyop.py           |   2 +-
 4 files changed, 56 insertions(+), 187 deletions(-)

diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 6636e740b56f85..f4fa478df088cc 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -3587,186 +3587,66 @@ void Check_PIR_not_support_out(PyObject* kwargs) {
   }
 }
 
-/**
- * 将 Python 列表 (list of str) 转换为 C++ vector<string>
- * @param py_list 传入的 Python 列表对象
- * @param cpp_vec 用于存储结果的 C++ vector
- * @return 0 成功，-1 失败（例如，列表元素不是字符串）
- */
-int py_list_to_vector_string(PyObject* py_list,
-                             std::vector<std::string>* cpp_vec) {
-  if (!PyList_Check(py_list)) {
-    PyErr_SetString(PyExc_TypeError, "Expected a list for string conversion.");
-    return -1;
-  }
-
-  Py_ssize_t size = PyList_Size(py_list);
-  cpp_vec->reserve(size);
-
-  for (Py_ssize_t i = 0; i < size; ++i) {
-    // 获取列表中的元素 (借用引用)
-    PyObject* item = PyList_GetItem(py_list, i);
-
-    // 检查是否为字符串
-    if (!PyUnicode_Check(item)) {
-      PyErr_SetString(PyExc_TypeError, "List element must be a string.");
-      cpp_vec->clear();  // 清空已转换的元素
-      return -1;
-    }
-
-    // 转换为 C 字符串 (char *)
-    // PyUnicode_AsUTF8AndSize 是推荐的现代方法
-    Py_ssize_t length;
-    const char* c_str = PyUnicode_AsUTF8AndSize(item, &length);
-    if (c_str == NULL) {
-      // PyUnicode_AsUTF8AndSize 失败会设置异常
-      cpp_vec->clear();
-      return -1;
-    }
-
-    // 构造 C++ std::string 并添加到 vector
-    cpp_vec->emplace_back(c_str, length);
-  }
-  return 0;
-}
-
-/**
- * 将 Python 字典 (dict[str, str]) 转换为 C++ unordered_map<string, string>
- * @param py_dict 传入的 Python 字典对象
- * @param cpp_map 用于存储结果的 C++ unordered_map
- * @return 0 成功，-1 失败（例如，key 或 value 不是字符串）
- */
-int py_dict_to_unordered_map_string(
-    PyObject* py_dict, std::unordered_map<std::string, std::string>* cpp_map) {
+std::unordered_map<std::string, std::string> ParseStringDict(
+    PyObject* py_dict) {
   if (!PyDict_Check(py_dict)) {
-    PyErr_SetString(PyExc_TypeError,
-                    "Expected a dictionary for map conversion.");
-    return -1;
+    PADDLE_THROW(common::errors::InvalidType(
+        "Expected a dictionary object, but got %s",
+        reinterpret_cast<PyTypeObject*>(py_dict->ob_type)->tp_name));
   }
 
+  std::unordered_map<std::string, std::string> result;
   PyObject *key, *value;
-  Py_ssize_t pos = 0;  // 迭代器位置
+  Py_ssize_t pos = 0;
 
   while (PyDict_Next(py_dict, &pos, &key, &value)) {
-    // --- 1. 转换 Key ---
-    if (!PyUnicode_Check(key)) {
-      PyErr_SetString(PyExc_TypeError, "Dictionary key must be a string.");
-      cpp_map->clear();
-      return -1;
-    }
-    Py_ssize_t key_len;
-    const char* c_key = PyUnicode_AsUTF8AndSize(key, &key_len);
-    if (c_key == NULL) {  // 转换失败
-      cpp_map->clear();
-      return -1;
+    if (!PyUnicode_Check(key) || !PyUnicode_Check(value)) {
+      PADDLE_THROW(common::errors::InvalidType(
+          "Both keys and values in the dictionary must be strings."));
     }
-    std::string cpp_key(c_key, key_len);
 
-    // --- 2. 转换 Value ---
-    if (!PyUnicode_Check(value)) {
-      PyErr_SetString(PyExc_TypeError, "Dictionary value must be a string.");
-      cpp_map->clear();
-      return -1;
-    }
-    Py_ssize_t value_len;
+    Py_ssize_t key_len, value_len;
+    const char* c_key = PyUnicode_AsUTF8AndSize(key, &key_len);
     const char* c_value = PyUnicode_AsUTF8AndSize(value, &value_len);
-    if (c_value == NULL) {  // 转换失败
-      cpp_map->clear();
-      return -1;
-    }
-    std::string cpp_value(c_value, value_len);
 
-    // --- 3. 插入到 C++ Map ---
-    cpp_map->emplace(std::move(cpp_key), std::move(cpp_value));
-  }
-  return 0;
-}
-
-int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str) {
-  // 检查输入指针是否有效
-  if (!py_str || !cpp_str) {
-    return -1;
-  }
-
-  // 检查PyObject是否为字符串类型
-  if (!PyUnicode_Check(py_str)) {
-    PyErr_SetString(PyExc_TypeError, "Expected a string object");
-    return -1;
-  }
-
-  // 将Python字符串转换为UTF-8编码的C字符串
-  PyObject* temp_bytes = PyUnicode_AsUTF8String(py_str);
-  if (!temp_bytes) {
-    // 转换失败，Python异常已被设置
-    return -1;
-  }
-
-  // 获取C字符串指针和长度
-  char* c_str = nullptr;
-  Py_ssize_t length = 0;
-  if (PyBytes_AsStringAndSize(temp_bytes, &c_str, &length) == -1) {
-    Py_DECREF(temp_bytes);
-    return -1;
-  }
+    if (c_key == NULL || c_value == NULL) {
+      PADDLE_THROW(common::errors::External(
+          "Failed to convert Python string to C string."));
+    }
 
-  // 将内容复制到std::string
-  try {
-    *cpp_str = std::string(c_str, length);
-  } catch (const std::exception& e) {
-    Py_DECREF(temp_bytes);
-    PyErr_SetString(PyExc_RuntimeError, "Failed to create std::string");
-    return -1;
+    result.emplace(std::string(c_key, key_len),
+                   std::string(c_value, value_len));
   }
 
-  // 释放临时字节对象
-  Py_DECREF(temp_bytes);
-
-  return 0;
+  return result;
 }
-
-int parse_attrs_dict(PyObject* py_attrs_dict,
-                     std::unordered_map<std::string, void*>* attrs) {
-  // 检查输入参数是否有效
-  if (!py_attrs_dict || !attrs) {
-    PyErr_SetString(PyExc_ValueError, "Invalid input parameters");
-    return -1;
-  }
-
-  // 检查是否为字典类型
-  if (!PyDict_Check(py_attrs_dict)) {
-    PyErr_SetString(PyExc_TypeError, "Expected a dictionary object");
-    return -1;
+std::unordered_map<std::string, void*> ParsePythonOpAttrs(PyObject* py_dict) {
+  if (!PyDict_Check(py_dict)) {
+    PADDLE_THROW(common::errors::InvalidType(
+        "Unknown python op attributes type, expected dict, but got %s",
+        reinterpret_cast<PyTypeObject*>(py_dict->ob_type)->tp_name));
   }
 
-  // 获取特定的函数对象
-  PyObject* py_infer_meta =
-      PyDict_GetItemString(py_attrs_dict, "infer_meta_fn_ptr");
-  PyObject* py_real_fn = PyDict_GetItemString(py_attrs_dict, "fn_ptr");
-
+  PyObject* py_infer_meta = PyDict_GetItemString(py_dict, "infer_meta_fn_ptr");
+  PyObject* py_real_fn = PyDict_GetItemString(py_dict, "fn_ptr");
   if (!py_infer_meta || !py_real_fn) {
-    PyErr_SetString(PyExc_KeyError,
-                    "Missing required keys 'infer_meta_fn_ptr' or 'fn_ptr'");
-    return -1;
+    PADDLE_THROW(common::errors::NotFound(
+        "Missing required keys 'infer_meta_fn_ptr' or 'fn_ptr' in op attrs"));
   }
 
-  // 检查是否为可调用对象
   if (!PyCallable_Check(py_infer_meta) || !PyCallable_Check(py_real_fn)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        "Expected callable objects for 'infer_meta_fn_ptr' and 'fn_ptr'");
-    return -1;
+    PADDLE_THROW(common::errors::InvalidType(
+        "Expected callable objects for 'infer_meta_fn_ptr' and 'fn_ptr'"));
   }
 
-  // 增加引用计数，确保在C++端使用时不会被垃圾回收
+  // Increase reference count to prevent garbage collection in C++
   Py_INCREF(py_infer_meta);
   Py_INCREF(py_real_fn);
+  std::unordered_map<std::string, void*> attrs;
 
-  // 将PyObject指针转换为uintptr_t存储
-  // 注意：这里存储的是Python函数对象的地址
-  (*attrs)["infer_meta_fn_ptr"] = reinterpret_cast<void*>(py_infer_meta);
-  (*attrs)["fn_ptr"] = reinterpret_cast<void*>(py_real_fn);
-
-  return 0;
+  attrs["infer_meta_fn_ptr"] = reinterpret_cast<void*>(py_infer_meta);
+  attrs["fn_ptr"] = reinterpret_cast<void*>(py_real_fn);
+  return attrs;
 }
 
 }  // namespace paddle::pybind
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 830a669445ff6a..f24d57844fbab3 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -587,14 +587,9 @@ GetPredefinedOutTupleTensorFromKwargs_7(PyObject* kwargs);
 
 void Check_PIR_not_support_out(PyObject* kwargs);
 
-int py_list_to_vector_string(PyObject* py_list,
-                             std::vector<std::string>* cpp_vec);
-int py_dict_to_unordered_map_string(
-    PyObject* py_dict, std::unordered_map<std::string, std::string>* cpp_map);
-
-int py_str_to_cpp_str(PyObject* py_str, std::string* cpp_str);
-int parse_attrs_dict(PyObject* py_attrs_dict,
-                     std::unordered_map<std::string, void*>* attrs);
+std::unordered_map<std::string, std::string> ParseStringDict(PyObject* py_dict);
+
+std::unordered_map<std::string, void*> ParsePythonOpAttrs(PyObject* py_dict);
 /*----------------------for arg parse-----------------------------*/
 paddle::Tensor& GetTensorFromArgsOrKWArgs(
     const std::string& op_type,
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 5d5b972c9e466a..e55fb4bc06b87e 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1058,10 +1058,10 @@ auto CreatePyFuncRunner(void *py_func_ptr, const std::string &op_name) {
   };
 }
 
-static PyObject *run_custom_pyop(PyObject *self,
-                                 PyObject *args,
-                                 PyObject *kwargs) {
-  VLOG(6) << "Call run_custom_pyop";
+static PyObject *run_python_op(PyObject *self,
+                               PyObject *args,
+                               PyObject *kwargs) {
+  VLOG(6) << "Call run_python_op";
 
   if (kwargs == NULL) {
     PyErr_SetString(
@@ -1085,21 +1085,15 @@ static PyObject *run_custom_pyop(PyObject *self,
     return nullptr;
   }
 
-  std::string op_name;
-  std::vector<std::string> inputs_vec;
-  std::vector<std::string> outputs_vec;
-  std::unordered_map<std::string, void *> attrs_map;
-  std::unordered_map<std::string, std::string> op_inplace_map;
-
-  if (py_str_to_cpp_str(py_op_name, &op_name) == -1 ||
-      py_list_to_vector_string(py_input_names, &inputs_vec) == -1 ||
-      py_list_to_vector_string(py_output_names, &outputs_vec) == -1 ||
-      parse_attrs_dict(py_attrs_dict, &attrs_map) == -1 ||
-      py_dict_to_unordered_map_string(py_inplace_dict, &op_inplace_map) == -1) {
-    PyErr_SetString(PyExc_KeyError,
-                    "inputs/outputs/attr/inplace_map is Empty!");
-    return nullptr;
-  }
+  std::string op_name = CastPyArg2String(py_op_name, "run_python_op", 0);
+  std::vector<std::string> inputs_vec =
+      CastPyArg2Strings(py_input_names, "run_python_op", 0);
+  std::vector<std::string> outputs_vec =
+      CastPyArg2Strings(py_output_names, "run_python_op", 0);
+  std::unordered_map<std::string, void *> attrs_map =
+      ParsePythonOpAttrs(py_attrs_dict);
+  std::unordered_map<std::string, std::string> op_inplace_map =
+      ParseStringDict(py_inplace_dict);
 
   std::cout << "Get things from python for Custom PyOp: [" << op_name << "]"
             << std::endl;
@@ -1410,7 +1404,7 @@ static PyObject *run_custom_pyop(PyObject *self,
 
   argument.AddOutputs(argument_outputs.begin(), argument_outputs.end());
   ::pir::PassStopGradientsDefaultly(argument);
-  CallStackRecorder callstack_recorder("_run_custom_pyop");
+  CallStackRecorder callstack_recorder("run_python_op");
   callstack_recorder.Record();
   std::vector<pir::Value> op_results;
   pir::Operation *op =
@@ -1740,10 +1734,10 @@ static PyMethodDef ManualOpsAPI[] = {
      (PyCFunction)(void (*)(void))run_custom_op,
      METH_VARARGS | METH_KEYWORDS,
      "C++ interface function for run_custom_op."},
-    {"_run_custom_pyop",
-     (PyCFunction)(void (*)(void))run_custom_pyop,
+    {"_run_python_op",
+     (PyCFunction)(void (*)(void))run_python_op,
      METH_VARARGS | METH_KEYWORDS,
-     "C++ interface function for run_custom_pyop."},
+     "C++ interface function for run_python_op."},
     {"builtin_combine",
      (PyCFunction)(void (*)(void))builtin_combine_op,
      METH_VARARGS | METH_KEYWORDS,
diff --git a/python/paddle/static/custom_pyop.py b/python/paddle/static/custom_pyop.py
index a26c76bde42ad0..76bc0fa82dd113 100644
--- a/python/paddle/static/custom_pyop.py
+++ b/python/paddle/static/custom_pyop.py
@@ -263,7 +263,7 @@ def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
             const_params_hash = custom_hash(const_params)
 
             # 调用底层算子运行逻辑
-            out = _C_ops._run_custom_pyop(
+            out = _C_ops._run_python_op(
                 *args,  # kwargs 还需要吗？
                 name=f"{op_name}_{const_params_hash}",  # 每次绑定一次，说明绑定的
                 # inputs=inputs,

From 18c9c2ba95d94581f7585336ed641f6db3e18400 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 03:54:24 +0800
Subject: [PATCH 17/33] align part1 and part2

---
 paddle/fluid/framework/python_operator.cc     |  3 +-
 .../pir/dialect/operator/ir/op_dialect.cc     |  2 -
 .../pir/dialect/operator/ir/op_dialect.h      |  2 +-
 .../pir/transforms/pd_op_to_kernel_pass.cc    |  4 +-
 paddle/fluid/pybind/CMakeLists.txt            |  2 -
 paddle/fluid/pybind/pybind.cc                 |  4 --
 paddle/phi/api/ext/native_meta_tensor.h       |  2 +-
 paddle/phi/api/lib/op_meta_info.cc            |  2 +-
 test/legacy_test/test_meta_tensor.py          | 44 -------------------
 9 files changed, 7 insertions(+), 58 deletions(-)

diff --git a/paddle/fluid/framework/python_operator.cc b/paddle/fluid/framework/python_operator.cc
index 2a5850a4d8b0e3..85af44dd780d68 100644
--- a/paddle/fluid/framework/python_operator.cc
+++ b/paddle/fluid/framework/python_operator.cc
@@ -45,7 +45,8 @@ void RegisterPythonOperator(
                     1,
                     common::errors::OutOfRange(
                         "Current op_name(%s) must not be registered more "
-                        "than one, because it don't support gradient op."));
+                        "than once, because it does not support gradient op.",
+                        op_name));
 
   const auto& op_meta_info = op_meta_info_vector.back();
 
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
index b0768cf1a09732..05a38e00242c4d 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc
@@ -1269,8 +1269,6 @@ void PythonOperatorDialect::RegisterPythonOperator(
       AttributeManager::Instance().ToCharPointers(attr_names);
   uint32_t attr_num = attr_names.size();
 
-  std::cout << "attr_num: " << attr_num << std::endl;
-
   std::set<pir::InterfaceValue> interface_values;
   pir::InterfaceValue op_info_interface =
       pir::InterfaceValue::Get<OpYamlInfoInterface,
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
index 946b529071914e..1deb3b60f2498d 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
+++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.h
@@ -48,7 +48,7 @@ inline bool IsCustomOp(pir::Operation* op) {
   return op_name.find("custom_op") != op_name.npos;
 }
 
-inline bool IsCustomPyOp(pir::Operation* op) {
+inline bool IsPythonOp(pir::Operation* op) {
   const std::string& op_name = op->name();
   return op_name.find("py_op") != op_name.npos;
 }
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 6c8362aa39a055..b54d6a8910256d 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -2583,7 +2583,7 @@ void HandleForPythonOp(
   op_attribute.emplace("origin_id",
                        pir::Int64Attribute::get(ctx, op_item->id()));
 
-  VLOG(6) << "Lower custom pyop: " << op_item->name()
+  VLOG(6) << "Lower pyop: " << op_item->name()
           << " to : " << PythonFunctionOp::name();
 
   pir::OpInfo py_func_op_info =
@@ -3700,7 +3700,7 @@ void ProcessBlock(
       continue;
     }
 
-    if (paddle::dialect::IsCustomPyOp(op_item)) {
+    if (paddle::dialect::IsPythonOp(op_item)) {
       HandleForPythonOp(ctx,
                         op_item,
                         kernel_key,
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index d9bd49e164204f..27b935cbfc697b 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -113,8 +113,6 @@ set(PYBIND_SRCS
     pir.cc
     pir_utils.cc
     graph.cc
-    ir_tensor.cc
-    ir_meta_tensor.cc
     native_meta_tensor.cc
     reader_py.cc
     protobuf.cc
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 910ad32b4fcfaf..1bf479a1470269 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -125,8 +125,6 @@ limitations under the License. */
 #include "paddle/fluid/pybind/imperative.h"
 #include "paddle/fluid/pybind/inference_api.h"
 #include "paddle/fluid/pybind/io.h"
-#include "paddle/fluid/pybind/ir_meta_tensor.h"
-#include "paddle/fluid/pybind/ir_tensor.h"
 #include "paddle/fluid/pybind/jit.h"
 #include "paddle/fluid/pybind/metrics_py.h"
 #include "paddle/fluid/pybind/native_meta_tensor.h"
@@ -1574,8 +1572,6 @@ PYBIND11_MODULE(libpaddle, m) {
   BindJit(&m);
   BindSot(&m);
   BindCustomDevicePy(&m);
-  BindIrTensor(&m);
-  BindIrMetaTensor(&m);
   BindNativeMetaTensor(&m);
   BindEagerUtils(m.ptr());
   BindOpFunctionCommon(m.ptr());
diff --git a/paddle/phi/api/ext/native_meta_tensor.h b/paddle/phi/api/ext/native_meta_tensor.h
index 60ff1506f5039b..d64ccb2e6f3be3 100644
--- a/paddle/phi/api/ext/native_meta_tensor.h
+++ b/paddle/phi/api/ext/native_meta_tensor.h
@@ -23,7 +23,7 @@ class PADDLE_API NativeMetaTensor {
  public:
   NativeMetaTensor() = default;
   NativeMetaTensor(phi::DataType dtype, phi::DDim dims)
-      : dtype_(dtype), dims_(dims) {}
+      : dims_(dims), dtype_(dtype) {}
   DDim dims() const;
   DataType dtype() const;
   void set_dims(const DDim& dims);
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 1e71a65a0ddbea..79b7e713d4ef11 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -643,7 +643,7 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
           "Supported data types include `bool`, `int`, `float`, `double`,  "
           "`int64_t`, `std::string`, `std::vector<int>`, "
           "`std::vector<float>`, `std::vector<int64_t>`, "
-          "`std::vector<std::string>`, `std::vector<bool>`, `void*`"
+          "`std::vector<std::string>`, `std::vector<bool>`, `void*`. "
           "Please check whether the attribute data type and "
           "data type string are matched.",
           attr_type_str));
diff --git a/test/legacy_test/test_meta_tensor.py b/test/legacy_test/test_meta_tensor.py
index f873f5cd6d9197..5897a3f37a8693 100644
--- a/test/legacy_test/test_meta_tensor.py
+++ b/test/legacy_test/test_meta_tensor.py
@@ -15,53 +15,9 @@
 import unittest
 
 import paddle
-from paddle.base.libpaddle import IrMetaTensor, IrTensor
 from paddle.static import MetaTensor
 
 
-class TestIrTensor(unittest.TestCase):
-    def test_basic_get_set(self):
-        ir_tensor = IrTensor()
-
-        ir_tensor.set_shape([4, 8192, 768])
-        self.assertEqual(ir_tensor.shape, [4, 8192, 768])
-
-        ir_tensor.set_dtype('bfloat16')
-        self.assertEqual(ir_tensor.dtype, paddle.bfloat16)
-        ir_tensor.set_dtype(paddle.uint8)
-        self.assertEqual(ir_tensor.dtype, paddle.uint8)
-
-    def test_eq(self):
-        x_ir_meta = IrTensor()
-        y_ir_meta = IrTensor()
-        self.assertEqual(x_ir_meta, y_ir_meta)
-        x_ir_meta.set_shape([4, 8192])
-        y_ir_meta.set_shape([4, 8192])
-        self.assertEqual(x_ir_meta, y_ir_meta)
-        x_ir_meta.set_shape([4, 8193])
-        self.assertNotEqual(x_ir_meta, y_ir_meta)
-        y_ir_meta = IrTensor(x_ir_meta)
-        self.assertEqual(x_ir_meta, y_ir_meta)
-
-
-class TestIrMetaTensor(unittest.TestCase):
-    def test_basic_get_set(self):
-        ir_tensor = IrTensor()
-        ir_meta_tensor = IrMetaTensor(ir_tensor)
-
-        shape = [4, 8192, 768]
-        ir_meta_tensor.set_shape(shape)
-        self.assertEqual(ir_tensor.shape, shape)
-        self.assertEqual(ir_meta_tensor.shape, shape)
-
-        ir_meta_tensor.set_dtype('bfloat16')
-        self.assertEqual(ir_tensor.dtype, paddle.bfloat16)
-        self.assertEqual(ir_meta_tensor.dtype, paddle.bfloat16)
-        ir_meta_tensor.set_dtype(paddle.uint8)
-        self.assertEqual(ir_tensor.dtype, paddle.uint8)
-        self.assertEqual(ir_meta_tensor.dtype, paddle.uint8)
-
-
 def infer_meta_fn(x_meta: MetaTensor, y_meta: MetaTensor):
     z_meta = MetaTensor()
     z_meta.set_shape([x_meta.shape[0], y_meta.shape[-1]])

From 395ea102a093deb8343499561703d91cc705909c Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 14:56:15 +0800
Subject: [PATCH 18/33] rm operator<< for vec<T>

---
 .../fluid/pybind/manual_static_op_function.h  | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index e55fb4bc06b87e..8ba4e31e63e513 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -993,18 +993,18 @@ static PyObject *run_custom_op(PyObject *self,
   }
 }
 
-template <typename T>
-std::ostream &operator<<(std::ostream &os, const std::vector<T> &vec) {
-  os << "[";
-  for (size_t i = 0; i < vec.size(); ++i) {
-    os << vec[i];
-    if (i != vec.size() - 1) {
-      os << ", ";
-    }
-  }
-  os << "]";
-  return os;
-}
+// template <typename T>
+// std::ostream &operator<<(std::ostream &os, const std::vector<T> &vec) {
+//   os << "[";
+//   for (size_t i = 0; i < vec.size(); ++i) {
+//     os << vec[i];
+//     if (i != vec.size() - 1) {
+//       os << ", ";
+//     }
+//   }
+//   os << "]";
+//   return os;
+// }
 
 using IrTensor = paddle::dialect::IrTensor;
 
@@ -1095,16 +1095,17 @@ static PyObject *run_python_op(PyObject *self,
   std::unordered_map<std::string, std::string> op_inplace_map =
       ParseStringDict(py_inplace_dict);
 
-  std::cout << "Get things from python for Custom PyOp: [" << op_name << "]"
-            << std::endl;
-  std::cout << "op_name: " << op_name << std::endl;
-  std::cout << "inputs: " << inputs_vec << std::endl;
-  std::cout << "outputs: " << outputs_vec << std::endl;
-  std::cout << "attrs[infer_meta_fn_ptr]: "
-            << reinterpret_cast<uintptr_t>(attrs_map["infer_meta_fn_ptr"])
-            << std::endl;
-  std::cout << "attrs[fn_ptr]: "
-            << reinterpret_cast<uintptr_t>(attrs_map["fn_ptr"]) << std::endl;
+  VLOG(6) << "Building Python OP [" << op_name << "] with attrs:" << std::endl
+          << "op_name: " << op_name << std::endl
+          << "inputs: " << paddle::string::join_strings(inputs_vec, ", ")
+          << std::endl
+          << "outputs: " << paddle::string::join_strings(outputs_vec, ", ")
+          << std::endl
+          << "attrs[infer_meta_fn_ptr]: "
+          << reinterpret_cast<uintptr_t>(attrs_map["infer_meta_fn_ptr"])
+          << std::endl
+          << "attrs[fn_ptr]: "
+          << reinterpret_cast<uintptr_t>(attrs_map["fn_ptr"]);
 
   const auto &meta_info_map = OpMetaInfoMap::Instance().GetMap();
 

From fd77e8670779224a3124905802899869234d6db0 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 15:36:36 +0800
Subject: [PATCH 19/33] cleanup code

---
 .../fluid/pybind/manual_static_op_function.h  | 94 ++-----------------
 1 file changed, 10 insertions(+), 84 deletions(-)

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 8ba4e31e63e513..ae228e417f2699 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1114,8 +1114,7 @@ static PyObject *run_python_op(PyObject *self,
       attrs_map["infer_meta_fn_ptr"], op_name);
 
   if (meta_info_map.find(op_name) == meta_info_map.end()) {
-    std::cout << "We need to register this op first! " << op_name << std::endl;
-    // 不存在先注册一下
+    VLOG(6) << "Python OP " << op_name << " does not exist, registering...";
     paddle::framework::RegisterPythonOperator(
         op_name,
         std::move(inputs_vec),
@@ -1126,7 +1125,6 @@ static PyObject *run_python_op(PyObject *self,
         std::move(infer_meta_py_func));
   }
 
-  // 不知道为啥不能跑?
   PADDLE_ENFORCE_NE(meta_info_map.find(op_name),
                     meta_info_map.end(),
                     common::errors::NotFound(
@@ -1142,8 +1140,6 @@ static PyObject *run_python_op(PyObject *self,
   const auto &inplace_map = paddle::OpMetaInfoHelper::GetInplaceMap(vec_map[0]);
   const auto &inplace_reverse_map =
       paddle::OpMetaInfoHelper::GetInplaceReverseMap(vec_map[0]);
-  // auto infershape_func = OpMetaInfoHelper::GetInferShapeFn(vec_map[0]);
-  // auto inferdtype_func = OpMetaInfoHelper::GetInferDtypeFn(vec_map[0]);
 
   std::string pir_op_name =
       paddle::framework::kPythonOperatorDialectPrefix + op_name;
@@ -1166,7 +1162,6 @@ static PyObject *run_python_op(PyObject *self,
   int input_index = 0;
   int vec_input_index = 0;
 
-  // std::vector<IrTensor> vec_dense_inputs;
   std::vector<phi::NativeMetaTensor> inputs_meta;
   inputs_meta.reserve(inputs.size());
 
@@ -1177,47 +1172,14 @@ static PyObject *run_python_op(PyObject *self,
     // use one un-initialized tensor to indicate both Tensor and
     // vector<Tensor> inputs.
     if (obj == Py_None) {
-      std::cout << "Add un-initialized tensor "
-                   "because the optional input is None"
-                << std::endl;
-      if (paddle::framework::detail::IsDuplicableVar(input)) {
-        std::vector<std::vector<int64_t>> vec_input_shape;
-        std::vector<DataType> vec_input_dtype;
-        vec_input_shapes.emplace_back(vec_input_shape);
-        vec_input_dtypes.emplace_back(vec_input_dtype);
-        vec_input_name2id_map[inputs[i]] = vec_input_index;
-        vec_input_index++;
-      } else {
-        std::vector<int64_t> input_shape;
-        DataType input_dtype = DataType::UNDEFINED;
-        input_shapes.emplace_back(input_shape);
-        input_dtypes.emplace_back(input_dtype);
-        input_name2id_map[inputs[i]] = input_index;
-        input_index++;
-      }
-      argument_inputs.emplace_back();
-      continue;
+      PADDLE_THROW(common::errors::Unimplemented(
+          "Currently, optional Tensor input is not supported in "
+          "Python operator."));
     }
     if (paddle::framework::detail::IsDuplicableVar(input)) {
-      // std::vector<std::vector<int64_t>> tmp_input_shapes;
-      // std::vector<phi::DataType> tmp_input_dtypes;
-      // vec_input_name2id_map[inputs[i]] = vec_input_index;
-      // vec_input_index++;
-      // std::vector<pir::Value> input_values =
-      //     CastPyArg2VectorOfValue(obj, op_name, i, false);
-      // for (auto &input_value : input_values) {
-      //   paddle::dialect::DenseTensorType input_tensor =
-      //       input_value.type().dyn_cast<paddle::dialect::DenseTensorType>();
-      //   tmp_input_shapes.push_back(phi::vectorize(input_tensor.dims()));
-      //   tmp_input_dtypes.push_back(
-      //       paddle::dialect::TransToPhiDataType(input_tensor.dtype()));
-      // }
-      // vec_input_shapes.push_back(tmp_input_shapes);
-      // vec_input_dtypes.push_back(tmp_input_dtypes);
-      // auto combine_op = paddle::dialect::ApiBuilder::Instance()
-      //                       .GetBuilder()
-      //                       ->Build<pir::CombineOp>(input_values);
-      // argument_inputs.push_back(combine_op.out());
+      PADDLE_THROW(common::errors::Unimplemented(
+          "Currently, optional vector<Tensor> input is not supported in "
+          "Python operator."));
     } else {
       input_name2id_map[inputs[i]] = input_index;
       input_index++;
@@ -1225,9 +1187,6 @@ static PyObject *run_python_op(PyObject *self,
           CastPyArg2Value(obj, op_name, i, false);  // NOLINT
       paddle::dialect::DenseTensorType input_tensor =
           input_value.type().dyn_cast<paddle::dialect::DenseTensorType>();
-      // input_shapes.push_back(phi::vectorize(input_tensor.dims()));
-      // input_dtypes.push_back(
-      //     paddle::dialect::TransToPhiDataType(input_tensor.dtype()));
       argument_inputs.push_back(input_value);
 
       inputs_meta.push_back(phi::NativeMetaTensor(
@@ -1236,7 +1195,6 @@ static PyObject *run_python_op(PyObject *self,
     }
   }
   argument.AddInputs(argument_inputs);
-  std::cout << "finish argument.AddInputs(argument_inputs)! " << std::endl;
 
   custom_attrs.push_back(attrs_map["infer_meta_fn_ptr"]);
   custom_attrs.push_back(attrs_map["fn_ptr"]);
@@ -1306,8 +1264,6 @@ static PyObject *run_python_op(PyObject *self,
     all_values_num += output_name2value_num[output];
   }
 
-  std::cout << "all_values_num: " << all_values_num << std::endl;
-
   if (run_auto_parallel) {
     PADDLE_ENFORCE_EQ(
         spmd_info.second.size(),
@@ -1332,38 +1288,9 @@ static PyObject *run_python_op(PyObject *self,
       continue;
     }
     if (paddle::framework::detail::IsDuplicableVar(output)) {
-      std::vector<pir::Type> out_types;
-      std::vector<pir::Attribute> dist_attrs;
-      for (size_t j = 0; j < value_num; ++j) {
-        // auto ddims = phi::make_ddim(output_shapes[value_index]);
-        // auto dtype = output_dtypes[value_index];
-        // phi::DataLayout layout{DataLayout::NCHW};
-        // phi::LegacyLoD lod;
-        // auto type = paddle::dialect::DenseTensorType::get(
-        //     pir::IrContext::Instance(),
-        //     paddle::dialect::TransToIrDataType(dtype),
-        //     ddims,
-        //     layout,
-        //     lod,
-        //     0);
-
-        // if (run_auto_parallel) {
-        //   auto dist_attr =
-        //   dialect::CvtToPirAttr(spmd_info.second[value_index]);
-        //   out_types.push_back(dialect::CvtToPirDistType(type, dist_attr));
-        //   dist_attrs.push_back(dist_attr);
-        // } else {
-        //   out_types.push_back(std::move(type));
-        // }
-        // value_index++;
-      }
-      pir::Type out_vector_type =
-          pir::VectorType::get(pir::IrContext::Instance(), out_types);
-      argument_outputs.push_back(out_vector_type);
-      if (run_auto_parallel) {
-        dist_result_attrs.push_back(
-            pir::ArrayAttribute::get(pir::IrContext::Instance(), dist_attrs));
-      }
+      PADDLE_THROW(common::errors::Unimplemented(
+          "Currently, vector<Tensor> output is not supported in Python "
+          "operator."));
     } else {
       auto dense_out = process_result[value_index];
       auto out_type = paddle::dialect::DenseTensorType::get(
@@ -1427,7 +1354,6 @@ static PyObject *run_python_op(PyObject *self,
     }
   }
   callstack_recorder.AttachToOps();
-  VLOG(0) << "return ToPyObject(op_results);";
   return ToPyObject(op_results);
 }
 

From 2145b0ded2b7b36b00285f3423d10f5e97a67174 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 15:37:21 +0800
Subject: [PATCH 20/33] cleanup comments

---
 paddle/fluid/pybind/manual_static_op_function.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index ae228e417f2699..4849294c4583c9 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -993,19 +993,6 @@ static PyObject *run_custom_op(PyObject *self,
   }
 }
 
-// template <typename T>
-// std::ostream &operator<<(std::ostream &os, const std::vector<T> &vec) {
-//   os << "[";
-//   for (size_t i = 0; i < vec.size(); ++i) {
-//     os << vec[i];
-//     if (i != vec.size() - 1) {
-//       os << ", ";
-//     }
-//   }
-//   os << "]";
-//   return os;
-// }
-
 using IrTensor = paddle::dialect::IrTensor;
 
 template <typename T>

From b786b82333aa5d059cdef1a187f9c585b0ebb7d9 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 15:38:36 +0800
Subject: [PATCH 21/33] prepend indent to op attrs

---
 paddle/fluid/pybind/manual_static_op_function.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 4849294c4583c9..1c3a03ed4c7a13 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1083,15 +1083,15 @@ static PyObject *run_python_op(PyObject *self,
       ParseStringDict(py_inplace_dict);
 
   VLOG(6) << "Building Python OP [" << op_name << "] with attrs:" << std::endl
-          << "op_name: " << op_name << std::endl
-          << "inputs: " << paddle::string::join_strings(inputs_vec, ", ")
+          << "    op_name: " << op_name << std::endl
+          << "    inputs: " << paddle::string::join_strings(inputs_vec, ", ")
           << std::endl
-          << "outputs: " << paddle::string::join_strings(outputs_vec, ", ")
+          << "    outputs: " << paddle::string::join_strings(outputs_vec, ", ")
           << std::endl
-          << "attrs[infer_meta_fn_ptr]: "
+          << "    attrs[infer_meta_fn_ptr]: "
           << reinterpret_cast<uintptr_t>(attrs_map["infer_meta_fn_ptr"])
           << std::endl
-          << "attrs[fn_ptr]: "
+          << "    attrs[fn_ptr]: "
           << reinterpret_cast<uintptr_t>(attrs_map["fn_ptr"]);
 
   const auto &meta_info_map = OpMetaInfoMap::Instance().GetMap();

From b9d3738ba61d3d5b5dd3a6fe62e2a16fce5f1a3b Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 15:48:44 +0800
Subject: [PATCH 22/33] restore exc in pir_interpreter

---
 paddle/fluid/framework/new_executor/pir_interpreter.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 66663e37904c85..f0a5f82b0ff4aa 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -2043,7 +2043,7 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) {
     exception_holder_.Catch(std::current_exception());
   } catch (std::exception& ex) {
     LOG(WARNING) << instr_node->Name() << " raises an exception "
-                 << common::demangle(typeid(ex).name()) << ": " << ex.what();
+                 << common::demangle(typeid(ex).name());
     exception_holder_.Catch(std::current_exception());
   } catch (...) {
     LOG(WARNING) << instr_node->Name() << " raises an unknown exception";

From 6819b9069386edaef80b4b7efa57966338b6e129 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 17:16:06 +0800
Subject: [PATCH 23/33] cleanup some comments

---
 .../instruction/python_function_instruction.cc           | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 71300877ad3112..3082c8bea16650 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -322,11 +322,11 @@ PythonFunctionInstruction::PythonFunctionInstruction(
           "can not find OpYamlInfoInterface from [%s]", op_name));
   paddle::dialect::OpYamlInfoParser yaml_info_parser(
       yaml_interface->get_op_info_(op_name),
-      paddle::dialect::IsLegacyOp(op_name));
+      /*is_legacy_op=*/false);
   VLOG(6) << "finish process yaml_info_parser";
   const auto& op_meta =
       paddle::framework::detail::GetPythonOperatorInfoByPirName(op_name);
-  custom_op_meta_ = &op_meta;  // 后面把这个 custom_op_meta_ 删了吧啊？没啥用
+  custom_op_meta_ = &op_meta;
 
   py_func_ptr_ = &(OpMetaInfoHelper::GetPythonOperatorFunction(op_meta));
   py_func_infer_meta_ptr_ =
@@ -406,7 +406,6 @@ void PythonFunctionInstruction::Run() {
   VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
   BuildShapeDtype();
 
-  // UpdateOutputMeta();
   for (auto& pair : this->InplaceInfo()) {
     ShareVarBuffer(pair.first, pair.second);
   }
@@ -415,16 +414,14 @@ void PythonFunctionInstruction::Run() {
       py_func_ptr_,
       common::errors::InvalidArgument("Custom kernel function is nullptr."));
 
-  // 这里假设只有俩参数
   std::vector<Tensor> vec_dense_inputs;
   size_t num = op_->num_operands();
-  VLOG(0) << "Op num_operands: " << num;
   for (size_t i = 0; i < num; ++i) {
     vec_dense_inputs.push_back(python_function_ctx_.InputAt(i));
   }
 
   auto out = (*py_func_ptr_)(vec_dense_inputs);
-  python_function_ctx_.ValidateAndAssignOutputs(out);  // 从宏里面扒出来
+  python_function_ctx_.ValidateAndAssignOutputs(out);
   if (FLAGS_check_cuda_error) [[unlikely]] {
     CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " finish");
   }

From 0a82cee76088d2e9dcb1c8ceabf496a63e806582 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 19:04:17 +0800
Subject: [PATCH 24/33] cleanup some code

---
 .../python_function_instruction.cc            | 91 +++----------------
 1 file changed, 12 insertions(+), 79 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 3082c8bea16650..73d3678123f022 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -70,7 +70,6 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
   int vec_input_index = 0;
 
   for (const std::string& t : vec_input_tensor_params) {
-    VLOG(6) << "for (const auto& t : vec_input_tensor_params) {   " << t;
     PADDLE_ENFORCE_EQ(
         name2id.count(t),
         true,
@@ -80,19 +79,10 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
     if (!IsInvalid(ptr)) {
       if (op_yaml_info.GetInputType(op_yaml_info.InputName2Id().at(t)) ==
           "pir::VectorType<paddle::dialect::DenseTensorType>") {
-        // vec_input_name2id_map_[t] = vec_input_index;
-        // vec_input_index++;
-        // vec_input_ptrs_.emplace_back();
-        // // NOTE(YuanRisheng): In dygraph mode, we can not distinguish Tensor
-        // and
-        // // vector<Tensor> when user inputs None, so dygraph mode appends one
-        // // un-initialized Tensor to CustomOpKernelContext. To be compatible
-        // with
-        // // dygraph mode, `custom_vec_in` also emplace_back one un-initialized
-        // // tensor here.
-        // std::vector<paddle::Tensor> custom_vec_in;
-        // custom_vec_in.emplace_back(paddle::Tensor());
-        // python_function_ctx_.EmplaceBackInputs(std::move(custom_vec_in));
+        PADDLE_THROW(common::errors::Unimplemented(
+            "Only support Tensor input type for now in "
+            "PythonFunctionInstruction, "
+            "not support VectorType<DenseTensorType>."));
       } else {
         input_name2id_map_[t] = input_index;
         input_index++;
@@ -123,31 +113,10 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       custom_in.set_impl(tensor_in);
       python_function_ctx_.EmplaceBackInput(std::move(custom_in));
     } else if (var->IsType<VariableRefArray>()) {
-      // std::vector<phi::DenseTensor*> vec_input_ptrs;
-      // std::vector<paddle::Tensor> vec_custom_in;
-      // auto& variable_array = var->Get<VariableRefArray>();
-      // for (size_t i = 0; i < variable_array.size(); ++i) {
-      //   if (variable_array[i]->IsType<phi::DenseTensor>()) {
-      //     phi::DenseTensor* dense_tensor_in = const_cast<phi::DenseTensor*>(
-      //         &(variable_array[i]->Get<phi::DenseTensor>()));
-      //     std::shared_ptr<phi::DenseTensor> tensor_in(
-      //         dense_tensor_in, [](phi::DenseTensor* ptr) {
-      //           VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
-      //         });
-      //     vec_input_ptrs.push_back(dense_tensor_in);
-      //     paddle::Tensor custom_in;
-      //     custom_in.set_impl(tensor_in);
-      //     vec_custom_in.push_back(std::move(custom_in));
-      //   } else {
-      //     PADDLE_THROW(common::errors::Unimplemented(
-      //         "Only support Vector<DenseTensor> and vector<SelectedRows> now,
-      //         " "not support vector<%d>.", variable_array[i]->Type()));
-      //   }
-      // }
-      // vec_input_name2id_map_[t] = vec_input_index;
-      // vec_input_index++;
-      // vec_input_ptrs_.push_back(vec_input_ptrs);
-      // python_function_ctx_.EmplaceBackInputs(vec_custom_in);
+      PADDLE_THROW(
+          common::errors::Unimplemented("Only support Tensor input type for "
+                                        "now in PythonFunctionInstruction, "
+                                        "not support Vector<DenseTensor>."));
     } else {
       PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d] ",
                                                  var->Type()));
@@ -231,39 +200,10 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       VLOG(8) << "ctx->EmplaceBackOutput DenseTensor: "
               << value_exec_info_.GetVarName(out_ptr);
     } else if (out_ptr.type().isa<pir::VectorType>()) {
-      // VLOG(0) << "WHere am I?  222222222222";
-      // std::vector<paddle::Tensor> vec_custom_out;
-      // auto& variable_array =
-      //     inner_scope->FindVar(value_exec_info_.GetVarName(out_ptr))
-      //         ->Get<VariableRefArray>();
-      // std::vector<paddle::Tensor> custom_vec_out;
-      // PADDLE_ENFORCE(
-      //     !inplace_id_map.empty() || (i == 0UL && op_->num_results() == 1UL),
-      //     common::errors::PreconditionNotMet(
-      //         "If custom operator's outputs contains `paddle::Vec()` type "
-      //         "without setting InplaceMap, it only can hold one output."));
-      // for (size_t j = 0; j < variable_array.size(); ++j) {
-      //   if (variable_array[j]->IsType<phi::DenseTensor>()) {
-      //     auto dense_tensor_out = const_cast<phi::DenseTensor*>(
-      //         &(variable_array[j]->Get<phi::DenseTensor>()));
-      //     cache_out_ptrs_.emplace_back(dense_tensor_out);
-      //     std::shared_ptr<phi::DenseTensor> tensor_out(
-      //         dense_tensor_out, [](phi::DenseTensor* ptr) {
-      //           VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
-      //         });
-      //     paddle::Tensor custom_out;
-      //     custom_out.set_impl(tensor_out);
-      //     custom_vec_out.push_back(std::move(custom_out));
-      //   } else {
-      //     PADDLE_THROW(common::errors::Unimplemented(
-      //         "Only support Vector<DenseTensor> now, "
-      //         "not support vector<%d>.",
-      //         variable_array[j]->Type()));
-      //   }
-      // }
-      // VLOG(8) << "ctx->EmplaceBackOutput VariableRefArray: "
-      //         << value_exec_info_.GetVarName(out_ptr);
-      // python_function_ctx_.EmplaceBackOutputs(custom_vec_out);
+      PADDLE_THROW(
+          common::errors::Unimplemented("Only support DenseTensor output type "
+                                        "for now in PythonFunctionInstruction, "
+                                        "not support VectorType."));
     } else {
       PADDLE_THROW(common::errors::Unimplemented(
           "only support DenseTensor and vector "));
@@ -295,11 +235,6 @@ PythonFunctionInstruction::PythonFunctionInstruction(
       vec_input_ptrs_(),
       cache_out_ptrs_(),
       value_exec_info_(value_exec_info) {
-  std::cout << "PythonFunctionInstruction::"
-               "PythonFunctionInstruction"
-            << std::endl;
-
-  // auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
   auto op_attributes = op->attributes();
   auto op_name =
       op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
@@ -309,8 +244,6 @@ PythonFunctionInstruction::PythonFunctionInstruction(
   op_ = op;
   VLOG(6) << "construct custom kernel instruction for: " << op_name;
 
-  VLOG(6) << "finish process dist attributes";
-
   SetKernelType(AnalyseOpFuncType(op, place));
   VLOG(6) << "finish process analyse kernel type";
 

From 284deaeb65b8227bc7ee26fc1e362c4df42e43e9 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 19:05:34 +0800
Subject: [PATCH 25/33] reformat msg

---
 .../new_executor/instruction/python_function_instruction.cc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 73d3678123f022..882b4e6e7076ac 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -168,10 +168,8 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
           paddle::framework::detail::IsOptionalVar(out_name) &&
               !inplace_id_map.empty(),
           common::errors::InvalidArgument(
-              "Custom operator couldn't find custom output for name %s. If "
-              "you "
-              "are using inplace optional inputs & outputs, please check "
-              "your "
+              "Custom operator couldn't find custom output for name %s. If you "
+              "are using inplace optional inputs & outputs, please check your "
               "InplaceMap and `Outputs` again and make sure %s is wrapped by "
               "`paddle::Optional`",
               out_name,

From c797f4d302e5f6be272f58b7b2840efae7a40f87 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Thu, 18 Dec 2025 19:30:20 +0800
Subject: [PATCH 26/33] cleanup infermeta in runtime

---
 .../python_function_instruction.cc            | 97 +++++--------------
 .../instruction/python_function_instruction.h | 28 +-----
 2 files changed, 26 insertions(+), 99 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index 882b4e6e7076ac..c5b01699d21152 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -31,11 +31,11 @@ namespace paddle::framework {
 void PythonFunctionInstruction::BuildPythonFunctionContext(
     const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
   PADDLE_ENFORCE_NOT_NULL(
-      custom_op_meta_,
+      python_op_meta_,
       common::errors::PreconditionNotMet(
-          "PythonFunctionInstruction: custom_op_meta_ is null"));
+          "PythonFunctionInstruction: python_op_meta_ is null"));
 
-  auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*custom_op_meta_);
+  auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(*python_op_meta_);
   VLOG(6) << "op_inplace_map.size(): " << op_inplace_map.size();
 
   // check inplace
@@ -48,10 +48,9 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       PADDLE_ENFORCE(
           paddle::framework::detail::IsOptionalVar(pair.second),
           common::errors::InvalidArgument(
-              "Custom operator couldn't find custom output name for %s. If "
+              "Python operator couldn't find output name for %s. If "
               "you are using inplace optional inputs & outputs, please "
-              "check "
-              "your InplaceMap and `Outputs` again and make sure %s is "
+              "check your InplaceMap and `Outputs` again and make sure %s is "
               "wrapped by `paddle::Optional`",
               pair.second,
               pair.second));
@@ -84,9 +83,7 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
             "PythonFunctionInstruction, "
             "not support VectorType<DenseTensorType>."));
       } else {
-        input_name2id_map_[t] = input_index;
         input_index++;
-        input_ptrs_.emplace_back(nullptr);
         python_function_ctx_.EmplaceBackInput(paddle::Tensor());
       }
       continue;
@@ -106,12 +103,10 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
           dense_tensor_in, [](phi::DenseTensor* ptr) {
             VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
           });
-      input_name2id_map_[t] = input_index;
       input_index++;
-      input_ptrs_.push_back(dense_tensor_in);
-      paddle::Tensor custom_in;
-      custom_in.set_impl(tensor_in);
-      python_function_ctx_.EmplaceBackInput(std::move(custom_in));
+      paddle::Tensor python_in;
+      python_in.set_impl(tensor_in);
+      python_function_ctx_.EmplaceBackInput(std::move(python_in));
     } else if (var->IsType<VariableRefArray>()) {
       PADDLE_THROW(
           common::errors::Unimplemented("Only support Tensor input type for "
@@ -135,18 +130,12 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
                           t));
     auto& attr_type_name = op_yaml_info.AttrTypeName(t);
     if (attr_type_name == "pir::Int32Attribute") {
-      custom_attrs_.push_back(
-          attr_map[t].dyn_cast<pir::Int32Attribute>().data());
       python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int32Attribute>().data());
     } else if (attr_type_name == "pir::Int64Attribute") {
-      custom_attrs_.push_back(
-          attr_map[t].dyn_cast<pir::Int64Attribute>().data());
       python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::Int64Attribute>().data());
     } else if (attr_type_name == "pir::PointerAttribute") {
-      custom_attrs_.push_back(
-          attr_map[t].dyn_cast<pir::PointerAttribute>().data());
       python_function_ctx_.EmplaceBackAttr(
           attr_map[t].dyn_cast<pir::PointerAttribute>().data());
     } else {
@@ -161,20 +150,18 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
   for (size_t i = 0; i < op_->num_results(); ++i) {
     pir::Value out_ptr = op_->result(i);
     auto out_name = op_yaml_info.OutputNames()[i];
-    VLOG(0) << "out_name: " << out_name;
-    VLOG(0) << "!IsInvalid(out_ptr)" << !IsInvalid(out_ptr);
     if (!IsInvalid(out_ptr)) {
       PADDLE_ENFORCE(
           paddle::framework::detail::IsOptionalVar(out_name) &&
               !inplace_id_map.empty(),
           common::errors::InvalidArgument(
-              "Custom operator couldn't find custom output for name %s. If you "
+              "Python operator couldn't find python output for name %s. If you "
               "are using inplace optional inputs & outputs, please check your "
               "InplaceMap and `Outputs` again and make sure %s is wrapped by "
               "`paddle::Optional`",
               out_name,
               out_name));
-      VLOG(3) << "Custom Operator: BuildContext - inplace optional outputs : "
+      VLOG(3) << "Python Operator: BuildContext - inplace optional outputs : "
               << out_name << " is None.";
       python_function_ctx_.EmplaceBackOutput(paddle::Tensor());
 
@@ -190,11 +177,11 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
           dense_tensor_out, [](phi::DenseTensor* ptr) {
             VLOG(6) << ptr << " ptr will not be deleted by shared_ptr";
           });
-      paddle::Tensor custom_out;
+      paddle::Tensor python_out;
       // here only can copy the output tensor into context
-      custom_out.set_impl(tensor_out);
+      python_out.set_impl(tensor_out);
 
-      python_function_ctx_.EmplaceBackOutput(std::move(custom_out));
+      python_function_ctx_.EmplaceBackOutput(std::move(python_out));
       VLOG(8) << "ctx->EmplaceBackOutput DenseTensor: "
               << value_exec_info_.GetVarName(out_ptr);
     } else if (out_ptr.type().isa<pir::VectorType>()) {
@@ -208,8 +195,8 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
     }
   }
 
-  auto& op_inputs = OpMetaInfoHelper::GetInputs(*custom_op_meta_);
-  auto& op_outputs = OpMetaInfoHelper::GetOutputs(*custom_op_meta_);
+  auto& op_inputs = OpMetaInfoHelper::GetInputs(*python_op_meta_);
+  auto& op_outputs = OpMetaInfoHelper::GetOutputs(*python_op_meta_);
 
   // handle inplace map
   python_function_ctx_.UpdatePlainOutputs(
@@ -222,25 +209,16 @@ PythonFunctionInstruction::PythonFunctionInstruction(
     pir::Operation* op,
     const ValueExecutionInfo& value_exec_info)
     : InstructionBase(id, place),
-      input_name2id_map_(),
-      vec_input_name2id_map_(),
-      input_shapes_(),
-      vec_input_shapes_(),
-      custom_attrs_(),
-      input_dtypes_(),
-      vec_input_dtypes_(),
-      input_ptrs_(),
-      vec_input_ptrs_(),
       cache_out_ptrs_(),
       value_exec_info_(value_exec_info) {
   auto op_attributes = op->attributes();
   auto op_name =
       op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
-  custom_op_name_ = op_name;
+  python_op_name_ = op_name;
   pir::OpInfo op_info =
       pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
   op_ = op;
-  VLOG(6) << "construct custom kernel instruction for: " << op_name;
+  VLOG(6) << "construct python kernel instruction for: " << op_name;
 
   SetKernelType(AnalyseOpFuncType(op, place));
   VLOG(6) << "finish process analyse kernel type";
@@ -257,14 +235,14 @@ PythonFunctionInstruction::PythonFunctionInstruction(
   VLOG(6) << "finish process yaml_info_parser";
   const auto& op_meta =
       paddle::framework::detail::GetPythonOperatorInfoByPirName(op_name);
-  custom_op_meta_ = &op_meta;
+  python_op_meta_ = &op_meta;
 
   py_func_ptr_ = &(OpMetaInfoHelper::GetPythonOperatorFunction(op_meta));
   py_func_infer_meta_ptr_ =
       &(OpMetaInfoHelper::GetPythonOperatorInferMetaFunction(op_meta));
 
   BuildPythonFunctionContext(yaml_info_parser);
-  VLOG(6) << "finish process custom context";
+  VLOG(6) << "finish process python context";
   auto kernel_key = op_attributes.at("kernel_key")
                         .dyn_cast<paddle::dialect::KernelAttribute>()
                         .data();
@@ -301,49 +279,18 @@ PythonFunctionInstruction::PythonFunctionInstruction(
   VLOG(6) << "finish process no need buffer";
 }
 
-void PythonFunctionInstruction::BuildShapeDtype() {
-  input_shapes_.clear();
-  input_dtypes_.clear();
-  vec_input_shapes_.clear();
-  vec_input_dtypes_.clear();
-  for (auto in_tensor : input_ptrs_) {
-    if (in_tensor) {
-      input_shapes_.push_back(phi::vectorize(in_tensor->dims()));
-      input_dtypes_.push_back(in_tensor->dtype());
-    } else {
-      input_shapes_.emplace_back();
-      input_dtypes_.emplace_back();
-    }
-  }
-  for (auto in_tensors : vec_input_ptrs_) {
-    std::vector<std::vector<int64_t>> input_shapes;
-    std::vector<phi::DataType> input_dtypes;
-    if (in_tensors.size() > 0) {
-      for (auto in_tensor : in_tensors) {
-        input_shapes.push_back(phi::vectorize(in_tensor->dims()));
-        input_dtypes.push_back(in_tensor->dtype());
-      }
-    }
-    vec_input_shapes_.push_back(input_shapes);
-    vec_input_dtypes_.push_back(input_dtypes);
-  }
-}
-
 void PythonFunctionInstruction::Run() {
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " begin");
+    CUDAErrorCheck("PythonFunctionInstruction " + python_op_name_ + " begin");
   }
 
-  VLOG(3) << "Custom Operator: InferShape - calc output ddim.";
-  BuildShapeDtype();
-
   for (auto& pair : this->InplaceInfo()) {
     ShareVarBuffer(pair.first, pair.second);
   }
 
   PADDLE_ENFORCE_NOT_NULL(
       py_func_ptr_,
-      common::errors::InvalidArgument("Custom kernel function is nullptr."));
+      common::errors::InvalidArgument("Python function pointer is nullptr."));
 
   std::vector<Tensor> vec_dense_inputs;
   size_t num = op_->num_operands();
@@ -354,7 +301,7 @@ void PythonFunctionInstruction::Run() {
   auto out = (*py_func_ptr_)(vec_dense_inputs);
   python_function_ctx_.ValidateAndAssignOutputs(out);
   if (FLAGS_check_cuda_error) [[unlikely]] {
-    CUDAErrorCheck("PythonFunctionInstruction " + custom_op_name_ + " finish");
+    CUDAErrorCheck("PythonFunctionInstruction " + python_op_name_ + " finish");
   }
 }
 }  // namespace paddle::framework
diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
index 02fd0b0c7068f8..60b98c1904f000 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.h
@@ -36,7 +36,7 @@ class PythonFunctionInstruction : public InstructionBase {
 
   void Run() override;
 
-  const std::string& Name() const override { return custom_op_name_; }
+  const std::string& Name() const override { return python_op_name_; }
 
   void clear();
 
@@ -44,41 +44,21 @@ class PythonFunctionInstruction : public InstructionBase {
   void BuildPythonFunctionContext(
       const paddle::dialect::OpYamlInfoParser& op_yaml_info);
 
-  void BuildShapeDtype();
-  void UpdateOutputMeta();
-
   paddle::CustomOpKernelContext python_function_ctx_;
   paddle::KernelFunc kernel_func_ = nullptr;
 
   const paddle::PythonOperatorFunctionType* py_func_ptr_ = nullptr;
   const paddle::PythonOperatorInferMetaFunctionType* py_func_infer_meta_ptr_ =
-      nullptr;
-
-  // key is input name, value is a index in input_shapes_ or vec_input_shapes_
-  std::unordered_map<std::string, int> input_name2id_map_;
-  std::unordered_map<std::string, int> vec_input_name2id_map_;
-
-  // use for running infershape
-  std::vector<std::vector<int64_t>> input_shapes_;
-  std::vector<std::vector<std::vector<int64_t>>> vec_input_shapes_;
-  std::vector<paddle::any> custom_attrs_;
-
-  // use for running inferdtype
-  std::vector<DataType> input_dtypes_;
-  std::vector<std::vector<DataType>> vec_input_dtypes_;
-
-  // use for calculate input shapes and dtypes in runtime
-  std::vector<phi::DenseTensor*> input_ptrs_;
-  std::vector<std::vector<phi::DenseTensor*>> vec_input_ptrs_;
+      nullptr;  // Unused in runtime
 
   // use for update output
   std::vector<phi::DenseTensor*> cache_out_ptrs_;
 
-  std::string custom_op_name_;
+  std::string python_op_name_;
 
   ::pir::Operation* op_{nullptr};  // not owned
 
-  const paddle::OpMetaInfo* custom_op_meta_;   // not owned
+  const paddle::OpMetaInfo* python_op_meta_;   // not owned
   const ValueExecutionInfo& value_exec_info_;  // not owned
 };
 

From b4209022833463141febb1da31565565ad816da4 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 00:05:02 +0800
Subject: [PATCH 27/33] normalize some python code

---
 .../fluid/pybind/manual_static_op_function.h  |   2 +-
 python/paddle/static/__init__.py              |   1 +
 .../static/{custom_pyop.py => python_op.py}   | 132 ++++++++++--------
 3 files changed, 76 insertions(+), 59 deletions(-)
 rename python/paddle/static/{custom_pyop.py => python_op.py} (67%)

diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 1c3a03ed4c7a13..aebb154b461a63 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -18,6 +18,7 @@
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/custom_operator_utils.h"
 #include "paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.h"
+#include "paddle/fluid/framework/python_operator.h"
 #include "paddle/fluid/pir/dialect/distributed/ir/dist_tools.h"
 #include "paddle/fluid/pir/dialect/distributed/ir/dist_type.h"
 #include "paddle/fluid/pir/dialect/operator/ir/api_builder.h"
@@ -37,7 +38,6 @@
 #include "paddle/pir/include/core/attribute.h"
 #include "paddle/pir/include/core/builtin_op.h"
 
-#include "paddle/fluid/framework/python_operator.h"
 namespace paddle {
 
 namespace pybind {
diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py
index b625b0b80c13a8..4399835ec2a70e 100644
--- a/python/paddle/static/__init__.py
+++ b/python/paddle/static/__init__.py
@@ -68,6 +68,7 @@
 from .nn.common import ExponentialMovingAverage, py_func
 from .nn.control_flow import Print
 from .nn.metric import accuracy, auc, ctr_metric_bundle
+from .python_op import register_op  # noqa: F401
 
 __all__ = [
     'append_backward',
diff --git a/python/paddle/static/custom_pyop.py b/python/paddle/static/python_op.py
similarity index 67%
rename from python/paddle/static/custom_pyop.py
rename to python/paddle/static/python_op.py
index 76bc0fa82dd113..f78ff1a0e7ac37 100644
--- a/python/paddle/static/custom_pyop.py
+++ b/python/paddle/static/python_op.py
@@ -11,20 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from __future__ import annotations
 
 import inspect
+import sys
 import types
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from functools import partial, wraps
-from typing import Any, Callable, ParamSpec, TypeVar, overload
+from typing import Any, Callable, TypeVar, overload
+
+from typing_extensions import ParamSpec
 
 import paddle
 from paddle import _C_ops
 
-# from paddle.static.meta_tensor import MetaTensorWrapper
-
-HAS_ARGS_OR_KWARGS: int = inspect.CO_VARARGS | inspect.CO_VARKEYWORDS
+HAS_VAR_ARGS_OR_KWARGS: int = inspect.CO_VARARGS | inspect.CO_VARKEYWORDS
 
 
 P1 = ParamSpec("P1")
@@ -64,24 +66,25 @@ def get_fn_defaults_params(fn: Callable[P1, R1]) -> tuple:
     return tuple(fn_defaults_params)
 
 
-def eliminate_positional_only(fn: Callable[P1, R1]) -> Callable[P1, R1]:
+def eliminate_positional_or_keyword_only(
+    fn: Callable[P1, R1],
+) -> Callable[P1, R1]:
+    assert isinstance(fn, types.FunctionType), "Only support regular function"
     code = fn.__code__
-    co_flags: int = code.co_flags & ~HAS_ARGS_OR_KWARGS
+    co_flags: int = code.co_flags & ~HAS_VAR_ARGS_OR_KWARGS
     co_flags = code.co_flags
 
-    # TODO: currently, only support Python3.10
-    if hasattr(code, "co_posonlyargcount"):
-        argcount = (
-            code.co_argcount
-            + code.co_kwonlyargcount
-            + bool(code.co_flags & inspect.CO_VARARGS)
-            + bool(code.co_flags & inspect.CO_VARKEYWORDS)
-        )
-
+    argcount = (
+        code.co_argcount
+        + code.co_kwonlyargcount
+        + bool(code.co_flags & inspect.CO_VARARGS)
+        + bool(code.co_flags & inspect.CO_VARKEYWORDS)
+    )
+    if sys.version_info >= (3, 11):
         new_code = types.CodeType(
             argcount,  # co_argcount
-            0,  # posonlyargcount
-            0,  # kwonlyargcount
+            0,  # posonlyargcount, eliminated
+            0,  # kwonlyargcount, eliminated
             code.co_nlocals,
             code.co_stacksize,
             co_flags,
@@ -91,13 +94,34 @@ def eliminate_positional_only(fn: Callable[P1, R1]) -> Callable[P1, R1]:
             code.co_varnames,
             code.co_filename,
             code.co_name,
+            code.co_qualname,
             code.co_firstlineno,
-            code.co_lnotab,
+            code.co_linetable,
+            code.co_exceptiontable,
             code.co_freevars,
             code.co_cellvars,
         )
     else:
-        raise ValueError
+        new_code = types.CodeType(
+            argcount,  # co_argcount
+            0,  # posonlyargcount, eliminated
+            0,  # kwonlyargcount, eliminated
+            code.co_nlocals,
+            code.co_stacksize,
+            co_flags,
+            code.co_code,
+            code.co_consts,
+            code.co_names,
+            code.co_varnames,
+            code.co_filename,
+            code.co_name,
+            code.co_firstlineno,
+            code.co_linetable
+            if sys.version_info >= (3, 10)
+            else code.co_lnotab,
+            code.co_freevars,
+            code.co_cellvars,
+        )
 
     fn_defaults_params = get_fn_defaults_params(fn)
     new_fn = types.FunctionType(
@@ -110,17 +134,16 @@ def eliminate_positional_only(fn: Callable[P1, R1]) -> Callable[P1, R1]:
     new_fn.__name__ = fn.__name__
     new_fn.__doc__ = fn.__doc__
     new_fn.__annotations__ = fn.__annotations__
-    new_fn.__kwdefaults__ = None
+    new_fn.__kwdefaults__ = None  # already merged into defaults
     return new_fn
 
 
-def bind_constants(fn, infer_meta, input_names, *args, **kwargs):
+def bind_constants(fn, infer_meta, *args, **kwargs):
     sig = inspect.signature(fn)
     bound_args = sig.bind(*args, **kwargs)
     bound_args.apply_defaults()
     params = bound_args.arguments
 
-    # NOTE: dict 可以保留顺序
     mutable_params = {}
     const_params = {}
 
@@ -131,8 +154,8 @@ def bind_constants(fn, infer_meta, input_names, *args, **kwargs):
             const_params[k] = v
 
     mutable_arg_names = list(mutable_params.keys())
-    fn = eliminate_positional_only(fn)
-    infer_meta = eliminate_positional_only(infer_meta)
+    fn = eliminate_positional_or_keyword_only(fn)
+    infer_meta = eliminate_positional_or_keyword_only(infer_meta)
     return (
         mutable_arg_names,
         partial(fn, **const_params),
@@ -145,42 +168,37 @@ def bind_constants(fn, infer_meta, input_names, *args, **kwargs):
 def run_in_dynamic_mode(fn):
     def dynamic_mode_fn(*args, **kwargs):
         with paddle.base.dygraph.base.guard():
-            try:
-                return fn(*args, **kwargs)
-            except Exception as e:
-                print(e)
-                raise
+            return fn(*args, **kwargs)
 
     return dynamic_mode_fn
 
 
-from collections.abc import Mapping
-
-
 def custom_hash(obj):
-    # TODO: Check a case
+    # Compute a hash for various types of objects, including unhashable ones.
+    # This may not be collision-free, but should work for distinguishing different
+    # constant parameters in most practical scenarios.
+
+    # TODO: We should avoid hash collisions more strictly if necessary. For example,
     # hash(-1) == hash(-2)
     if isinstance(obj, (int, float, str, bool, bytes)):
         return hash(obj)
 
-    # 2. 已知可哈希容器 (如 tuple, frozenset)
+    # Hashing for common hashable types
     if isinstance(obj, (tuple, frozenset)):
         try:
             return hash(obj)
         except TypeError:
             pass
 
-    # 3. 不可哈希的 Sequence (如 list)
+    # Unhashable types
     if isinstance(obj, (Sequence, set)):
-        # 使用 (0, ...) 前缀是为了区分列表和元组的哈希值
         try:
             return hash((0, *tuple(custom_hash(item) for item in obj)))
         except TypeError:
             pass
 
-    # 4. 不可哈希的 Mapping (如 dict)
+    # Unhashable Mapping
     if isinstance(obj, Mapping):
-        # 使用 (1, ...) 前缀是为了区分字典和其他容器的哈希值
         try:
             items_hashed = tuple(
                 sorted((custom_hash(k), custom_hash(v)) for k, v in obj.items())
@@ -204,7 +222,7 @@ def register_op(
     infer_meta: Callable[..., Any] | None = None,
     input_names: list[str] | None = None,
     output_names: list[str] | None = None,
-    inplace_map: list[str, str] | None = None,
+    inplace_map: dict[str, str] | None = None,
 ) -> Callable[P1, R1]: ...
 
 
@@ -217,7 +235,7 @@ def register_op(
     infer_meta: Callable[..., Any] | None = None,
     input_names: list[str] | None = None,
     output_names: list[str] | None = None,
-    inplace_map: list[str, str] | None = None,
+    inplace_map: dict[str, str] | None = None,
 ) -> Callable[[Callable[P1, R1]], Callable[P1, R1]]: ...
 
 
@@ -229,13 +247,15 @@ def register_op(
     infer_meta: Callable[..., Any] | None = None,
     input_names: list[str] | None = None,
     output_names: list[str] | None = None,
-    inplace_map: list[str, str] | None = None,
+    inplace_map: dict[str, str] | None = None,
 ):
-    """
-    注册算子的装饰器，支持传入元数据推导函数和输入输出配置。
-    """
+    if input_names is None:
+        raise ValueError("Currently, input_names must be provided.")
+    if output_names is None:
+        raise ValueError("Currently, output_names must be provided.")
+    if infer_meta is None:
+        raise ValueError("Currently, infer_meta must be provided.")
 
-    # 内部装饰器逻辑
     def _register_op(
         real_fn: Callable[P1, R1],
     ) -> Callable[P1, R1]:
@@ -251,22 +271,19 @@ def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
                 mutable_arg_names,
                 bound_constants_fn,
                 bound_constants_infer_meta,
-                args,
+                mutable_args,
                 const_params,
-            ) = bind_constants(
-                real_fn, infer_meta, input_names, *args, **kwargs
-            )
+            ) = bind_constants(real_fn, infer_meta, *args, **kwargs)
             assert len(mutable_arg_names) == len(input_names), (
-                f"{mutable_arg_names=} != {input_names=}"
+                f"Number of mutable arguments ({len(mutable_arg_names)}) does not match "
+                f"the number of input names ({len(input_names)})."
             )
 
             const_params_hash = custom_hash(const_params)
 
-            # 调用底层算子运行逻辑
             out = _C_ops._run_python_op(
-                *args,  # kwargs 还需要吗？
-                name=f"{op_name}_{const_params_hash}",  # 每次绑定一次，说明绑定的
-                # inputs=inputs,
+                *mutable_args,
+                name=f"{op_name}_{const_params_hash}",
                 input_names=input_names,
                 output_names=output_names,
                 attrs={
@@ -280,9 +297,8 @@ def wrapped_fn(*args: P1.args, **kwargs: P1.kwargs) -> R1:
 
         return wrapped_fn
 
-    # 处理装饰器调用的两种方式：
-    # 1. @register_op(...) -> fn is None
-    # 2. @register_op -> fn is not None (不带括号，但在本例中不适用，因为必须传参)
+    # Handle @register_op(...)
     if fn is None:
         return _register_op
+    # Handle @register_op
     return _register_op(fn)

From 6e0ee44daa7b9f7f44345637ff496916b61e8ab6 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:03:29 +0800
Subject: [PATCH 28/33] add ut

---
 test/dygraph_to_static/check_approval.py |   1 +
 test/dygraph_to_static/test_python_op.py | 122 +++++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 test/dygraph_to_static/test_python_op.py

diff --git a/test/dygraph_to_static/check_approval.py b/test/dygraph_to_static/check_approval.py
index 43f8f9ae80569a..d756eec2adc4c7 100644
--- a/test/dygraph_to_static/check_approval.py
+++ b/test/dygraph_to_static/check_approval.py
@@ -106,6 +106,7 @@ def __init__(self, start: Location, end: Location):
         "test_partial_program_hook.py",
         "test_jit_backend.py",
         "test_dygraph_to_static_utils.py",
+        "test_python_op.py",
     ],
 }
 
diff --git a/test/dygraph_to_static/test_python_op.py b/test/dygraph_to_static/test_python_op.py
new file mode 100644
index 00000000000000..2cf02e616358f8
--- /dev/null
+++ b/test/dygraph_to_static/test_python_op.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from dygraph_to_static_utils import (
+    Dy2StTestBase,
+    static_guard,
+)
+
+import paddle
+from paddle import Tensor
+from paddle.jit import sot
+
+
+@paddle.static.register_op(
+    name="fn_with_breakgraph",
+    infer_meta=lambda x, y: paddle.static.MetaTensor(
+        dtype=x.dtype, shape=x.shape
+    ),
+    input_names=["x", "y"],
+    output_names=["out"],
+    inplace_map={},
+)
+def fn_with_breakgraph(x: Tensor, y: Tensor) -> Tensor:
+    x = x + 1
+    sot.psdb.breakgraph()
+    y = y + 1
+    return x + y
+
+
+@paddle.static.register_op(
+    name="fn_with_numpy_operation",
+    infer_meta=lambda x, y: paddle.static.MetaTensor(
+        dtype=paddle.int32, shape=x.shape[:-1]
+    ),
+    input_names=["x", "y"],
+    output_names=["out"],
+)
+def fn_with_numpy_operation(x: Tensor, y: Tensor) -> Tensor:
+    x_np = x.numpy()
+    y_np = y.numpy()
+    x_np_reduce = x_np.sum(axis=-1)
+    y_np_reduce = y_np.sum(axis=-1)
+    return paddle.to_tensor(x_np_reduce + y_np_reduce).cast(paddle.int32)
+
+
+class PythonOpTestMixin:
+    def run_in_dygraph(self):
+        return self.fn(**self.inputs)
+
+    @static_guard()
+    def run_in_static(self):
+        main_program = paddle.static.Program()
+        with paddle.static.program_guard(main_program):
+            input_values = {
+                k: paddle.static.data(name=k, shape=v.shape, dtype=v.dtype)
+                for k, v in self.inputs.items()
+            }
+            out_value = self.fn(**input_values)
+            exe = paddle.static.Executor()
+        (out,) = exe.run(
+            main_program,
+            feed={k: v.numpy() for k, v in self.inputs.items()},
+            fetch_list=[out_value],
+        )
+        return out
+
+    def test_dy_st(self):
+        np.testing.assert_allclose(self.run_in_dygraph(), self.run_in_static())
+
+
+class TestFnWithBreakgraph(unittest.TestCase, PythonOpTestMixin):
+    def setUp(self):
+        self.fn = fn_with_breakgraph
+        self.inputs = {
+            "x": paddle.randn([2, 3, 4]),
+            "y": paddle.randn([2, 3, 4]),
+        }
+
+
+class TestFnWithNumPyOperation(unittest.TestCase, PythonOpTestMixin):
+    def setUp(self):
+        self.fn = fn_with_breakgraph
+        self.inputs = {
+            "x": paddle.randn([7, 8, 9]),
+            "y": paddle.randn([7, 8, 9]),
+        }
+
+
+def fn_use_2_register_op(x: Tensor, y: Tensor) -> Tensor:
+    z1 = fn_with_breakgraph(x, y)
+    z2 = fn_with_numpy_operation(x, y)
+    out = z1 * 100 + z2.unsqueeze(axis=-1).astype(paddle.float32)
+    return out
+
+
+class TestToStatic(Dy2StTestBase):
+    def test_to_static_use_2_op(self):
+        x = paddle.randn([2, 3, 4])
+        y = paddle.randn([2, 3, 4])
+        fn = fn_use_2_register_op
+        dy_out = fn(x, y)
+        static_fn = paddle.jit.to_static(fn)
+        st_out = static_fn(x, y)
+        np.testing.assert_allclose(dy_out, st_out)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 05b7ca01b61f70f3b96567e5bc58373a9c6b6c6d Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:36:58 +0800
Subject: [PATCH 29/33] revert
 python/paddle/jit/sot/opcode_translator/executor/function_graph.py

---
 .../jit/sot/opcode_translator/executor/function_graph.py      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
index 548f10f478763a..d1703fd9000e39 100644
--- a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
+++ b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py
@@ -756,7 +756,7 @@ def fallback_symbolic_to_constant(args, kwargs, err):
                     # TODO(zrr1999): maybe we can continue to fallback to all args are constant.
                     raise BreakGraphError(
                         InferMetaBreak(
-                            f"InferMeta encountered {type(err)}, but all args are not symbolic.\n\n, {err}"
+                            f"InferMeta encountered {type(err)}, but all args are not symbolic."
                         )
                     )
 
@@ -782,7 +782,7 @@ def fallback_symbolic_to_constant(args, kwargs, err):
                 ):
                     raise BreakGraphError(
                         InferMetaBreak(
-                            f"InferMeta encountered {type(err)}, but all args are not symbolic.\n\n, {err}"
+                            f"InferMeta encountered {type(err)}, but all args are not symbolic."
                         )
                     )
 

From 744458e91273ce8d9ba17d3fd95fdd841ef3e339 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:43:39 +0800
Subject: [PATCH 30/33] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .../instruction/python_function_instruction.cc           | 9 +++++----
 paddle/fluid/pybind/eager_utils.cc                       | 4 ++--
 paddle/fluid/pybind/manual_static_op_function.h          | 4 +++-
 python/paddle/static/python_op.py                        | 3 ---
 test/dygraph_to_static/test_python_op.py                 | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index c5b01699d21152..d10fe4cb227e19 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -81,7 +81,7 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
         PADDLE_THROW(common::errors::Unimplemented(
             "Only support Tensor input type for now in "
             "PythonFunctionInstruction, "
-            "not support VectorType<DenseTensorType>."));
+            "does not support VectorType<DenseTensorType>."));
       } else {
         input_index++;
         python_function_ctx_.EmplaceBackInput(paddle::Tensor());
@@ -113,7 +113,7 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
                                         "now in PythonFunctionInstruction, "
                                         "not support Vector<DenseTensor>."));
     } else {
-      PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d] ",
+      PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d].",
                                                  var->Type()));
     }
   }
@@ -188,10 +188,11 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       PADDLE_THROW(
           common::errors::Unimplemented("Only support DenseTensor output type "
                                         "for now in PythonFunctionInstruction, "
-                                        "not support VectorType."));
+                                        "does not support VectorType."));
     } else {
       PADDLE_THROW(common::errors::Unimplemented(
-          "only support DenseTensor and vector "));
+          "Only support DenseTensor and VectorType output types in "
+          "PythonFunctionInstruction."));
     }
   }
 
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index f4fa478df088cc..4236c262866c45 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -3631,12 +3631,12 @@ std::unordered_map<std::string, void*> ParsePythonOpAttrs(PyObject* py_dict) {
   PyObject* py_real_fn = PyDict_GetItemString(py_dict, "fn_ptr");
   if (!py_infer_meta || !py_real_fn) {
     PADDLE_THROW(common::errors::NotFound(
-        "Missing required keys 'infer_meta_fn_ptr' or 'fn_ptr' in op attrs"));
+        "Missing required keys 'infer_meta_fn_ptr' or 'fn_ptr' in op attrs."));
   }
 
   if (!PyCallable_Check(py_infer_meta) || !PyCallable_Check(py_real_fn)) {
     PADDLE_THROW(common::errors::InvalidType(
-        "Expected callable objects for 'infer_meta_fn_ptr' and 'fn_ptr'"));
+        "Expected callable objects for 'infer_meta_fn_ptr' and 'fn_ptr'."));
   }
 
   // Increase reference count to prevent garbage collection in C++
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index aebb154b461a63..8c6417f42b9d27 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1208,7 +1208,9 @@ static PyObject *run_python_op(PyObject *self,
       process_result.size(),
       outputs.size(),
       common::errors::InvalidArgument(
-          "%d and %d not equal!", process_result.size(), outputs.size()));
+          "Expected output size %d, but got %d.",
+          static_cast<int>(process_result.size()),
+          static_cast<int>(outputs.size())));
 
   dialect::ProcessMeshAttribute op_mesh;
   bool run_auto_parallel = false;
diff --git a/python/paddle/static/python_op.py b/python/paddle/static/python_op.py
index f78ff1a0e7ac37..cd67bb7d9c9a4a 100644
--- a/python/paddle/static/python_op.py
+++ b/python/paddle/static/python_op.py
@@ -31,9 +31,6 @@
 
 P1 = ParamSpec("P1")
 R1 = TypeVar("R1")
-P2 = ParamSpec("P2")
-R2 = TypeVar("R2")
-
 
 class MissingArgument:
     def __init__(self, fn: Callable[P1, R1], name: str):
diff --git a/test/dygraph_to_static/test_python_op.py b/test/dygraph_to_static/test_python_op.py
index 2cf02e616358f8..2c52771528e42e 100644
--- a/test/dygraph_to_static/test_python_op.py
+++ b/test/dygraph_to_static/test_python_op.py
@@ -93,7 +93,7 @@ def setUp(self):
 
 class TestFnWithNumPyOperation(unittest.TestCase, PythonOpTestMixin):
     def setUp(self):
-        self.fn = fn_with_breakgraph
+        self.fn = fn_with_numpy_operation
         self.inputs = {
             "x": paddle.randn([7, 8, 9]),
             "y": paddle.randn([7, 8, 9]),

From bffe3385bbcb0acd4d70c3c9726171f835a40349 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:45:21 +0800
Subject: [PATCH 31/33] Apply suggestions from code review

---
 .../new_executor/instruction/python_function_instruction.cc     | 2 +-
 paddle/fluid/pybind/manual_static_op_function.h                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index d10fe4cb227e19..b3000dd0a84b24 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -111,7 +111,7 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       PADDLE_THROW(
           common::errors::Unimplemented("Only support Tensor input type for "
                                         "now in PythonFunctionInstruction, "
-                                        "not support Vector<DenseTensor>."));
+                                        "does not support Vector<DenseTensor>."));
     } else {
       PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d].",
                                                  var->Type()));
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 8c6417f42b9d27..8489cffe09308c 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1024,7 +1024,7 @@ auto CreatePyFuncRunner(void *py_func_ptr, const std::string &op_name) {
     if (raw_result == nullptr) {
       PyErr_Print();
       PADDLE_THROW(
-          common::errors::Fatal("Execution of the customPythonOp (%s) failed.\n"
+          common::errors::Fatal("Execution of the Python OP (%s) failed.\n"
                                 "Please review your code, and you may use "
                                 "breakpoint() for debugging.",
                                 op_name));

From 9fc149a9eec87e798804228b1fe0ba176c7aed42 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:48:13 +0800
Subject: [PATCH 32/33] format code

---
 .../instruction/python_function_instruction.cc            | 8 ++++----
 paddle/fluid/pybind/manual_static_op_function.h           | 7 +++----
 python/paddle/static/python_op.py                         | 1 +
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
index b3000dd0a84b24..5613bf7b87321e 100644
--- a/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/python_function_instruction.cc
@@ -108,10 +108,10 @@ void PythonFunctionInstruction::BuildPythonFunctionContext(
       python_in.set_impl(tensor_in);
       python_function_ctx_.EmplaceBackInput(std::move(python_in));
     } else if (var->IsType<VariableRefArray>()) {
-      PADDLE_THROW(
-          common::errors::Unimplemented("Only support Tensor input type for "
-                                        "now in PythonFunctionInstruction, "
-                                        "does not support Vector<DenseTensor>."));
+      PADDLE_THROW(common::errors::Unimplemented(
+          "Only support Tensor input type for "
+          "now in PythonFunctionInstruction, "
+          "does not support Vector<DenseTensor>."));
     } else {
       PADDLE_THROW(common::errors::Unimplemented("Not support var type [%d].",
                                                  var->Type()));
diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h
index 8489cffe09308c..c62990adb5181e 100644
--- a/paddle/fluid/pybind/manual_static_op_function.h
+++ b/paddle/fluid/pybind/manual_static_op_function.h
@@ -1207,10 +1207,9 @@ static PyObject *run_python_op(PyObject *self,
   PADDLE_ENFORCE_EQ(
       process_result.size(),
       outputs.size(),
-      common::errors::InvalidArgument(
-          "Expected output size %d, but got %d.",
-          static_cast<int>(process_result.size()),
-          static_cast<int>(outputs.size())));
+      common::errors::InvalidArgument("Expected output size %d, but got %d.",
+                                      static_cast<int>(process_result.size()),
+                                      static_cast<int>(outputs.size())));
 
   dialect::ProcessMeshAttribute op_mesh;
   bool run_auto_parallel = false;
diff --git a/python/paddle/static/python_op.py b/python/paddle/static/python_op.py
index cd67bb7d9c9a4a..2434b89d96e58f 100644
--- a/python/paddle/static/python_op.py
+++ b/python/paddle/static/python_op.py
@@ -32,6 +32,7 @@
 P1 = ParamSpec("P1")
 R1 = TypeVar("R1")
 
+
 class MissingArgument:
     def __init__(self, fn: Callable[P1, R1], name: str):
         self.fn = fn

From 858f7b514334706297688088c0a2d069803f7c52 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Fri, 19 Dec 2025 02:56:19 +0800
Subject: [PATCH 33/33] fix co_flags shadowing

---
 python/paddle/static/python_op.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/paddle/static/python_op.py b/python/paddle/static/python_op.py
index 2434b89d96e58f..a9a101c9b5436c 100644
--- a/python/paddle/static/python_op.py
+++ b/python/paddle/static/python_op.py
@@ -70,7 +70,6 @@ def eliminate_positional_or_keyword_only(
     assert isinstance(fn, types.FunctionType), "Only support regular function"
     code = fn.__code__
     co_flags: int = code.co_flags & ~HAS_VAR_ARGS_OR_KWARGS
-    co_flags = code.co_flags
 
     argcount = (
         code.co_argcount