17 changes: 17 additions & 0 deletions paddle/fluid/pir/drr/src/ir_operation_factory.cc
@@ -448,6 +448,11 @@ void OperationFactory::RegisterManualOpCreator() {
common::errors::InvalidArgument(
"'paddings' Attribute is expected for "
"Pool2dOp. "));
PADDLE_ENFORCE_EQ(attrs.find("dilations") != attrs.end(),
true,
common::errors::InvalidArgument(
"'dilations' Attribute is expected for "
"Pool2dOp. "));
std::vector<int64_t> paddings;
for (size_t i = 0;
i < attrs.at("paddings").dyn_cast<pir::ArrayAttribute>().size();
@@ -459,6 +464,17 @@ void OperationFactory::RegisterManualOpCreator() {
.data());
}

std::vector<int64_t> dilations;
for (size_t i = 0;
i < attrs.at("dilations").dyn_cast<pir::ArrayAttribute>().size();
i++) {
dilations.push_back(attrs.at("dilations")
.dyn_cast<pir::ArrayAttribute>()
.at(i)
.dyn_cast<pir::Int64Attribute>()
.data());
}
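
The paddings and dilations loops above repeat the same unpack pattern. As a sketch, a hypothetical helper (not part of this PR) that factors it out, using only the pir accessors already present in this diff:

```cpp
// Hypothetical helper (not part of this PR); assumes the headers already
// included by ir_operation_factory.cc. Unpacks a pir::ArrayAttribute of
// pir::Int64Attribute into a std::vector<int64_t>, mirroring the loops above.
std::vector<int64_t> UnpackInt64Array(const pir::Attribute& attr) {
  auto array = attr.dyn_cast<pir::ArrayAttribute>();
  std::vector<int64_t> result;
  result.reserve(array.size());
  for (size_t i = 0; i < array.size(); ++i) {
    result.push_back(array.at(i).dyn_cast<pir::Int64Attribute>().data());
  }
  return result;
}
```

With such a helper, the loops above would reduce to `paddings = UnpackInt64Array(attrs.at("paddings"));` and likewise for dilations.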

PADDLE_ENFORCE_EQ(attrs.find("ceil_mode") != attrs.end(),
true,
common::errors::InvalidArgument(
@@ -554,6 +570,7 @@ void OperationFactory::RegisterManualOpCreator() {
inputs[1],
strides,
paddings,
dilations,
ceil_mode,
exclusive,
data_format,
188 changes: 158 additions & 30 deletions paddle/phi/kernels/funcs/pooling.cc
@@ -34,6 +34,7 @@ class Pool2dFunctor<CPUContext, PoolProcess, T> {
const std::vector<int64_t>& ksize,
const std::vector<int64_t>& strides,
const std::vector<int64_t>& paddings,
const std::vector<int64_t>& dilations,
@zhwesky2010 (Contributor) commented on Nov 24, 2025:

infermeta and infer_symbolic have not been updated; the unit tests will likely not pass.

The PR author (Contributor Author) replied:

Hi, the newly added dilations attribute does not affect the output shape, so the existing infermeta / infer_symbolic can be used as-is.
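
A sketch of the shape reasoning behind that reply, assuming the standard non-ceil-mode pool2d output formula: because the kernels below clamp the dilated window back inside the input (rather than enlarging the effective kernel), the output extent keeps its existing form with no dilation term.

```latex
% Assumed standard pool2d output height (ceil_mode = false, non-adaptive);
% the absence of a dilation term is consistent with the reply that
% infermeta / infer_symbolic can stay unchanged.
H_{out} = \left\lfloor \frac{H_{in} + 2\,pad_h - ksize_h}{stride_h} \right\rfloor + 1
```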

const std::string data_format,
bool exclusive,
bool adaptive,
@@ -65,6 +66,9 @@ class Pool2dFunctor<CPUContext, PoolProcess, T> {
const int64_t padding_height = paddings[0];
const int64_t padding_width = paddings[1];

const int64_t dilation_height = dilations[0];
const int64_t dilation_width = dilations[1];

const T* input_data = input.data<T>();
T* output_data = context.template Alloc<T>(output);

@@ -94,15 +98,19 @@ class Pool2dFunctor<CPUContext, PoolProcess, T> {
std::min(wstart + ksize_width, input_width + padding_width);
pool_size = (hend - hstart) * (wend - wstart);

wstart = std::max(wstart, static_cast<int64_t>(0));
hstart = std::max(hstart, static_cast<int64_t>(0));
hend = std::min(hend, input_height);
wend = std::min(wend, input_width);
hend = hstart + (ksize_height - 1) * dilation_height + 1;
wend = wstart + (ksize_width - 1) * dilation_width + 1;
while (hstart < static_cast<int64_t>(0))
hstart += dilation_height;
while (hend > input_height) hend -= dilation_height;
while (wstart < static_cast<int64_t>(0))
wstart += dilation_width;
while (wend > input_width) wend -= dilation_width;
}

T ele = pool_process.initial();
for (int64_t h = hstart; h < hend; ++h) {
for (int64_t w = wstart; w < wend; ++w) {
for (int64_t h = hstart; h < hend; h += dilation_height) {
for (int64_t w = wstart; w < wend; w += dilation_width) {
pool_process.compute(input_data[h * input_width + w], &ele);
}
}
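
To make the new window arithmetic above concrete, a minimal self-contained sketch (not part of the PR, assuming the same clamping scheme as the diff): it widens the nominal window to the dilated kernel extent, then steps the bounds back onto the dilation grid until they fit inside the input.

```cpp
#include <cstdint>
#include <cstdio>

// Widen the nominal [start, end) window to the dilated kernel extent, then
// step the bounds back onto the dilation grid until they lie in [0, input).
void ClampedDilatedWindow(int64_t p, int64_t stride, int64_t padding,
                          int64_t ksize, int64_t dilation, int64_t input_size,
                          int64_t* start, int64_t* end) {
  *start = p * stride - padding;
  *end = *start + (ksize - 1) * dilation + 1;  // dilated kernel extent
  while (*start < 0) *start += dilation;       // stay on the dilation grid
  while (*end > input_size) *end -= dilation;
}

int main() {
  // Output row ph = 0, stride 2, padding 1, ksize 3, dilation 2, input 8:
  // nominal start is -1, dilated end is 4; clamping yields [1, 4).
  int64_t hstart = 0, hend = 0;
  ClampedDilatedWindow(0, 2, 1, 3, 2, 8, &hstart, &hend);
  for (int64_t h = hstart; h < hend; h += 2) {
    std::printf("%lld ", static_cast<long long>(h));  // prints: 1 3
  }
  std::printf("\n");
  return 0;
}
```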
@@ -142,14 +150,18 @@ class Pool2dFunctor<CPUContext, PoolProcess, T> {
std::min(wstart + ksize_width, input_width + padding_width);
pool_size = (hend - hstart) * (wend - wstart);

wstart = std::max(wstart, static_cast<int64_t>(0));
hstart = std::max(hstart, static_cast<int64_t>(0));
hend = std::min(hend, input_height);
wend = std::min(wend, input_width);
hend = hstart + (ksize_height - 1) * dilation_height + 1;
wend = wstart + (ksize_width - 1) * dilation_width + 1;
while (hstart < static_cast<int64_t>(0))
hstart += dilation_height;
while (hend > input_height) hend -= dilation_height;
while (wstart < static_cast<int64_t>(0))
wstart += dilation_width;
while (wend > input_width) wend -= dilation_width;
}
T ele = pool_process.initial();
for (int64_t h = hstart; h < hend; ++h) {
for (int64_t w = wstart; w < wend; ++w) {
for (int64_t h = hstart; h < hend; h += dilation_height) {
for (int64_t w = wstart; w < wend; w += dilation_width) {
pool_process.compute(
input_data[h * input_width * input_channels +
w * input_channels + c],
@@ -189,6 +201,7 @@ class Pool2dGradFunctor<CPUContext, PoolProcess, T> {
const std::vector<int64_t>& ksize,
const std::vector<int64_t>& strides,
const std::vector<int64_t>& paddings,
const std::vector<int64_t>& dilations UNUSED,
const std::string data_format,
bool exclusive,
bool adaptive,
@@ -355,6 +368,7 @@ class MaxPool2dGradFunctor<CPUContext, T> {
const std::vector<int64_t>& ksize,
const std::vector<int64_t>& strides,
const std::vector<int64_t>& paddings,
const std::vector<int64_t>& dilations,
const std::string data_format,
DenseTensor* input_grad) {
bool channel_last = (data_format == "NHWC");
@@ -384,6 +398,9 @@ class MaxPool2dGradFunctor<CPUContext, T> {
const int64_t padding_height = paddings[0];
const int64_t padding_width = paddings[1];

const int64_t dilation_height = dilations[0];
const int64_t dilation_width = dilations[1];

const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
@@ -396,16 +413,21 @@
for (int64_t c = 0; c < output_channels; ++c) {
for (int64_t ph = 0; ph < output_height; ++ph) {
int64_t hstart = ph * stride_height - padding_height;
int64_t hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, static_cast<int64_t>(0));
int64_t hend = hstart + (ksize_height - 1) * dilation_height + 1;
while (hstart < static_cast<int64_t>(0)) hstart += dilation_height;
while (hend > input_height) hend -= dilation_height;

for (int64_t pw = 0; pw < output_width; ++pw) {
int64_t wstart = pw * stride_width - padding_width;
int64_t wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, static_cast<int64_t>(0));
int64_t wend = wstart + (ksize_width - 1) * dilation_width + 1;
while (wstart < static_cast<int64_t>(0)) wstart += dilation_width;
while (wend > input_width) wend -= dilation_width;

bool stop = false;
for (int64_t h = hstart; h < hend && !stop; ++h) {
for (int64_t w = wstart; w < wend && !stop; ++w) {
for (int64_t h = hstart; h < hend && !stop;
h += dilation_height) {
for (int64_t w = wstart; w < wend && !stop;
w += dilation_width) {
int64_t input_idx = h * input_width + w;
int64_t output_idx = ph * output_width + pw;
if (input_data[input_idx] == output_data[output_idx]) {
@@ -430,16 +452,20 @@
for (int64_t c = 0; c < output_channels; ++c) {
for (int64_t ph = 0; ph < output_height; ++ph) {
int64_t hstart = ph * stride_height - padding_height;
int64_t hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, static_cast<int64_t>(0));
int64_t hend = hstart + (ksize_height - 1) * dilation_height + 1;
while (hstart < static_cast<int64_t>(0)) hstart += dilation_height;
while (hend > input_height) hend -= dilation_height;
for (int64_t pw = 0; pw < output_width; ++pw) {
int64_t wstart = pw * stride_width - padding_width;
int64_t wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, static_cast<int64_t>(0));
int64_t wend = wstart + (ksize_width - 1) * dilation_width + 1;
while (wstart < static_cast<int64_t>(0)) wstart += dilation_width;
while (wend > input_width) wend -= dilation_width;

bool stop = false;
for (int64_t h = hstart; h < hend && !stop; ++h) {
for (int64_t w = wstart; w < wend && !stop; ++w) {
for (int64_t h = hstart; h < hend && !stop;
h += dilation_height) {
for (int64_t w = wstart; w < wend && !stop;
w += dilation_width) {
int64_t input_idx =
h * input_width * input_channels + w * input_channels + c;
int64_t output_idx = ph * output_width * output_channels +
@@ -1103,6 +1129,102 @@ template class Pool3dGradFunctor<phi::CPUContext,
template class Pool3dGradFunctor<phi::CPUContext,
AvgPoolGrad<dtype::float16>,
dtype::float16>;

// Native version, kept for now (not deleted yet)
// /*
// * All tensors are in NCHW format.
// * Ksize, strides, paddings, and dilations each have two elements. These
// * two elements represent height and width, respectively.
// */
// template <typename T1, typename T2>
// class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
// public:
// void operator()(const CPUContext& context,
// const DenseTensor& input,
// const std::vector<int64_t>& ksize,
// const std::vector<int64_t>& strides,
// const std::vector<int64_t>& paddings,
// const std::vector<int64_t>& dilations,
// bool adaptive,
// DenseTensor* output,
// DenseTensor* mask) {
// const int64_t batch_size = input.dims()[0];
// const int64_t input_height = input.dims()[2];
// const int64_t input_width = input.dims()[3];
// const int64_t output_channels = output->dims()[1];
// const int64_t output_height = output->dims()[2];
// const int64_t output_width = output->dims()[3];
// const int64_t ksize_height = ksize[0];
// const int64_t ksize_width = ksize[1];
// const int64_t stride_height = strides[0];
// const int64_t stride_width = strides[1];
// const int64_t padding_height = paddings[0];
// const int64_t padding_width = paddings[1];
// const int64_t dilation_height = dilations[0];
// const int64_t dilation_width = dilations[1];
// const int64_t input_stride = input_height * input_width;
// const int64_t output_stride = output_height * output_width;

// const T1* input_data = input.data<T1>();
// T1* output_data = context.template Alloc<T1>(output);
// T2* mask_data = context.template Alloc<T2>(mask);

// int64_t hstart = 0, hend = 0;
// int64_t wstart = 0, wend = 0;
// for (int64_t i = 0; i < batch_size; i++) {
// for (int64_t c = 0; c < output_channels; ++c) {
// for (int64_t ph = 0; ph < output_height; ++ph) {
// if (adaptive) {
// hstart = AdaptStartIndex(ph, input_height, output_height);
// hend = AdaptEndIndex(ph, input_height, output_height);
// } else {
// hstart = ph * stride_height - padding_height;
// }
// for (int64_t pw = 0; pw < output_width; ++pw) {
// T1 ele = static_cast<T1>(-FLT_MAX);
// int64_t index = -1;
// if (adaptive) {
// wstart = AdaptStartIndex(pw, input_width, output_width);
// wend = AdaptEndIndex(pw, input_width, output_width);
// for (int64_t h = hstart; h < hend; ++h) {
// for (int64_t w = wstart; w < wend; ++w) {
// if (ele < input_data[h * input_width + w]) {
// ele = input_data[h * input_width + w];
// index = h * input_width + w;
// }
// }
// }
// } else {
// wstart = pw * stride_width - padding_width;
// for (int64_t kh = 0; kh < ksize_height; ++kh) {
// int64_t h = hstart + kh * dilation_height;
// if (h < 0 || h >= input_height)
// continue;
// for (int64_t kw = 0; kw < ksize_width; ++kw) {
// int64_t w = wstart + kw * dilation_width;
// if (w < 0 || w >= input_width)
// continue;
// if (ele < input_data[h * input_width + w]) {
// ele = input_data[h * input_width + w];
// index = h * input_width + w;
// }
// }
// }
// }
// output_data[ph * output_width + pw] = ele;
// mask_data[ph * output_width + pw] = index;
// }
// }
// // offset
// input_data += input_stride;
// output_data += output_stride;
// mask_data += output_stride;
// }
// }
// }
// };
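
The commented-out "native" version above checks every tap (kh, kw) individually, while the active implementation below clamps the [start, end) range once and then strides by the dilation. A small self-check sketch (not part of the PR), under the assumption that both traversals should visit identical in-bounds rows:

```cpp
#include <cstdint>
#include <cstdio>
#include <set>

int main() {
  const int64_t input = 7, ksize = 3;
  bool ok = true;
  for (int64_t stride = 1; stride <= 3; ++stride)
    for (int64_t padding = 0; padding <= 2; ++padding)
      for (int64_t dilation = 1; dilation <= 3; ++dilation)
        for (int64_t ph = 0; ph < 4; ++ph) {
          // Range clamping (active implementation).
          int64_t hstart = ph * stride - padding;
          int64_t hend = hstart + (ksize - 1) * dilation + 1;
          while (hstart < 0) hstart += dilation;
          while (hend > input) hend -= dilation;
          std::set<int64_t> a;
          for (int64_t h = hstart; h < hend; h += dilation) a.insert(h);
          // Per-tap bounds check ("native" version).
          std::set<int64_t> b;
          for (int64_t kh = 0; kh < ksize; ++kh) {
            int64_t h = ph * stride - padding + kh * dilation;
            if (h >= 0 && h < input) b.insert(h);
          }
          if (a != b) ok = false;
        }
  std::printf(ok ? "traversals match\n" : "mismatch\n");
  return 0;
}
```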

/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
@@ -1116,6 +1238,7 @@ class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
const std::vector<int64_t>& ksize,
const std::vector<int64_t>& strides,
const std::vector<int64_t>& paddings,
const std::vector<int64_t>& dilations,
bool adaptive,
DenseTensor* output,
DenseTensor* mask) {
@@ -1131,6 +1254,8 @@ class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
const int64_t stride_width = strides[1];
const int64_t padding_height = paddings[0];
const int64_t padding_width = paddings[1];
const int64_t dilation_height = dilations[0];
const int64_t dilation_width = dilations[1];
const int64_t input_stride = input_height * input_width;
const int64_t output_stride = output_height * output_width;

@@ -1148,23 +1273,25 @@ class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, static_cast<int64_t>(0));
hend = hstart + (ksize_height - 1) * dilation_height + 1;
while (hstart < static_cast<int64_t>(0)) hstart += dilation_height;
while (hend > input_height) hend -= dilation_height;
}
for (int64_t pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, static_cast<int64_t>(0));
wend = wstart + (ksize_width - 1) * dilation_width + 1;
while (wstart < static_cast<int64_t>(0)) wstart += dilation_width;
while (wend > input_width) wend -= dilation_width;
}

T1 ele = static_cast<T1>(-FLT_MAX);
int64_t index = -1;
for (int64_t h = hstart; h < hend; ++h) {
for (int64_t w = wstart; w < wend; ++w) {
for (int64_t h = hstart; h < hend; h += dilation_height) {
for (int64_t w = wstart; w < wend; w += dilation_width) {
if (ele < input_data[h * input_width + w]) {
ele = input_data[h * input_width + w];
index = h * input_width + w;
@@ -1198,6 +1325,7 @@ class MaxPool2dWithIndexGradFunctor<CPUContext, T1, T2> {
const std::vector<int64_t>& ksize UNUSED,
const std::vector<int64_t>& strides UNUSED,
const std::vector<int64_t>& paddings UNUSED,
const std::vector<int64_t>& dilations UNUSED,
bool adaptive UNUSED,
DenseTensor* input_grad) {
const int64_t batch_size = input_grad->dims()[0];