From 9376f60e7a0d58c273aac48ccfc115536b075d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9F=B3=E5=AE=97=E8=B0=B7?=
Date: Thu, 31 Oct 2024 17:08:15 +0800
Subject: [PATCH] Add forward and backward adaptation for MaxPool3d

---
 op_plugin/config/op_plugin_functions.yaml     |   4 +
 ...ool3dWithIndicesBackwardKernelNpuOpApi.cpp |  90 ++++++++++
 .../MaxPool3dWithIndicesKernelNpuOpApi.cpp    | 157 ++++++++++++++++++
 3 files changed, 251 insertions(+)
 create mode 100644 op_plugin/ops/opapi/MaxPool3dWithIndicesBackwardKernelNpuOpApi.cpp
 create mode 100644 op_plugin/ops/opapi/MaxPool3dWithIndicesKernelNpuOpApi.cpp

diff --git a/op_plugin/config/op_plugin_functions.yaml b/op_plugin/config/op_plugin_functions.yaml
index 913e32bff..2c9ef8860 100644
--- a/op_plugin/config/op_plugin_functions.yaml
+++ b/op_plugin/config/op_plugin_functions.yaml
@@ -3679,15 +3679,19 @@ official:
   - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
     acl_op: all_version
+    op_api: all_version
 
   - func: max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))
     acl_op: all_version
+    op_api: all_version
 
   - func: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
     acl_op: all_version
+    op_api: all_version
 
   - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
     acl_op: all_version
+    op_api: all_version
 
   - func: maximum(Tensor self, Tensor other) -> Tensor
     acl_op: all_version
 
diff --git a/op_plugin/ops/opapi/MaxPool3dWithIndicesBackwardKernelNpuOpApi.cpp b/op_plugin/ops/opapi/MaxPool3dWithIndicesBackwardKernelNpuOpApi.cpp
new file mode 100644
index 000000000..df61ed930
--- /dev/null
+++ b/op_plugin/ops/opapi/MaxPool3dWithIndicesBackwardKernelNpuOpApi.cpp
@@ -0,0 +1,90 @@
+// Copyright (c) 2024 Huawei Technologies Co., Ltd
+// All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "op_plugin/AclOpsInterface.h" +#include "op_plugin/OpApiInterface.h" +#include "op_plugin/utils/op_api_common.h" +#include "torch_npu/csrc/core/npu/NpuVariables.h" + +namespace op_api { +using npu_preparation = at_npu::native::OpPreparation; + +at::Tensor &max_pool3d_with_indices_backward_out(const at::Tensor &grad_output, + const at::Tensor &self, + at::IntArrayRef kernel_size, + at::IntArrayRef stride, + at::IntArrayRef padding, + at::IntArrayRef dilation, + bool ceil_mode, + const at::Tensor &indices, + at::Tensor &grad_input) +{ + DO_COMPATIBILITY(aclnnMaxPool3dWithArgmaxBackward, acl_op::max_pool3d_with_indices_backward_out(grad_output, self, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + indices, + grad_input)); + + static const bool is_supported = (c10_npu::GetSocVersion() >= c10_npu::SocVersion::Ascend910B1 && + c10_npu::GetSocVersion() < c10_npu::SocVersion::Ascend310B1) || + c10_npu::GetSocVersion() > c10_npu::SocVersion::Ascend310B4; + if (!is_supported) { + return acl_op::max_pool3d_with_indices_backward_out(grad_output, self, kernel_size, stride, padding, dilation, + ceil_mode, indices, grad_input); + } + + auto input_size = self.sizes(); + npu_preparation::check_tensor({grad_output}, grad_input, grad_output, input_size); + EXEC_NPU_CMD(aclnnMaxPool3dWithArgmaxBackward, grad_output, self, kernel_size, stride, padding, dilation, + ceil_mode, grad_input); + return grad_input; +} + +at::Tensor max_pool3d_with_indices_backward(const at::Tensor &grad_output, + const at::Tensor &self, + at::IntArrayRef kernel_size, + at::IntArrayRef stride, + at::IntArrayRef padding, + at::IntArrayRef dilation, + bool ceil_mode, + const at::Tensor &indices) +{ + DO_COMPATIBILITY(aclnnMaxPool3dWithArgmaxBackward, acl_op::max_pool3d_with_indices_backward(grad_output, + self, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + indices)); + + static const bool is_supported = (c10_npu::GetSocVersion() >= c10_npu::SocVersion::Ascend910B1 && + c10_npu::GetSocVersion() < c10_npu::SocVersion::Ascend310B1) || + c10_npu::GetSocVersion() > c10_npu::SocVersion::Ascend310B4; + if (!is_supported) { + return acl_op::max_pool3d_with_indices_backward(grad_output, self, kernel_size, stride, padding, dilation, + ceil_mode, indices); + } + + auto input_size = self.sizes(); + at::Tensor grad_input = npu_preparation::apply_tensor_without_format(grad_output, input_size); + EXEC_NPU_CMD(aclnnMaxPool3dWithArgmaxBackward, grad_output, self, indices, kernel_size, stride, padding, + dilation, ceil_mode, grad_input); + return grad_input; +} +} diff --git a/op_plugin/ops/opapi/MaxPool3dWithIndicesKernelNpuOpApi.cpp b/op_plugin/ops/opapi/MaxPool3dWithIndicesKernelNpuOpApi.cpp new file mode 100644 index 000000000..d31e7b3f0 --- /dev/null +++ b/op_plugin/ops/opapi/MaxPool3dWithIndicesKernelNpuOpApi.cpp @@ -0,0 +1,157 @@ +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <ATen/native/Pool.h>
+
+#include "op_plugin/AclOpsInterface.h"
+#include "op_plugin/OpApiInterface.h"
+#include "op_plugin/utils/op_api_common.h"
+#include "op_plugin/utils/KernelNpuOutputSize.h"
+#include "torch_npu/csrc/core/npu/NpuVariables.h"
+
+namespace op_api {
+using npu_preparation = at_npu::native::OpPreparation;
+
+void max_pool3d_with_indices_parameter_check(const at::Tensor &self, at::IntArrayRef kernel_size,
+                                             at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation)
+{
+    TORCH_CHECK(kernel_size.size() == 1 || kernel_size.size() == 3,
+        "max_pool3d: kernel_size must either be a single int, or a tuple of three ints", OPS_ERROR(ErrCode::PARAM));
+    TORCH_CHECK(stride.size() == 0 || stride.size() == 1 || stride.size() == 3,
+        "max_pool3d: stride must either be omitted, a single int, or a tuple of three ints", OPS_ERROR(ErrCode::PARAM));
+    TORCH_CHECK(padding.size() == 1 || padding.size() == 3,
+        "max_pool3d: padding must either be a single int, or a tuple of three ints", OPS_ERROR(ErrCode::PARAM));
+    TORCH_CHECK(dilation.size() == 1 || dilation.size() == 3,
+        "max_pool3d: dilation must either be a single int, or a tuple of three ints", OPS_ERROR(ErrCode::PARAM));
+    TORCH_CHECK((self.ndimension() == 4 || self.ndimension() == 5),
+        "non-empty 4D or 5D (batch mode) tensor expected for input", OPS_ERROR(ErrCode::PARAM));
+}
+
+std::tuple<at::Tensor, at::Tensor> exec_max_pool3d_with_indices(
+    const at::Tensor& self,
+    at::IntArrayRef kernel_size,
+    at::IntArrayRef stride,
+    at::IntArrayRef padding,
+    at::IntArrayRef dilation,
+    bool ceil_mode)
+{
+    max_pool3d_with_indices_parameter_check(self, kernel_size, stride, padding, dilation);
+
+    const int k_D = at::native::safe_downcast<int, int64_t>(kernel_size[0]);
+    const int k_H = kernel_size.size() == 1 ? k_D : at::native::safe_downcast<int, int64_t>(kernel_size[1]);
+    const int k_W = kernel_size.size() == 1 ? k_D : at::native::safe_downcast<int, int64_t>(kernel_size[2]);
+
+    // NB: stride default is not expressible as an integer constant, so we accept
+    // empty stride for this case
+    const int d_D = stride.empty() ? k_D : at::native::safe_downcast<int, int64_t>(stride[0]);
+    const int d_H = stride.empty() ? k_H :
+                    stride.size() == 1 ? d_D :
+                    at::native::safe_downcast<int, int64_t>(stride[1]);
+    const int d_W = stride.empty() ? k_W :
+                    stride.size() == 1 ? d_D :
+                    at::native::safe_downcast<int, int64_t>(stride[2]);
+
+    const int pad_D = at::native::safe_downcast<int, int64_t>(padding[0]);
+    const int pad_H = padding.size() == 1 ? pad_D : at::native::safe_downcast<int, int64_t>(padding[1]);
+    const int pad_W = padding.size() == 1 ? pad_D : at::native::safe_downcast<int, int64_t>(padding[2]);
+
+    const int dilation_D = at::native::safe_downcast<int, int64_t>(dilation[0]);
+    const int dilation_H = dilation.size() == 1 ? dilation_D : at::native::safe_downcast<int, int64_t>(dilation[1]);
+    const int dilation_W = dilation.size() == 1 ? dilation_D : at::native::safe_downcast<int, int64_t>(dilation[2]);
+
+    const int64_t n_batch = self.ndimension() == 5 ? self.size(-5) : 1;
+    const int64_t n_slices = self.size(-4);
+    const int64_t input_time = self.size(-3);
+    const int64_t input_height = self.size(-2);
+    const int64_t input_width = self.size(-1);
+
+    const int64_t output_time =
+        at::native::pooling_output_shape<int64_t>(input_time, k_D, pad_D, d_D, dilation_D, ceil_mode);
+    const int64_t output_height =
+        at::native::pooling_output_shape<int64_t>(input_height, k_H, pad_H, d_H, dilation_H, ceil_mode);
+    const int64_t output_width =
+        at::native::pooling_output_shape<int64_t>(input_width, k_W, pad_W, d_W, dilation_W, ceil_mode);
+
+    at::native::pool3d_shape_check(self,
+                                   n_slices,
+                                   k_D, k_H, k_W,
+                                   d_D, d_H, d_W,
+                                   pad_D, pad_H, pad_W,
+                                   dilation_D, dilation_H, dilation_W,
+                                   input_time, input_height, input_width,
+                                   output_time, output_height, output_width,
+                                   "max_pool3d_with_indices");
+
+    c10::SmallVector<int64_t, op_infer::SIZE> output_size =
+        self.ndimension() == 5 ?
+        c10::SmallVector<int64_t, op_infer::SIZE>({n_batch, n_slices, output_time, output_height, output_width}) :
+        c10::SmallVector<int64_t, op_infer::SIZE>({n_slices, output_time, output_height, output_width});
+
+    at::Tensor output = npu_preparation::apply_tensor_without_format(output_size, self.options());
+
+    // The indices tensor can only be of int32 type
+    at::Tensor indices = npu_preparation::apply_tensor_without_format(output_size, self.options().dtype(at::kInt));
+
+    EXEC_NPU_CMD(aclnnMaxPool3dWithArgmax, self, kernel_size,
+                 stride, padding, dilation, ceil_mode, output, indices);
+
+    return std::tuple<at::Tensor, at::Tensor>(output, indices);
+}
+
+std::tuple<at::Tensor, at::Tensor> max_pool3d_with_indices(
+    const at::Tensor& self,
+    at::IntArrayRef kernel_size,
+    at::IntArrayRef stride,
+    at::IntArrayRef padding,
+    at::IntArrayRef dilation,
+    bool ceil_mode)
+{
+    DO_COMPATIBILITY(aclnnMaxPool3dWithArgmax, acl_op::max_pool3d_with_indices(self, kernel_size, stride,
+                                                                               padding, dilation, ceil_mode));
+
+    static const bool is_supported = (c10_npu::GetSocVersion() >= c10_npu::SocVersion::Ascend910B1 &&
+                                      c10_npu::GetSocVersion() < c10_npu::SocVersion::Ascend310B1) ||
+                                     c10_npu::GetSocVersion() > c10_npu::SocVersion::Ascend310B4;
+    if (!is_supported) {
+        return acl_op::max_pool3d_with_indices(self, kernel_size, stride, padding, dilation, ceil_mode);
+    }
+
+    return op_api::exec_max_pool3d_with_indices(self, kernel_size, stride, padding, dilation, ceil_mode);
+}
+
+std::tuple<at::Tensor&, at::Tensor&> max_pool3d_with_indices_out(
+    const at::Tensor& self,
+    at::IntArrayRef kernel_size,
+    at::IntArrayRef stride,
+    at::IntArrayRef padding,
+    at::IntArrayRef dilation,
+    bool ceil_mode,
+    at::Tensor& output,
+    at::Tensor& indices)
+{
+    DO_COMPATIBILITY(aclnnMaxPool3dWithArgmax, acl_op::max_pool3d_with_indices_out(self, kernel_size, stride, padding,
+                                                                                   dilation, ceil_mode, output, indices));
+
+    static const bool is_supported = (c10_npu::GetSocVersion() >= c10_npu::SocVersion::Ascend910B1 &&
+                                      c10_npu::GetSocVersion() < c10_npu::SocVersion::Ascend310B1) ||
+                                     c10_npu::GetSocVersion() > c10_npu::SocVersion::Ascend310B4;
+    if (!is_supported) {
+        return acl_op::max_pool3d_with_indices_out(self, kernel_size, stride, padding, dilation, ceil_mode, output,
+                                                   indices);
+    }
+
+    EXEC_NPU_CMD(aclnnMaxPool3dWithArgmax, self, kernel_size, stride, padding, dilation, ceil_mode, output, indices);
+    return std::tuple<at::Tensor&, at::Tensor&>(output, indices);
+}
+}
--
Gitee
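
Note (reviewer aid, not part of the patch): below is a minimal libtorch-side sketch of how the op_api path added above could be exercised. It assumes a build in which torch_npu has registered the "npu" device string and a device "npu:0" is available; the tensor shapes, the device index, and the main() harness are illustrative assumptions, not taken from this change.

#include <iostream>
#include <torch/torch.h>

int main() {
    // 5D NCDHW input with placeholder sizes; "npu:0" is an assumed device.
    auto input = torch::randn({2, 3, 8, 16, 16},
                              torch::TensorOptions().dtype(torch::kFloat).device("npu:0"));
    // kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=false.
    auto [output, indices] = at::max_pool3d_with_indices(
        input, /*kernel_size=*/{3, 3, 3}, /*stride=*/{2, 2, 2},
        /*padding=*/{1, 1, 1}, /*dilation=*/{1, 1, 1}, /*ceil_mode=*/false);
    // pooling_output_shape gives (16 + 2*1 - 1*(3-1) - 1) / 2 + 1 = 8 along H and W
    // and 4 along D, so output should come back as [2, 3, 4, 8, 8]; on the op_api
    // path the indices tensor is int32.
    std::cout << output.sizes() << " " << indices.sizes() << std::endl;
    return 0;
}

On SoC versions outside the is_supported range the same call should fall back to the acl_op implementation, so which dispatch branch the sketch hits depends on the device it runs on.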