1 Star 0 Fork 450

sodojun/Paddle

forked from PaddlePaddle/Paddle 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
operators.cmake 29.41 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
# CMake file `unity_build` is used to handle Unity Build compilation.
include(unity_build)
set(PART_CUDA_KERNEL_FILES)
function(find_register FILENAME PATTERN OUTPUT)
# find the op_name of REGISTER_OPERATOR(op_name, ...), REGISTER_OP_CPU_KERNEL(op_name, ...) , etc.
# set op_name to OUTPUT
set(options "")
set(oneValueArgs "")
set(multiValueArgs "")
file(READ ${FILENAME} CONTENT)
# message ("number of arguments sent to function: ${ARGC}")
# message ("all function arguments: ${ARGV}")
# message("PATTERN ${PATTERN}")
string(REGEX MATCH "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*," register "${CONTENT}")
if(NOT register STREQUAL "")
string(REPLACE "${PATTERN}(" "" register "${register}")
string(REPLACE "," "" register "${register}")
# [ \t\r\n]+ is used for blank characters.
# Here we use '+' instead of '*' since it is a REPLACE operation.
string(REGEX REPLACE "[ \t\r\n]+" "" register "${register}")
endif()
set(${OUTPUT}
${register}
PARENT_SCOPE)
endfunction()
function(find_phi_register FILENAME ADD_PATH PATTERN)
# set op_name to OUTPUT
file(READ ${FILENAME} CONTENT)
string(
REGEX
MATCH
"${PATTERN}\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
register
"${CONTENT}")
if(NOT register STREQUAL "")
string(REPLACE "${PATTERN}(" "" register "${register}")
string(REPLACE "," ";" register "${register}")
string(REGEX REPLACE "[ \\\t\r\n]+" "" register "${register}")
string(REGEX REPLACE "//cuda_only" "" register "${register}")
list(GET register 0 kernel_name)
list(GET register 1 kernel_backend)
list(GET register 2 kernel_layout)
file(
APPEND ${ADD_PATH}
"PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});\n"
)
endif()
endfunction()
# Just for those gpu kernels locating at "fluid/operators/", such as 'class_center_sample_op.cu'.
# Add other file modes if need in the future.
function(register_cu_kernel TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(register_cu_kernel "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(cu_srcs)
set(op_common_deps operator op_registry layer common_infer_shape_functions)
foreach(cu_src ${register_cu_kernel_SRCS})
if(${cu_src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${cu_src})
endif()
endforeach()
list(LENGTH cu_srcs cu_srcs_len)
if(${cu_srcs_len} EQUAL 0)
message(
FATAL_ERROR
"The GPU kernel file of ${TARGET} should contains at least one .cu file"
)
endif()
if(WITH_GPU)
nv_library(
${TARGET}
SRCS ${cu_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
elseif(WITH_ROCM)
hip_library(
${TARGET}
SRCS ${cu_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
set(OP_LIBRARY
${TARGET} ${OP_LIBRARY}
CACHE INTERNAL "op libs")
foreach(cu_src ${cu_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${cu_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
endif()
endforeach()
endfunction()
# Just for those mkldnn kernels locating at "fluid/operators/mkldnn/", such as 'layer_norm_mkldnn_op.cc'.
# Add other file modes if need in the future.
function(register_mkldnn_kernel TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(register_mkldnn_kernel "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(mkldnn_cc_srcs)
set(op_common_deps operator op_registry phi layer
common_infer_shape_functions)
foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
list(APPEND mkldnn_cc_srcs mkldnn/${mkldnn_src})
endif()
endforeach()
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
if(${mkldnn_cc_srcs_len} EQUAL 0)
message(
FATAL_ERROR
"The MKLDNN kernel file of ${TARGET} should contains at least one *.*_mkldnn_op.cc file"
)
endif()
if(WITH_MKLDNN)
cc_library(
${TARGET}
SRCS ${mkldnn_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
set(OP_LIBRARY
${TARGET} ${OP_LIBRARY}
CACHE INTERNAL "op libs")
foreach(mkldnn_src ${mkldnn_cc_srcs})
set(op_name "")
find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
endif()
endforeach()
endfunction()
function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# for ops.
set(cc_srcs)
set(cu_srcs)
set(hip_srcs)
set(cu_cc_srcs)
set(hip_cc_srcs)
set(xpu_cc_srcs)
set(xpu_kp_cc_srcs)
set(cudnn_cu_cc_srcs)
set(miopen_cu_cc_srcs)
set(cudnn_cu_srcs)
set(miopen_cu_srcs)
set(CUDNN_FILE)
set(MIOPEN_FILE)
set(mkldnn_cc_srcs)
set(MKLDNN_FILE)
set(op_common_deps operator op_registry phi layer
common_infer_shape_functions)
# Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
set(options UNITY)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
set(pybind_flag 0)
cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
list(LENGTH op_library_SRCS op_library_SRCS_len)
if(${op_library_SRCS_len} EQUAL 0)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
list(APPEND cc_srcs ${TARGET}.cc)
endif()
if(WITH_GPU)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${TARGET}.cu)
endif()
# rename in KP: .kps -> .cu
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES}
PARENT_SCOPE)
list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif()
string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
endif()
endif()
if(WITH_ROCM)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
list(APPEND hip_cc_srcs ${TARGET}.cu.cc)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND hip_srcs ${TARGET}.cu)
endif()
# rename in KP: .kps -> .cu
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES}
PARENT_SCOPE)
list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif()
string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc)
list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu)
list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
endif()
endif()
if(WITH_MKLDNN)
string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc)
list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
endif()
endif()
if(WITH_XPU)
string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
endif()
endif()
if(WITH_XPU_KP)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu)
list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu)
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
endif()
endif()
else()
foreach(src ${op_library_SRCS})
if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND miopen_cu_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu$")
list(APPEND hip_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND miopen_cu_cc_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu.cc$")
list(APPEND hip_cc_srcs ${src})
elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND cudnn_cu_srcs ${src})
elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src})
elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND cudnn_cu_cc_srcs ${src})
elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu.cc$")
list(APPEND cu_cc_srcs ${src})
elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
list(APPEND mkldnn_cc_srcs ${src})
elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
list(APPEND xpu_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$")
list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$")
list(APPEND xpu_kp_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
elseif((WITH_ROCM OR WITH_GPU) AND ${src} MATCHES ".*\\.kps$")
string(REPLACE ".kps" ".cu" src_cu ${src})
file(COPY ${src} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${src}
${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
if(WITH_ROCM)
list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
else()
list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
endif()
else()
message(
FATAL_ERROR
"${TARGET} Source file ${src} should only be .cc or .cu or .xpu")
endif()
endforeach()
endif()
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len)
list(LENGTH cc_srcs cc_srcs_len)
if(${cc_srcs_len} EQUAL 0)
message(
FATAL_ERROR
"The op library ${TARGET} should contains at least one .cc file")
endif()
if(WIN32)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
if("${TARGET}" STREQUAL "${windows_unsupport_op}")
return()
endif()
endforeach()
endif()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Generate the unity target name by the directory where source files located.
string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET
${CMAKE_CURRENT_SOURCE_DIR})
string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET})
set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity")
if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY)
set(OP_LIBRARY
${UNITY_TARGET} ${OP_LIBRARY}
CACHE INTERNAL "op libs")
endif()
else()
set(OP_LIBRARY
${TARGET} ${OP_LIBRARY}
CACHE INTERNAL "op libs")
endif()
list(LENGTH op_library_DEPS op_library_DEPS_len)
if(${op_library_DEPS_len} GREATER 0)
set(DEPS_OPS
${TARGET} ${DEPS_OPS}
PARENT_SCOPE)
endif()
if(WITH_GPU)
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc and cu source files.
compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs}
${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs}
${cu_srcs})
if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}
${unity_target_cu_sources})
else()
# If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
nv_library(
${UNITY_TARGET}
SRCS ${unity_target_cc_sources} ${unity_target_cu_sources}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
# Add alias library to handle dependencies.
add_library(${TARGET} ALIAS ${UNITY_TARGET})
else()
nv_library(
${TARGET}
SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs}
${mkldnn_cc_srcs} ${cu_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
elseif(WITH_ROCM)
list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu")
list(REMOVE_ITEM hip_srcs "lu_op.cu")
list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
list(REMOVE_ITEM hip_srcs "svd_op.cu")
list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu")
list(REMOVE_ITEM hip_srcs "qr_op.cu")
list(REMOVE_ITEM hip_srcs "eigh_op.cu")
list(REMOVE_ITEM hip_srcs "lstsq_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "multiclass_nms3_op.cu")
hip_library(
${TARGET}
SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs}
${mkldnn_cc_srcs} ${hip_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
elseif(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
xpu_library(
${TARGET}
SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
else()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc source files.
compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs}
${mkldnn_cc_srcs} ${xpu_cc_srcs})
if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
else()
# If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
cc_library(
${UNITY_TARGET}
SRCS ${unity_target_cc_sources}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
# Add alias library to handle dependencies.
add_library(${TARGET} ALIAS ${UNITY_TARGET})
else()
cc_library(
${TARGET}
SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps})
endif()
endif()
list(LENGTH cu_srcs cu_srcs_len)
list(LENGTH hip_srcs hip_srcs_len)
list(LENGTH cu_cc_srcs cu_cc_srcs_len)
list(LENGTH hip_cc_srcs hip_cc_srcs_len)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
# Define operators that don't need pybind here.
foreach(
manual_pybind_op
"compare_all_op"
"compare_op"
"logical_op"
"bitwise_op"
"nccl_op"
"tensor_array_read_write_op"
"tensorrt_engine_op")
if("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
endforeach()
# The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
# Note that it's enough to just adding one operator to pybind in a *_op.cc file.
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
set(ORIGINAL_TARGET ${TARGET})
string(REGEX REPLACE "_op" "" TARGET "${TARGET}")
foreach(cc_src ${cc_srcs})
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cc_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${cc_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${cc_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${cc_src} "REGISTER_OPERATOR" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
# hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
set(TARGET ${op_name})
set(pybind_flag 1)
endif()
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_register(${cc_src} "REGISTER_ACTIVATION_OP" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
# hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
set(TARGET ${op_name})
set(pybind_flag 1)
endif()
set(op_name "")
find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
# hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
set(TARGET ${op_name})
set(pybind_flag 1)
endif()
# pybind USE_OP_DEVICE_KERNEL for CPU
set(op_name "")
find_register(${cc_src} "REGISTER_OP_CPU_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n")
# why change TARGET here?
# when building padle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
# in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
# and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
# however, grad_add has no mkldnn kernel.
set(TARGET ${op_name})
set(pybind_flag 1)
endif()
endforeach()
# pybind USE_OP_DEVICE_KERNEL for CUDA
list(APPEND cu_srcs ${cu_cc_srcs})
# message("cu_srcs ${cu_srcs}")
foreach(cu_src ${cu_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${cu_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
set(pybind_flag 1)
endif()
endforeach()
# pybind USE_OP_DEVICE_KERNEL for operators/mkldnn/*
list(APPEND mkldnn_srcs ${mkldnn_cc_srcs})
foreach(mkldnn_src ${mkldnn_srcs})
set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${mkldnn_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${mkldnn_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${mkldnn_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${mkldnn_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
set(pybind_flag 1)
endif()
endforeach()
# pybind USE_OP_DEVICE_KERNEL for ROCm
list(APPEND hip_srcs ${hip_cc_srcs})
# message("hip_srcs ${hip_srcs}")
foreach(hip_src ${hip_srcs})
set(op_name "")
find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name)
find_phi_register(${hip_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${hip_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${hip_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
set(pybind_flag 1)
endif()
endforeach()
# pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN
list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs})
list(APPEND cudnn_cu_srcs ${miopen_cu_cc_srcs})
list(APPEND cudnn_cu_srcs ${miopen_cu_srcs})
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
#message("cudnn_cu_srcs ${cudnn_cu_srcs}")
if(${cudnn_cu_srcs_len} GREATER 0 AND ${ORIGINAL_TARGET} STREQUAL
"activation_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
else()
foreach(cudnn_src ${cudnn_cu_srcs})
set(op_name "")
find_register(${cudnn_src} "REGISTER_OP_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDNN);\n")
set(pybind_flag 1)
endif()
endforeach()
endif()
if(WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
if(${ORIGINAL_TARGET} STREQUAL "activation_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, XPU);\n")
else()
foreach(xpu_src ${xpu_cc_srcs})
set(op_name "")
find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name)
find_phi_register(${xpu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
set(pybind_flag 1)
else()
find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL_FUNCTOR" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
set(pybind_flag 1)
endif()
endif()
endforeach()
endif()
endif()
# pybind USE_OP_DEVICE_KERNEL for XPU KP
if(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
foreach(xpu_kp_src ${xpu_kp_cc_srcs})
set(op_name "")
find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name)
find_phi_register(${xpu_kp_src} ${pybind_file}
"PD_REGISTER_STRUCT_KERNEL")
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n")
message(STATUS "Building KP Target: ${op_name}")
set(pybind_flag 1)
endif()
endforeach()
endif()
# pybind USE_OP_DEVICE_KERNEL for MKLDNN
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator
if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
else()
foreach(mkldnn_src ${mkldnn_cc_srcs})
set(op_name "")
find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
set(pybind_flag 1)
endif()
endforeach()
endif()
endif()
# pybind USE_NO_KERNEL_OP
# HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
string(REPLACE "_op" "" TARGET "${TARGET}")
if(${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
set(pybind_flag 1)
endif()
# pybind USE_OP
if(${pybind_flag} EQUAL 0)
# NOTE(*): activation use macro to regist the kernels, set use_op manually.
if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
elseif(${TARGET} STREQUAL "fake_dequantize")
file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
elseif(${TARGET} STREQUAL "fake_quantize")
file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n")
elseif(${TARGET} STREQUAL "tensorrt_engine_op")
message(
STATUS
"Pybind skips [tensorrt_engine_op], for this OP is only used in inference"
)
else()
file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
endif()
endif()
endfunction()
function(register_operators)
set(options "")
set(oneValueArgs "")
set(multiValueArgs EXCLUDES DEPS)
cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
file(
GLOB OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*_op.cc")
string(REPLACE "_mkldnn" "" OPS "${OPS}")
string(REPLACE "_xpu" "" OPS "${OPS}")
string(REPLACE ".cc" "" OPS "${OPS}")
list(REMOVE_DUPLICATES OPS)
list(LENGTH register_operators_DEPS register_operators_DEPS_len)
foreach(src ${OPS})
list(FIND register_operators_EXCLUDES ${src} _index)
if(${_index} EQUAL -1)
if(${register_operators_DEPS_len} GREATER 0)
op_library(${src} UNITY DEPS ${register_operators_DEPS})
else()
op_library(${src} UNITY)
endif()
endif()
endforeach()
# Complete the processing of `UNITY_TARGET`.
if(WITH_UNITY_BUILD)
finish_unity_target(cc)
if(WITH_GPU)
finish_unity_target(cu)
endif()
endif()
endfunction()
function(prune_pybind_h)
set(op_list ${OP_LIST})
list(APPEND op_list "load_combine")
list(APPEND op_list "tensorrt_engine")
# TODO(ming1753): conditional_block_infer is temporarily reserved here to avoid link errors in functions of standalone_executor
list(APPEND op_list "conditional_block_infer")
# add fused_op in op_list
list(APPEND op_list "fc")
list(APPEND op_list "fused_conv2d_add_act")
list(APPEND op_list "fusion_seqconv_eltadd_relu")
list(APPEND op_list "fusion_seqpool_cvm_concat")
list(APPEND op_list "fusion_gru")
list(APPEND op_list "fusion_seqexpand_concat_fc")
list(APPEND op_list "fusion_repeated_fc_relu")
list(APPEND op_list "fusion_squared_mat_sub")
# add plugin_op in op_list
list(APPEND op_list "anchor_generator")
file(STRINGS ${pybind_file} op_registry_list)
file(WRITE ${pybind_file_prune} "")
file(
APPEND ${pybind_file_prune}
"// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n"
)
# add USE_OP_ITSELF for all op in op_list
foreach(op_name IN LISTS op_list)
file(APPEND ${pybind_file_prune} "USE_OP_ITSELF(${op_name});\n")
endforeach()
foreach(op_registry IN LISTS op_registry_list)
if(NOT ${op_registry} EQUAL "")
foreach(op_name IN LISTS op_list)
string(FIND ${op_registry} "(${op_name})" index1)
string(FIND ${op_registry} "(${op_name}," index2)
string(FIND ${op_registry} "USE_OP_ITSELF" index3)
if(((NOT ${index1} EQUAL "-1") OR (NOT ${index2} EQUAL "-1"))
AND (${index3} EQUAL "-1"))
file(APPEND ${pybind_file_prune} "${op_registry}\n")
endif()
endforeach()
endif()
endforeach()
file(WRITE ${pybind_file} "")
file(STRINGS ${pybind_file_prune} op_registry_list_tmp)
foreach(op_name IN LISTS op_registry_list_tmp)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "${op_name}\n")
endif()
endforeach()
endfunction()
function(append_op_util_declare TARGET)
file(READ ${TARGET} target_content)
string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
util_registrar "${target_content}")
if(NOT ${util_registrar} EQUAL "")
string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
util_declare "${util_registrar}")
string(APPEND util_declare ");\n")
file(APPEND ${op_utils_header} "${util_declare}")
endif()
endfunction()
function(append_op_kernel_map_declare TARGET)
file(READ ${TARGET} target_content)
string(
REGEX
MATCH
"(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
kernel_mapping_registrar
"${target_content}")
if(NOT ${kernel_mapping_registrar} EQUAL "")
string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
kernel_mapping_declare "${kernel_mapping_registrar}")
string(APPEND kernel_mapping_declare ");\n")
file(APPEND ${op_utils_header} "${kernel_mapping_declare}")
endif()
endfunction()
function(register_op_utils TARGET_NAME)
set(utils_srcs)
set(options "")
set(oneValueArgs "")
set(multiValueArgs EXCLUDES DEPS)
cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
file(GLOB SIGNATURES
"${PADDLE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/*_sig.cc")
foreach(target ${SIGNATURES})
append_op_util_declare(${target})
append_op_kernel_map_declare(${target})
list(APPEND utils_srcs ${target})
endforeach()
cc_library(
${TARGET_NAME}
SRCS ${utils_srcs}
DEPS ${register_op_utils_DEPS})
endfunction()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/sodojun/Paddle.git
[email protected]:sodojun/Paddle.git
sodojun
Paddle
Paddle
develop

搜索帮助