/******************************************************************************* * Copyright 2024 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ /// @file /// ukernel C API #ifndef ONEAPI_DNNL_DNNL_UKERNEL_H #define ONEAPI_DNNL_DNNL_UKERNEL_H #include "oneapi/dnnl/dnnl.h" #include "oneapi/dnnl/dnnl_ukernel_types.h" #ifdef __cplusplus extern "C" { #endif /// @addtogroup dnnl_api /// @{ /// @addtogroup dnnl_api_ukernel /// @{ #ifdef DNNL_EXPERIMENTAL_UKERNEL /// Creates a ukernel attributes memory storage. /// /// @param attr_params Output ukernel attributes memory storage. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_create( dnnl_ukernel_attr_params_t *attr_params); /// Sets post-operations arguments to a storage. /// /// @param attr_params Memory pointers storage object. /// @param post_ops_args A pointer to pointers of post_ops storages. Expected to /// be packed together. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_post_ops_args( dnnl_ukernel_attr_params_t attr_params, const void **post_ops_args); /// Sets tensor A scales argument to a storage. /// /// @param attr_params Memory pointers storage object. /// @param a_scales Pointer to the scales storage. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_A_scales( dnnl_ukernel_attr_params_t attr_params, const void *a_scales); /// Sets tensor B scales argument to a storage. /// /// If `dnnl_brgemm_set_B_scales` used mask of 2, then at least N values of /// selected data type are expected. /// /// @param attr_params Memory pointers storage object. /// @param b_scales Pointer to the scales storage. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_B_scales( dnnl_ukernel_attr_params_t attr_params, const void *b_scales); /// Sets tensor D scales argument to a storage. /// /// @param attr_params Memory pointers storage object. /// @param d_scales Pointer to the scales storage. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_D_scales( dnnl_ukernel_attr_params_t attr_params, const void *d_scales); /// Destroys a ukernel attributes memory storage. /// /// @param attr_params Memory pointers storage object to destroy. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_ukernel_attr_params_destroy( dnnl_ukernel_attr_params_t attr_params); /// @addtogroup dnnl_api_ukernel_brgemm /// @{ /// Creates a BRGeMM ukernel object. Operates by the following formula: /// `C = [A x B]`. /// /// @param brgemm Output BRGeMM ukernel object. /// @param M Dimension M of tensor A. /// @param N Dimension N of tensor B. /// @param K Dimension K of tensors A and B. /// @param batch_size Number of batches to process. /// @param lda Leading dimension of tensor A. /// @param ldb Leading dimension of tensor B. /// @param ldc Leading dimension of tensor C. /// @param a_dt Data type of tensor A. /// @param b_dt Data type of tensor B. /// @param c_dt Data type of tensor C. Must be dnnl_f32. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_create(dnnl_brgemm_t *brgemm, dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t batch_size, dnnl_dim_t lda, dnnl_dim_t ldb, dnnl_dim_t ldc, dnnl_data_type_t a_dt, dnnl_data_type_t b_dt, dnnl_data_type_t c_dt); /// Sets adding an intermediate result to the output tensor C instead of /// writing: `C += [A x B]`. /// /// @param brgemm BRGeMM ukernel object. /// @param add_C Value to indicate addition. Can be `0` to skip addition, and /// `1` to apply addition. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_set_add_C(dnnl_brgemm_t brgemm, int add_C); /// Sets post-operations to a BRGeMM ukernel object: `D = post-operations(C)`. /// /// Post-operations applies if one of the following holds: /// * Non-empty attributes are specified. /// * Output data type `d_dt` is different from accumulation data type `c_dt`. /// /// If any of conditions happens, the final call of the accumulation chain /// must be `dnnl_brgemm_execute_postops`, and `dnnl_brgemm_execute`, otherwise. /// /// @param brgemm BRGeMM ukernel object. /// @param ldd Leading dimension of tensor D. /// @param d_dt Data type of tensor D. /// @param post_ops Primitive post operations attribute to extend the kernel /// operations. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_set_post_ops(dnnl_brgemm_t brgemm, dnnl_dim_t ldd, dnnl_data_type_t d_dt, const_dnnl_post_ops_t post_ops); /// Sets tensor A scales mask to a BRGeMM ukernel object. /// /// For quantization flavor tensor A scales apply to accumulation buffer once C /// is ready. /// /// @param brgemm BRGeMM ukernel object. /// @param a_scale_mask Tensor A scale mask. Can be `0` only. dnnl_status_t DNNL_API dnnl_brgemm_set_A_scales( dnnl_brgemm_t brgemm, int a_scale_mask); /// Sets tensor B scales mask to a BRGeMM ukernel object. /// /// For quantization flavor tensor B scales apply to accumulation buffer once C /// is ready. /// /// @param brgemm BRGeMM ukernel object. /// @param b_scale_mask Tensor B scale mask. Can be `0` and `2` only. dnnl_status_t DNNL_API dnnl_brgemm_set_B_scales( dnnl_brgemm_t brgemm, int b_scale_mask); /// Sets tensor D scales mask to a BRGeMM ukernel object. /// /// For quantization flavor tensor D scales apply after all post-ops are /// applied. /// /// @param brgemm BRGeMM ukernel object. /// @param d_scale_mask Tensor D scale mask. Can be `0` only. dnnl_status_t DNNL_API dnnl_brgemm_set_D_scales( dnnl_brgemm_t brgemm, int d_scale_mask); /// Finalizes initialization of a BRGeMM ukernel object. /// /// This step is mandatory to query information from the object. /// /// @param brgemm Output BRGeMM ukernel object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_finalize(dnnl_brgemm_t brgemm); /// Returns the packing type expected by a tensor B of a BRGeMM ukernel object. /// /// @param brgemm BRGeMM ukernel object. /// @param pack_type Output packing type. Can be `dnnl_brgemm_no_pack` if /// packing is not expected, and `dnnl_brgemm_pack_32`, otherwise. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_get_B_pack_type( const_dnnl_brgemm_t brgemm, dnnl_pack_type_t *pack_type); /// Returns the size of a scratchpad memory needed for the BRGeMM ukernel /// object. /// /// @param brgemm BRGeMM ukernel object. /// @param size Output size of a buffer required for the BRGeMM ukernel object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_get_scratchpad_size( const_dnnl_brgemm_t brgemm, size_t *size); /// Returns the flag indicating when the call to `dnnl_brgemm_execute_postops` /// is valid. /// /// @param brgemm BRGeMM ukernel object. /// @param valid The flag indicating if `dnnl_brgemm_execute_postops` is valid /// for a given ukernel object. `1` is for valid and `0`, otherwise. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_is_execute_postops_valid( const_dnnl_brgemm_t brgemm, int *valid); /// Initializes the hardware-specific context. If no initialization required, /// returns the success status. /// /// @param brgemm BRGeMM ukernel object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_set_hw_context(const_dnnl_brgemm_t brgemm); /// Releases the hardware-specific context. Must be used after all the execution /// calls to BRGeMM ukernel objects. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_release_hw_context(); /// Generates an executable part of BRGeMM ukernel object. /// @param brgemm BRGeMM ukernel object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_generate(dnnl_brgemm_t brgemm); /// Executes a BRGeMM ukernel object. /// /// @param brgemm BRGeMM ukernel object. /// @param A_ptr Base pointer to a tensor A. /// @param B_ptr Base pointer to a tensor B. /// @param A_B_offsets Pointer to the set of tensor A and tensor B offsets for /// each batch; the set must be contiguous in memory. Single batch should /// supply offsets for both tensors A and B simultaneously. The number of /// batches must coincide with the `batch_size` value passed at the creation /// stage. /// @param C_ptr Pointer to a tensor C (accumulation buffer). /// @param scratchpad_ptr Pointer to a scratchpad buffer. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_execute(const_dnnl_brgemm_t brgemm, const void *A_ptr, const void *B_ptr, const dnnl_dim_t *A_B_offsets, void *C_ptr, void *scratchpad_ptr); /// Executes a BRGeMM ukernel object with post operations. /// /// @param brgemm BRGeMM ukernel object. /// @param A Base pointer to a tensor A. /// @param B Base pointer to a tensor B. /// @param A_B_offsets Pointer to a set of tensor A and tensor B offsets for /// each batch. A set must be contiguous in memory. A single batch should /// supply offsets for both tensors A and B simultaneously. The number of /// batches must coincide with the `batch_size` value passed at the creation /// stage. /// @param C_ptr Pointer to a tensor C (accumulation buffer). /// @param D_ptr Pointer to a tensor D (output buffer). /// @param scratchpad_ptr Pointer to a scratchpad buffer. /// @param attr_params Ukernel attributes memory storage. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_execute_postops(const_dnnl_brgemm_t brgemm, const void *A, const void *B, const dnnl_dim_t *A_B_offsets, const void *C_ptr, void *D_ptr, void *scratchpad_ptr, const_dnnl_ukernel_attr_params_t attr_params); /// Destroys a BRGeMM ukernel object. /// /// @param brgemm BRGeMM ukernel object to destroy. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_brgemm_destroy(dnnl_brgemm_t brgemm); /// Creates a transform object. /// /// @param transform Output transform object. /// @param K Dimension K. /// @param N Dimension N. /// @param in_pack_type Input packing type. Must be one of /// `dnnl_pack_type_no_trans`, or `dnnl_pack_type_trans`. /// @param in_ld Input leading dimension. /// @param out_ld Output leading dimension. When packing data, it specifies a /// block by N dimension. /// @param in_dt Input data type. /// @param out_dt Output data type. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_transform_create(dnnl_transform_t *transform, dnnl_dim_t K, dnnl_dim_t N, dnnl_pack_type_t in_pack_type, dnnl_dim_t in_ld, dnnl_dim_t out_ld, dnnl_data_type_t in_dt, dnnl_data_type_t out_dt); /// Generates an executable part of transform object. /// @param transform Transform object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_transform_generate(dnnl_transform_t transform); /// Executes a transform object. /// /// @param transform Transform object. /// @param in_ptr Pointer to an input buffer. /// @param out_ptr Pointer to an output buffer. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_transform_execute( const_dnnl_transform_t transform, const void *in_ptr, void *out_ptr); /// Destroys a transform object. /// /// @param transform Transform object. /// @returns #dnnl_success on success and a status describing the error /// otherwise. dnnl_status_t DNNL_API dnnl_transform_destroy(dnnl_transform_t transform); /// @} dnnl_api_ukernel_brgemm #endif /// @} dnnl_api_ukernel /// @} dnnl_api #ifdef __cplusplus } #endif #endif /* ONEAPI_DNNL_DNNL_UKERNEL_H */