/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

/// @file
/// C API

#ifndef ONEAPI_DNNL_DNNL_H
#define ONEAPI_DNNL_DNNL_H

#include "oneapi/dnnl/dnnl_common.h"
#include "oneapi/dnnl/dnnl_config.h"
#include "oneapi/dnnl/dnnl_types.h"
#include "oneapi/dnnl/dnnl_version.h"

#ifdef __cplusplus
extern "C" {
#endif

/// @addtogroup dnnl_api
/// @{

/// @addtogroup dnnl_api_primitives
/// @{

/// @addtogroup dnnl_api_primitives_common
/// @{

/// Changes the primitive descriptor to point to the next available
/// implementation.
///
/// @param primitive_desc A primitive descriptor to change.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
/// @returns #dnnl_last_impl_reached if no more implementations available,
/// in which case the primitive descriptor itself is kept unchanged.
dnnl_status_t DNNL_API dnnl_primitive_desc_next_impl(
        dnnl_primitive_desc_t primitive_desc);

/// Clones a primitive descriptor. The resulting primitive descriptor must be
/// destroyed separately.
///
/// @param primitive_desc Output primitive descriptor.
/// @param existing_primitive_desc Primitive descriptor to clone.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_desc_clone(
        dnnl_primitive_desc_t *primitive_desc,
        const_dnnl_primitive_desc_t existing_primitive_desc);

/// Returns a constant reference to the attributes of a primitive descriptor.
///
/// @warning
///     It is an error to destroy the resulting @p attr.
///
/// @warning
///     The lifetime of an @p attr is the same as that of a @p
///     primitive_desc, so it is an error to use the @p attr once the @p
///     primitive_desc has been destroyed.
///
/// @param primitive_desc Primitive descriptor.
/// @param attr Output primitive attributes.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_desc_get_attr(
        const_dnnl_primitive_desc_t primitive_desc,
        const_dnnl_primitive_attr_t *attr);

/// Destroys a primitive descriptor.
///
/// @param primitive_desc Primitive descriptor to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_desc_destroy(
        dnnl_primitive_desc_t primitive_desc);

/// Queries a primitive descriptor for various pieces of information.
///
/// The most common use case is to query a primitive descriptor, created with
/// source, weights, and destination memory descriptors with format tags set
/// to #dnnl_format_tag_any, for the corresponding memory descriptors (in this
/// case the @p what is set to #dnnl_query_src_md, #dnnl_query_weights_md, and
/// #dnnl_query_dst_md respectively) so that it is possible to create memory
/// objects and reorder primitives if necessary.
///
/// Another typical use case is to query a primitive descriptor for workspace
/// memory descriptor (with @p what set to #dnnl_query_workspace_md). If this
/// query returns #dnnl_not_required status, then workspace memory is not
/// required.
///
/// @note
///     When querying for a memory descriptor for a scratchpad, a workspace,
///     or an optional parameter, the query will return a pointer to a zero
///     memory descriptor if the parameter is not needed.
///
/// A few other use cases:
///  - query a primitive descriptor for the implementation information string
///    (#dnnl_query_impl_info_str)
///  - query a primitive descriptor for the number of inputs and outputs
///    (#dnnl_query_num_of_inputs_s32 and #dnnl_query_num_of_outputs_s32
///    respectively)
///
/// @sa dnnl_query_t for more options
///
/// @param primitive_desc Primitive descriptor.
/// @param what Parameter to query.
/// @param index Index of the parameter to query for.
/// @param result Output result. The type depends on the query. For example,
///     it must be a @c dnnl_memory_desc_t* if querying for a memory
///     descriptor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_desc_query(
        const_dnnl_primitive_desc_t primitive_desc, dnnl_query_t what,
        int index, void *result);

/// Queries primitive descriptor for a memory descriptor.
///
/// @note
///     This function is a convenience version of
///     #dnnl_primitive_desc_query().
///
/// @param primitive_desc Primitive descriptor.
/// @param what Kind of memory descriptor parameter to query for.
/// @param index Index of the parameter to query.
/// @returns A pointer to the requested memory descriptor.
/// @returns A pointer to a zero memory descriptor if the parameter is not
///          needed.
/// @returns NULL in case of any error.
///
const_dnnl_memory_desc_t DNNL_API dnnl_primitive_desc_query_md(
        const_dnnl_primitive_desc_t primitive_desc, dnnl_query_t what,
        int index);

/// Queries primitive descriptor for a signed 32bit int.
///
/// @note
///     This function is a convenience version of
///     #dnnl_primitive_desc_query().
///
/// @param primitive_desc Primitive descriptor.
/// @param what Kind of the value to query for.
/// @param index Index of the parameter to query.
/// @returns The requested value.
/// @returns 0 in case of any error (in particular if the queried entity is
///     not of type int32_t). Note that 0 may also be the actual returned
///     value.
int DNNL_API dnnl_primitive_desc_query_s32(
        const_dnnl_primitive_desc_t primitive_desc, dnnl_query_t what,
        int index);

/// Creates a primitive.
///
/// @param primitive Output primitive.
/// @param primitive_desc Primitive descriptor used to create the primitive.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_create(dnnl_primitive_t *primitive,
        const_dnnl_primitive_desc_t primitive_desc);

/// Creates a primitive from a cache blob.
///
/// @param primitive Output primitive.
/// @param primitive_desc Primitive descriptor used to create the primitive.
/// @param size Size of the cache blob in bytes.
/// @param cache_blob Cache blob of size @p size.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_create_from_cache_blob(
        dnnl_primitive_t *primitive, const_dnnl_primitive_desc_t primitive_desc,
        size_t size, const uint8_t *cache_blob);

/// Executes a primitive.
///
/// @param primitive Primitive to execute.
/// @param stream Stream to use.
/// @param nargs Number of arguments.
/// @param args Array of arguments. Each argument is an
///     <index, #dnnl_memory_t> pair. The index is one of the `DNNL_ARG_*`
///     values such as `DNNL_ARG_SRC`. Unless runtime shapes are used (see
///     #DNNL_RUNTIME_DIM_VAL), the memory object must have the same memory
///     descriptor as that returned by
///     #dnnl_primitive_desc_query_md(#dnnl_query_exec_arg_md, index).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.

/// @note If any argument in @p args is padded (padded_dims >
/// dims), the primitive execution will assume properly zero-padded
/// input arguments, and produce zero-padded output arguments.
dnnl_status_t DNNL_API dnnl_primitive_execute(const_dnnl_primitive_t primitive,
        dnnl_stream_t stream, int nargs, const dnnl_exec_arg_t *args);

/// Retrieves a constant reference to the primitive descriptor of a given
/// primitive.
///
/// @warning
///     It is an error to destroy the returned object. It is owned by the
///     primitive. The @c const qualifier of the returned object prevents
///     such attempts.
///
/// @param primitive Primitive to query for the primitive descriptor.
/// @param primitive_desc Output primitive descriptor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_get_primitive_desc(
        const_dnnl_primitive_t primitive,
        const_dnnl_primitive_desc_t *primitive_desc);

/// Retrieves a cache blob associated with the given primitive.
///
/// @param primitive Primitive to query for the cache blob.
/// @param size Size of the cache blob in bytes.
/// @param cache_blob Cache blob of size @p size. If the @p cache_blob is
///     nullptr then the size of the cache blob is returned in @p size.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
///
/// @note The cache blob can be empty. It's the user's responsibility to check
///     whether it's empty prior to passing it to
///     #dnnl_primitive_create_from_cache_blob().
dnnl_status_t DNNL_API dnnl_primitive_get_cache_blob(
        const_dnnl_primitive_t primitive, size_t *size, uint8_t *cache_blob);

/// Destroys a primitive.
///
/// @param primitive The primitive to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_destroy(dnnl_primitive_t primitive);

/// @} dnnl_api_primitives_common

/// @addtogroup dnnl_api_attributes
/// @{

/// Creates an empty (default) primitive attributes with all the parameters
/// set to their default values.
///
/// Empty attributes are implied whenever the respective argument is NULL.
///
/// @param attr Output primitive attributes.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_create(dnnl_primitive_attr_t *attr);

/// Clones primitive attributes.
///
/// @param attr Output primitive attributes.
/// @param existing_attr Primitive attributes to clone.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_clone(
        dnnl_primitive_attr_t *attr, const_dnnl_primitive_attr_t existing_attr);

/// Destroys primitive attributes.
///
/// @param attr Primitive attributes to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_destroy(dnnl_primitive_attr_t attr);

/// Returns probability for output dropout primitive attribute.
///
/// @param attr Primitive attributes.
/// @param dropout_desc Output dropout memory descriptor
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_dropout(
        const_dnnl_primitive_attr_t attr,
        const_dnnl_memory_desc_t *dropout_desc);

/// Sets probability for output dropout primitive attribute.
///
/// @param attr Primitive attributes.
/// @param dropout_desc Output dropout memory descriptor
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_dropout(
        dnnl_primitive_attr_t attr, const_dnnl_memory_desc_t dropout_desc);

/// Returns the floating-point math mode primitive attribute.
///
/// @param attr Primitive attributes.
/// @param mode Output FP math mode.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_fpmath_mode(
        const_dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t *mode);

/// Sets the floating-point math mode primitive attributes.
///
/// @param attr Primitive attributes.
/// @param mode FP math mode. The possible values are:
///     #dnnl_fpmath_mode_strict (default),
///     #dnnl_fpmath_mode_bf16,
///     #dnnl_fpmath_mode_f16,
///     #dnnl_fpmath_mode_tf32,
///     #dnnl_fpmath_mode_any.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_fpmath_mode(
        dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t mode);

/// Returns the floating-point math mode primitive attribute.
///
/// @param attr Primitive attributes.
/// @param mode Output FP math mode.
/// @param apply_to_int Output use floating-point arithmetic for integer primitives.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_fpmath_mode_v2(
        const_dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t *mode,
        int *apply_to_int);

/// Sets the floating-point math mode primitive attributes.
///
/// @param attr Primitive attributes.
/// @param mode FP math mode. The possible values are:
///     #dnnl_fpmath_mode_strict (default),
///     #dnnl_fpmath_mode_bf16,
///     #dnnl_fpmath_mode_f16,
///     #dnnl_fpmath_mode_tf32,
///     #dnnl_fpmath_mode_any.
/// @param apply_to_int Boolean. Use of floating-point arithmetic for integer primitives.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_fpmath_mode_v2(
        dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t mode, int apply_to_int);

/// Returns the deterministic primitive attribute value.
///
/// @param attr Primitive attributes.
/// @param value Output deterministic attribute value
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_deterministic(
        const_dnnl_primitive_attr_t attr, int *value);

/// Sets the deterministic primitive attribute value.
///
/// @param attr Primitive attributes.
/// @param value Boolean value to set deterministic attribute.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_deterministic(
        dnnl_primitive_attr_t attr, int value);

/// Returns the accumulation mode primitive attribute.
///
/// @param attr Primitive attributes.
/// @param mode Output accumulation mode.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_accumulation_mode(
        const_dnnl_primitive_attr_t attr, dnnl_accumulation_mode_t *mode);

/// Sets the accumulation mode primitive attribute.
///
/// @param attr Primitive attributes.
/// @param mode Accumulation mode. The possible values are:
///     #dnnl_accumulation_mode_strict (default), which is s32 for quantized primitives, f32/f64 otherwise
///     #dnnl_accumulation_mode_relaxed, which is same as strict but allows intermediate accumulators to be in src/dst datatype
///     #dnnl_accumulation_mode_any, which allows accumulators to be src/dst datatype or any wider type.
///     #dnnl_accumulation_mode_f32,
///     #dnnl_accumulation_mode_s32,
///     #dnnl_accumulation_mode_f16.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_accumulation_mode(
        dnnl_primitive_attr_t attr, dnnl_accumulation_mode_t mode);

/// Returns the primitive attributes scratchpad mode.
///
/// @param attr Primitive attributes.
/// @param mode Output scratchpad mode.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_scratchpad_mode(
        const_dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t *mode);

/// Sets primitive attributes scratchpad mode.
///
/// @param attr Primitive attributes.
/// @param mode Scratchpad mode. The possible values are:
///     #dnnl_scratchpad_mode_library (default) and
///     #dnnl_scratchpad_mode_user.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_scratchpad_mode(
        dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t mode);

/// Sets primitive attributes scaling factors for primitive operations for a
/// given memory argument. The scaling factors must be passed at execution time
/// as an argument with index #DNNL_ARG_ATTR_SCALES | arg.
///
/// @sa dnnl_primitive_attr_set_scales_mask
///
///
/// @param attr Primitive attributes.
/// @param arg Parameter argument index as passed to the
///     dnnl_primitive_execute() call.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the tensor dimensions and the @p scales array.
///     The set i-th bit indicates that a dedicated scaling factor is used for
///     each index along that dimension. Set the mask to 0 to use a common
///     scaling factor for the whole output tensor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_scales_mask(
        dnnl_primitive_attr_t attr, int arg, int mask);

/// Sets primitive attributes scaling factors for primitive operations for a
/// given memory argument. The scaling factors must be passed at execution time
/// as an argument with index #DNNL_ARG_ATTR_SCALES | arg.
///
/// @sa dnnl_primitive_attr_set_scales
///
///
/// @param attr Primitive attributes.
/// @param arg Parameter argument index as passed to the
///     dnnl_primitive_execute() call.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the tensor dimensions and the @p scales array.
///     The set i-th bit indicates that a dedicated scaling factor is used for
///     each index along that dimension. Set the mask to 0 to use a common
///     scaling factor for the whole output tensor.
/// @param ndims Number of group dimensions.
/// @param group_dims Scaling factors correspondence groups that define the
///     correspondence between the tensor dimensions and the scales array.
///     The group dimensions should only be provided for each logical dimension
///     that has correspondence mask @p mask set.
/// @param data_type Scaling factors data_type.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_scales(
        dnnl_primitive_attr_t attr, int arg, int mask, int ndims,
        const dnnl_dims_t group_dims, dnnl_data_type_t data_type);

/// Sets primitive attributes zero points for primitive operations for a given
/// memory argument. The zero points must be passed at execution time
/// as an argument with index #DNNL_ARG_ATTR_ZERO_POINTS | arg.
///
/// @sa dnnl_primitive_attr_set_zero_points_mask
///
///
/// @param attr Primitive attributes.
/// @param arg Parameter argument index as passed to the
///     dnnl_primitive_execute() call.
/// @param mask Zero point correspondence mask that defines the
///     correspondence between the tensor dimensions and the @p
///     zero_points array. The set i-th bit indicates that a dedicated
///     zero point is used for each index along that dimension. Set the
///     mask to 0 to use a common zero point for the whole output tensor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_zero_points_mask(
        dnnl_primitive_attr_t attr, int arg, int mask);

/// Sets primitive attributes zero points for primitive operations for a given
/// memory argument. The zero points must be passed at execution time
/// as an argument with index #DNNL_ARG_ATTR_ZERO_POINTS | arg.
///
/// @sa dnnl_primitive_attr_set_zero_points
///
///
/// @param attr Primitive attributes.
/// @param arg Parameter argument index as passed to the
///     dnnl_primitive_execute() call.
/// @param mask Zero point correspondence mask that defines the
///     correspondence between the tensor dimensions and the @p
///     zero_points array. The set i-th bit indicates that a dedicated
///     zero point is used for each index along that dimension. Set the
///     mask to 0 to use a common zero point for the whole output tensor.
/// @param ndims Number of group dimensions.
/// @param group_dims Zero point factors correspondence groups that define the
///     correspondence between the tensor dimensions and the zero_points array.
///     The group dimensions should be only provided for each logical dimension
///     that has the bit set correspondence mask @p mask set.
/// @param data_type Zero points factors data_type.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_zero_points(
        dnnl_primitive_attr_t attr, int arg, int mask, int ndims,
        const dnnl_dims_t group_dims, dnnl_data_type_t data_type);

/// Sets the rounding mode attribute value for a given argument
///
/// @param attr Primitive attributes.
/// @param arg Argument for which rounding mode should be set.
/// @param mode Rounding mode to apply to the argument.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_rounding(
        dnnl_primitive_attr_t attr, int arg, dnnl_rounding_mode_t mode);

/// Returns the rounding mode attribute value for a given argument
///
/// @param attr Primitive attributes.
/// @param arg Argument for which rounding mode query applies.
/// @param mode Output rounding mode.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_rounding(
        dnnl_primitive_attr_t attr, int arg, dnnl_rounding_mode_t *mode);

/// Returns primitive attributes post-ops.
///
/// @warning
///     The output @p post_ops points to the internal @p attr field, so it is
///     an error to modify or destroy them. The lifetime of @p post_ops is
///     the same as that of the @p attr it belongs to, so it is an error to
///     use @p post_ops after @p attr has been destroyed.
///
/// @param attr Primitive attributes.
/// @param post_ops Output post-ops.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_post_ops(
        const_dnnl_primitive_attr_t attr, const_dnnl_post_ops_t *post_ops);

/// Sets primitive attributes post-ops.
///
/// @note
///     There is no way to check whether the post-ops would be supported by
///     the target primitive. Any error will be reported by the
///     dnnl_<primitive name>_[propagation kind]_primitive_desc_create() function call.
///
/// @param attr Primitive attributes.
/// @param post_ops Post-ops to set.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_post_ops(
        dnnl_primitive_attr_t attr, const_dnnl_post_ops_t post_ops);

/// Creates empty post-ops sequence.
///
/// @param post_ops Output post-ops.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_create(dnnl_post_ops_t *post_ops);

/// Clones post-ops primitive attribute.
///
/// @param post_ops Output post-ops primitive attribute.
/// @param existing_post_ops Post-ops primitive attribute to clone.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_clone(
        dnnl_post_ops_t *post_ops, const_dnnl_post_ops_t existing_post_ops);

/// Destroys post-ops.
///
/// @param post_ops Post-ops to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_destroy(dnnl_post_ops_t post_ops);

/// Returns the length of post-ops.
///
/// @param post_ops Post-ops.
/// @returns The number of post-ops entries.
int DNNL_API dnnl_post_ops_len(const_dnnl_post_ops_t post_ops);

/// Returns the kind of a post-op entry.
///
/// @param post_ops Post-ops.
/// @param index Post-op entry index.
/// @returns The kind of the post-op with the specified index.
/// @returns #dnnl_undefined_primitive if there is no post-op at the specified
///     index.
dnnl_primitive_kind_t DNNL_API dnnl_post_ops_get_kind(
        const_dnnl_post_ops_t post_ops, int index);

/// Appends an accumulation v3 (sum) to post-ops. Prior to accumulating the
/// result, a zero point is subtracted from the previous value and is
/// multiplied by the scale.
///
/// The kind of this post-op is #dnnl_sum.
///
/// This feature may improve performance for cases like dequantize the
/// asymmetrically quantized sum's src1 tensor to f32 domain before performing
/// the sum operation by subtracting the @p zero_point before the scaling.
///
/// In the simplest case where accumulation is the only post-op, the
/// computations will be:
///
///     dst[:] <- scale * (dst[:] - zero_point) + op(...)
///                                             // instead of dst[:] <- op(...)
///
/// If @p data_type is specified, original dst tensor will be reinterpreted
/// as a tensor with provided data type. Since it is reinterpretation,
/// data_type and dst data type should have the same size.
/// As a result, computations will be:
///
///     dst[:] <- scale * (as_data_type(dst[:]) - zero_point) + op(...)
///                                        // instead of dst[:] <- op(...)
/// @note
///     This post-op executes in-place and does not change the
///     destination layout.
///
/// @param post_ops Post-ops.
/// @param scale Accumulation scaling factor.
/// @param zero_point Single scalar int32_t value of zero point.
/// @param data_type Accumulation data_type.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_append_sum(dnnl_post_ops_t post_ops,
        float scale, int32_t zero_point, dnnl_data_type_t data_type);

/// Returns the parameters of an accumulation (sum) post-op with
/// zero point and data type parameter.
///
/// @param post_ops Post-ops.
/// @param index Index of the sum post-op.
/// @param scale Output accumulation scaling factor.
/// @param zero_point Zero point.
/// @param data_type Data type for accumulation.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_get_params_sum(
        const_dnnl_post_ops_t post_ops, int index, float *scale,
        int32_t *zero_point, dnnl_data_type_t *data_type);

/// Appends an elementwise post-op.
///
/// The kind of this post operation is #dnnl_eltwise.
///
/// In the simplest case when the elementwise is the only post operation, the
/// computations would be:
///
///     dst[:] <- eltwise_op (op(...)) // instead of dst[:] <- op(...)
///
/// where eltwise_op is configured with the given parameters.
///
/// @param post_ops Post-ops.
/// @param alg_kind Elementwise algorithm for the post-op.
/// @param alpha Alpha parameter for the elementwise algorithm.
/// @param beta Beta parameter for the elementwise algorithm.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_append_eltwise(dnnl_post_ops_t post_ops,
        dnnl_alg_kind_t alg_kind, float alpha, float beta);

/// Returns the parameters of an elementwise post-op.
///
/// @param post_ops Post-ops.
/// @param index Index of the elementwise post-op.
/// @param alg_kind Output elementwise algorithm kind.
/// @param alpha Output alpha parameter for the elementwise algorithm.
/// @param beta Output beta parameter for the elementwise algorithm.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
/// @returns #dnnl_invalid_arguments if @p index does not refer to an
///     elementwise post-op.
dnnl_status_t DNNL_API dnnl_post_ops_get_params_eltwise(
        const_dnnl_post_ops_t post_ops, int index, dnnl_alg_kind_t *alg_kind,
        float *alpha, float *beta);

/// Appends a depthwise post-op convolution.
///
/// This post-op can only be fused with a 2D 1x1 convolution (convolution with
/// weights spatial dimensions equal to 1 i.e., kh=kw=1).
///
/// The kind of this post-op is #dnnl_convolution.
///
/// The number of outputs for primitive with fusion is one. The output spatial
/// size can be derived as below:
///
/// output_height = ceil(output_height_1x1_convolution, stride)
/// output_width = ceil(output_width_1x1_convolution, stride)
///
/// See @ref dev_guide_attributes_post_ops_depthwise and
/// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
///
/// @param post_ops Post-ops.
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param kernel_size Size of kernel of depthwise post-op
/// @param stride_size Size of stride of depthwise post-op
/// @param padding_l_size Size of left and top paddings of depthwise post-op
/// @returns #dnnl_success on success and a status describing the error
///     otherwise
dnnl_status_t DNNL_API dnnl_post_ops_append_dw(dnnl_post_ops_t post_ops,
        dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type,
        dnnl_data_type_t dst_data_type, dnnl_dim_t kernel_size,
        dnnl_dim_t stride_size, dnnl_dim_t padding_l_size);

/// Returns the parameters of an depthwise post-op.
///
/// @param post_ops Post-ops.
/// @param index Index of the elementwise post-op.
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param kernel_size Size of kernel of depthwise post-op
/// @param stride_size Size of stride of depthwise post-op
/// @param padding_l_size Size of left and top paddings of depthwise post-op
/// @returns #dnnl_success on success and a status describing the error
///     otherwise
dnnl_status_t DNNL_API dnnl_post_ops_get_params_dw(
        const_dnnl_post_ops_t post_ops, int index,
        dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type,
        dnnl_data_type_t *dst_data_type, dnnl_dim_t *kernel_size,
        dnnl_dim_t *stride_size, dnnl_dim_t *padding_l_size);

/// Appends a binary post-op.
///
/// The kind of this post operation is #dnnl_binary.
///
/// In the simplest case when the binary is the only post operation, the
/// computations would be:
///
///     dst[:] <- binary_op (dst[:], another_input[:])
///
/// where binary_op is configured with the given parameters. binary_op supports
/// broadcast semantics for a second operand.
///
/// @param post_ops Post-ops.
/// @param alg_kind Binary algorithm for the post-op.
/// @param src1_desc Memory descriptor of a second operand.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_append_binary(dnnl_post_ops_t post_ops,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src1_desc);

/// Returns the parameters of a binary post-op.
///
/// @param post_ops Post-ops.
/// @param index Index of the binary post-op.
/// @param alg_kind Output binary algorithm kind.
/// @param src1_desc Output memory descriptor of a second operand.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
/// @returns #dnnl_invalid_arguments if @p index does not refer to a binary
///     post-op.
dnnl_status_t DNNL_API dnnl_post_ops_get_params_binary(
        const_dnnl_post_ops_t post_ops, int index, dnnl_alg_kind_t *alg_kind,
        const_dnnl_memory_desc_t *src1_desc);

/// Appends a prelu forward post-op.
///
/// The kind of this post-op is #dnnl::primitive::kind::prelu.
///
/// The post-op can be defined as:
///
///      dst[:] <- prelu(dst[:], weights[:])
///      prelu:
///      dst[:] <- dst[:] if dst[:] > 0
///      dst[:] <- dst[:] * weights[:] if dst[:] <= 0
///
///
/// @note
///     The order of dimensions does not depend on how elements are laid
///     out in memory. For example:
///     - for a 2D CNN activations tensor the order is always (n, c)
///     - for a 4D CNN activations tensor the order is always (n, c, h, w)
///     - for a 5D CNN weights tensor the order is always
///        (g, oc, ic, kh, kw)
///
///    Prelu weights tensor is passed in runtime execution phase. Prelu
///    weights tensor data type is implicitly assumed as f32 using plain
///    layout (a, ab, acb, acdb, acdeb)
///
/// @param post_ops Post-ops.
/// @param mask Defines the correspondence between the output tensor
///     dimensions and the prelu weights tensor. The set i-th bit indicates
///     that a dedicated weights value is used for each index along that
///     dimension. Set the mask to 0 to use a common weights value
///     for the whole output tensor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_append_prelu(
        dnnl_post_ops_t post_ops, int mask);

/// Returns the parameters of a prelu post-op.
///
/// @param post_ops Post-ops.
/// @param index Index of the prelu post-op.
/// @param mask Mask of the prelu post-op.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_post_ops_get_params_prelu(
        const_dnnl_post_ops_t post_ops, int index, int *mask);

/// @} dnnl_api_attributes

/// @} dnnl_api_primitives

/// @addtogroup dnnl_api_memory
/// @{

/// Destroys a memory descriptor.
///
/// @param memory_desc Memory descriptor to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_destroy(dnnl_memory_desc_t memory_desc);

/// Clones a memory descriptor. The resulting memory descriptor must be
/// destroyed separately.
///
/// @param memory_desc Output memory descriptor.
/// @param existing_memory_desc Memory descriptor to clone.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_clone(dnnl_memory_desc_t *memory_desc,
        const_dnnl_memory_desc_t existing_memory_desc);

/// Retrieves a binary blob associated with the given memory descriptor
///
/// @param blob Output pointer to binary blob.
///     If not nullptr, size bytes of the memory descriptor blob are written.
/// @param size Output pointer to the size of the binary blob in bytes.
///     Size is written if blob is nullptr.
/// @param memory_desc input memory descriptor to serialize
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_get_blob(
        uint8_t *blob, size_t *size, const_dnnl_memory_desc_t memory_desc);

/// Creates a memory descriptor from a memory descriptor binary blob.
///
/// @param memory_desc Output pointer to a newly allocated memory descriptor.
/// @param blob Pointer to a memory descriptor binary blob.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_blob(
        dnnl_memory_desc_t *memory_desc, const uint8_t *blob);

/// Creates a memory descriptor using dimensions and strides.
///
/// @note
///     As always, the logical order of dimensions corresponds to the `abc...`
///     format tag, and the physical meaning of the dimensions depends on both
///     the primitive that consumes the memory and the context of that
///     consumption.
///
/// @param memory_desc Output memory descriptor.
/// @param ndims Number of dimensions
/// @param dims Array of dimensions.
/// @param data_type Elements data type.
/// @param strides Strides in each dimension.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_strides(
        dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
        dnnl_data_type_t data_type, const dnnl_dims_t strides);

/// Creates a memory descriptor using dimensions and memory format tag.
///
/// @note
///     As always, the logical order of dimensions corresponds to the `abc...`
///     format tag, and the physical meaning of the dimensions depends on both
///     the primitive that consumes the memory and the context of that
///     consumption.
///
/// @param memory_desc Output memory descriptor.
/// @param ndims Number of dimensions
/// @param dims Array of dimensions.
/// @param data_type Elements data type.
/// @param tag Memory format tag. Can be #dnnl_format_tag_any which would
///     allow a primitive to chose the final memory format. In this case the
///     format_kind field of the memory descriptor would be set to
///     #dnnl_format_kind_any.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_tag(
        dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
        dnnl_data_type_t data_type, dnnl_format_tag_t tag);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory descriptor for CSR encoding.
///
/// @param memory_desc Output memory descriptor.
/// @param ndims Number of dimensions
/// @param dims Array of dimensions.
/// @param data_type Elements data type.
/// @param nnz Number of non-zero entries.
/// @param indices_dt Data type of indices.
/// @param pointers_dt Data type of pointers.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_csr_encoding(
        dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
        dnnl_data_type_t data_type, dnnl_dim_t nnz, dnnl_data_type_t indices_dt,
        dnnl_data_type_t pointers_dt);

/// Creates a memory descriptor for COO encoding.
///
/// The created memory descriptor will describe a memory object that
/// contains n+1 buffers for an n-dimensional tensor.
/// The buffers have the following meaning and assigned numbers (index):
///  - 0: values
///  - 1: indices for dimension 0
///  - 2: indices for dimension 1 ...
///  - n: indices for dimension n-1
///
/// @param memory_desc Output memory descriptor.
/// @param ndims Number of dimensions.
/// @param dims Array of dimensions.
/// @param data_type Elements data type.
/// @param nnz Number of non-zero entries.
/// @param indices_dt Data type of indices.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_coo_encoding(
        dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
        dnnl_data_type_t data_type, dnnl_dim_t nnz,
        dnnl_data_type_t indices_dt);

/// Creates a memory descriptor for packed sparse encoding.
///
/// The created memory descriptor cannot be used to create a memory
/// object. It can only be used to create a primitive descriptor to
/// query the actual memory descriptor (similar to the format tag
/// `any`).
///
/// @warning
///     The meaning and content of the handles of the memory object that
///     is created using the queried memory descriptor are unspecified
///     therefore using the content is an undefined behavior.
///
/// @param memory_desc Output memory descriptor.
/// @param ndims Number of dimensions
/// @param dims Array of dimensions.
/// @param data_type Elements data type.
/// @param nnz Number of non-zero entries.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_packed_encoding(
        dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
        dnnl_data_type_t data_type, dnnl_dim_t nnz);
#endif

/// Creates a memory descriptor for a region inside an area
/// described by an existing memory descriptor.
///
/// @warning
///     Some combinations of physical memory layout and/or offsets or dims may
///     result in a failure to create a submemory.
//
/// @param memory_desc Output memory descriptor.
/// @param parent_memory_desc An existing memory descriptor.
/// @param dims Sizes of the region.
/// @param offsets Offsets to the region from the encompassing
///     memory object in each dimension
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_create_submemory(
        dnnl_memory_desc_t *memory_desc,
        const_dnnl_memory_desc_t parent_memory_desc, const dnnl_dims_t dims,
        const dnnl_dims_t offsets);

/// Creates a memory descriptor by reshaping an existing one. The new
/// memory descriptor inherits the data type. This operation is valid only for
/// memory descriptors that have format_kind #dnnl_blocked or
/// #dnnl_format_kind_any.
///
/// The resulting memory descriptor must be destroyed separately.
///
/// The operation ensures the transformation of the physical memory format
/// corresponds to the transformation of the logical dimensions. If such
/// transformation is impossible, the function returns #dnnl_invalid_arguments.
///
/// The reshape operation can be described as a combination of the following
/// basic operations:
/// 1. Add a dimension of size `1`. This is always possible.
/// 2. Remove a dimension of size `1`. This is possible only if the dimension
///    has no padding (i.e. `padded_dims[dim] == dims[dim] && dims[dim] == 1`).
/// 3. Split a dimension into multiple ones. This is possible only if the size
///    of the dimension is exactly equal to the product of the split ones and
///    the dimension does not have padding (i.e.
///    `padded_dims[dim] = dims[dim]`).
/// 4. Joining multiple consecutive dimensions into a single one. As in the
///    cases above, this requires that the dimensions do not have padding and
///    that the memory format is such that in physical memory these dimensions
///    are dense and have the same order as their logical counterparts. This
///    also assumes that these dimensions are not blocked.
///    - Here, dense means:
///      `stride for dim[i] == (stride for dim[i + 1]) * dim[i + 1]`;
///    - And same order means:
///      `i < j` if and only if `stride for dim[j] <= stride for dim[i]`.
///
/// @warning
///     Some combinations of physical memory layout and/or offsets or
///     dimensions may result in a failure to make a reshape.
///
/// @param out_memory_desc Output memory descriptor.
/// @param in_memory_desc An existing memory descriptor. Must have format_kind
///     set to #dnnl_blocked or #dnnl_format_kind_any.
/// @param ndims Number of dimensions for the output memory descriptor.
/// @param dims Dimensions for the output memory descriptor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_reshape(
        dnnl_memory_desc_t *out_memory_desc,
        const_dnnl_memory_desc_t in_memory_desc, int ndims,
        const dnnl_dims_t dims);

/// Creates a memory descriptor by permuting axes in an existing one.
///
/// The physical memory layout representation is adjusted accordingly to
/// maintain the consistency between the logical and physical parts of the
/// memory descriptor.
///
/// The resulting memory descriptor must be destroyed separately.
///
/// The new memory descriptor inherits the data type. This operation is valid
/// only for memory descriptors that have format_kind set to #dnnl_blocked or
/// #dnnl_format_kind_any.
///
/// The logical axes will be permuted in the following manner:
/// ```
/// for (i: 0 .. in_memory_desc->ndims)
///     out_memory_desc->dims[permutation[i]] = in_memory_desc->dims[i];
/// ```
///
/// Example:
/// @code
///     dnnl_memory_desc_t in_md, out_md, expect_out_md;
///
///     const int permutation[] = {1, 0}; // swap the first and the second axes
///
///     dnnl_dims_t in_dims = {2, 3}, out_dims = {3, 2};
///     dnnl_format_tag_t in_tag = dnnl_ab, out_tag = dnnl_ba;
///
///     dnnl_memory_desc_create_with_tag(
///             &in_md, 2, in_dims, data_type, in_tag);
///     dnnl_memory_desc_create_with_tag(
///             &expect_out_md, 2, out_dims, data_type, out_tag);
///
///     dnnl_memory_desc_permute_axes(&out_md, in_md, permutation);
///     assert(dnnl_memory_desc_equal(out_md, expect_out_md));
///
///     dnnl_memory_desc_destroy(in_md);
///     dnnl_memory_desc_destroy(out_md);
///     dnnl_memory_desc_destroy(expect_out_md);
/// @endcode
///
/// @param out_memory_desc Output memory descriptor.
/// @param in_memory_desc An existing memory descriptor. Must have format_kind
///     set to #dnnl_blocked or #dnnl_format_kind_any.
/// @param permutation Axes permutation (of size `in_memory_desc->ndims`).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_permute_axes(
        dnnl_memory_desc_t *out_memory_desc,
        const_dnnl_memory_desc_t in_memory_desc, const int *permutation);

/// Queries a memory descriptor for various pieces of information.
///
/// The following information can be queried:
///  - Number of dimensions (#dnnl_query_ndims_s32)
///  - Dimensions (#dnnl_query_dims) in the following order:
///    - CNN data tensors: mini-batch, channel, spatial
///      (<code>{N, C, [[D,] H,] W}</code>)
///    - CNN weight tensors: group (optional), output channel, input channel,
///      spatial (<code>{[G,] O, I, [[D,] H,] W}</code>)
///    - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>)
///      or layers, directions, states, mini-batch, channels
///      (<code>{L, D, S, N, C}</code>)
///    - RNN weight tensor: layers, directions, input channel, gates, output
///      channels (<code>{L, D, I, G, O}</code>)
///  - Data type of the tensor elements (#dnnl_query_data_type)
///  - Padded dimensions (#dnnl_query_padded_dims) - size of the data including
///    padding in each dimension
///  - Padded offsets (#dnnl_query_padded_offsets) - per-dimension offset from
///    the padding to actual data, the top-level tensor with offsets applied
///    must lie within the padding area.
///  - Submemory offset (#dnnl_query_submemory_offset_s64) - offset from memory
///    origin to the current block, non-zero only in a description of a memory
///    sub-block.
///  - Format kind (#dnnl_query_format_kind) - memory format kind
///
/// @note
///    The order of dimensions does not depend on the memory format, so
///    whether the data is laid out in #dnnl_nchw or #dnnl_nhwc
///    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
///
/// The following queries are applicable only to format kind #dnnl_blocked.
///  - Strides (#dnnl_query_strides) between the outermost blocks or in case
///    of plain (non-blocked) formats the strides between dimensions
///  - Number of innermost blocks (#dnnl_query_inner_nblks_s32), e.g.
///    `{4, 16, 4}` in case of `OIhw_4i16o4i`
///  - Size of the innermost blocks (#dnnl_query_inner_blks), e.g. 3 in case
///    of `OIhw_4i16o4i_`
///  - Logical indices of the blocks (#dnnl_query_inner_idxs), e.g. `{1, 0, 1}`
///    in case of `4i16o4i`, because `i` is the 1st dim and `o` is the 0st dim
///
/// @param memory_desc Memory descriptor.
/// @param what Parameter to query.
/// @param result Output result. The type depends on the query. For example,
///     it must be a @c dnnl_dims_t** if querying for a strides.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_query(
        const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, void *result);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Queries a memory descriptor for various pieces of information. This version
/// support additional queries #dnnl_query_sparse_encoding, #dnnl_query_nnz_s64
/// #dnnl_query_num_handles_s32 and #dnnl_query_data_type for a particular
/// buffer.
///
/// The following information can be queried:
///  - Number of dimensions (#dnnl_query_ndims_s32)
///  - Dimensions (#dnnl_query_dims) in the following order:
///    - CNN data tensors: mini-batch, channel, spatial
///      (<code>{N, C, [[D,] H,] W}</code>)
///    - CNN weight tensors: group (optional), output channel, input channel,
///      spatial (<code>{[G,] O, I, [[D,] H,] W}</code>)
///    - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>)
///      or layers, directions, states, mini-batch, channels
///      (<code>{L, D, S, N, C}</code>)
///    - RNN weight tensor: layers, directions, input channel, gates, output
///      channels (<code>{L, D, I, G, O}</code>)
///  - Data type of the tensor elements (#dnnl_query_data_type)
///  - Padded dimensions (#dnnl_query_padded_dims) - size of the data including
///    padding in each dimension
///  - Padded offsets (#dnnl_query_padded_offsets) - per-dimension offset from
///    the padding to actual data, the top-level tensor with offsets applied
///    must lie within the padding area.
///  - Submemory offset (#dnnl_query_submemory_offset_s64) - offset from memory
///    origin to the current block, non-zero only in a description of a memory
///    sub-block.
///  - Format kind (#dnnl_query_format_kind) - memory format kind
///
/// @note
///    The order of dimensions does not depend on the memory format, so
///    whether the data is laid out in #dnnl_nchw or #dnnl_nhwc
///    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
///
/// The following queries are applicable only to format kind #dnnl_blocked.
///  - Strides (#dnnl_query_strides) between the outermost blocks or in case
///    of plain (non-blocked) formats the strides between dimensions
///  - Number of innermost blocks (#dnnl_query_inner_nblks_s32), e.g.
///    `{4, 16, 4}` in case of `OIhw_4i16o4i`
///  - Size of the innermost blocks (#dnnl_query_inner_blks), e.g. 3 in case
///    of `OIhw_4i16o4i_`
///  - Logical indices of the blocks (#dnnl_query_inner_idxs), e.g. `{1, 0, 1}`
///    in case of `4i16o4i`, because `i` is the 1st dim and `o` is the 0st dim
///
/// @param memory_desc Memory descriptor.
/// @param what Parameter to query.
/// @param index Index of the parameter to query for. It is mostly used with
///     #dnnl_query_data_type to specify which data type is being queried.
///     The main data type (data type of values) has always index 0. For other
///     indices please refer to the API for creating a memory descriptor for
///     sparse encoding.
/// @param result Output result. The type depends on the query. For example,
///     it must be a @c dnnl_dims_t** if querying for a strides.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_desc_query_v2(
        const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, int index,
        void *result);
#endif

/// Compares two memory descriptors.
///
/// Use this function to identify whether a reorder is required between the
/// two memories
///
/// @param lhs Left-hand side of the comparison.
/// @param rhs Right-hand side of the comparison.
/// @returns 1 if the descriptors are the same.
/// @returns 0 if the descriptors are different.
int DNNL_API dnnl_memory_desc_equal(
        const_dnnl_memory_desc_t lhs, const_dnnl_memory_desc_t rhs);

/// Returns the size of a memory descriptor.
///
/// @param memory_desc Memory descriptor.
/// @returns The number of bytes required for memory described by a memory
///     descriptor.
size_t DNNL_API dnnl_memory_desc_get_size(const_dnnl_memory_desc_t memory_desc);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns the size of the data that corresponds to the given index.
///
/// @param memory_desc Memory descriptor.
/// @param index Index of the buffer.
///
/// @returns The number of bytes required for the requested data.
size_t DNNL_API dnnl_memory_desc_get_size_v2(
        const_dnnl_memory_desc_t memory_desc, int index);
#endif

/// Returns the size of data type.
///
/// @param data_type Data type.
/// @returns The number of bytes occupied by data type.
size_t DNNL_API dnnl_data_type_size(dnnl_data_type_t data_type);

/// Creates a memory object.
///
/// Unless @p handle is equal to DNNL_MEMORY_NONE, the constructed memory
/// object will have the underlying buffer set. In this case, the buffer will
/// be initialized as if dnnl_memory_set_data_handle() had been called.
///
/// @sa dnnl_memory_set_data_handle()
///
/// @param memory Output memory object.
/// @param memory_desc Memory descriptor.
/// @param engine Engine to use.
/// @param handle Handle of the memory buffer to use as an underlying storage.
///     - A pointer to the user-allocated buffer. In this case the library
///       doesn't own the buffer.
///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
///       allocate the buffer for the memory object. In this case the library
///       owns the buffer.
///     - DNNL_MEMORY_NONE to create dnnl_memory without an underlying buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory,
        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
        void *handle);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory Output memory object.
/// @param memory_desc Memory descriptor.
/// @param engine Engine to use.
/// @param nhandles Number of handles.
/// @param handles Handles of the memory buffers to use as underlying storages.
///     For each element of the @p handles array the following applies:
///     - A pointer to the user-allocated buffer. In this case the library
///       doesn't own the buffer.
///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
///       allocate the buffer for the memory object. In this case the library
///       owns the buffer.
///     - DNNL_MEMORY_NONE Instructs the library to skip allocation of the
///       memory buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_create_v2(dnnl_memory_t *memory,
        const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
        int nhandles, void **handles);
#endif

/// Returns the memory descriptor for a memory object.
///
/// @param memory Memory object.
/// @param memory_desc Output memory descriptor (a copy).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_get_memory_desc(
        const_dnnl_memory_t memory, const_dnnl_memory_desc_t *memory_desc);

/// Returns the engine of a memory object.
///
/// @param memory Memory object.
/// @param engine Output engine on which the memory is located.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_get_engine(
        const_dnnl_memory_t memory, dnnl_engine_t *engine);

/// Maps a memory object and returns a host-side pointer to a memory buffer
/// with a copy of its contents.
///
/// Mapping enables explicit direct access to memory contents for the engines
/// that do not support it implicitly.
///
/// Mapping is an exclusive operation - a memory object cannot be used in
/// other operations until this memory object is unmapped.
///
/// @note
///     Any primitives working with @p memory should be completed before
///     the memory is mapped. Use dnnl_stream_wait to synchronize the
///     corresponding execution stream.
///
/// @note
///     The dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are
///     mainly provided for debug and testing purposes, and their performance
///     may be suboptimal.
///
/// @param memory Memory object.
/// @param mapped_ptr Output pointer to the mapped buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_map_data(
        const_dnnl_memory_t memory, void **mapped_ptr);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Maps a memory object and returns a host-side pointer to a memory buffer
/// with a copy of its contents. The memory buffer corresponds to the given
/// index.
///
/// Mapping enables explicit direct access to memory contents for the engines
/// that do not support it implicitly.
///
/// Mapping is an exclusive operation - a memory object cannot be used in
/// other operations until this memory object is unmapped.
///
/// @note
///     Any primitives working with @p memory should be completed before
///     the memory is mapped. Use dnnl_stream_wait to synchronize the
///     corresponding execution stream.
///
/// @note
///     The dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are
///     mainly provided for debug and testing purposes, and their performance
///     may be suboptimal.
///
/// @param memory Memory object.
/// @param mapped_ptr Output pointer to the mapped buffer.
/// @param index Index of the buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_map_data_v2(
        const_dnnl_memory_t memory, void **mapped_ptr, int index);
#endif

/// Unmaps a memory object and writes back any changes made to the previously
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
/// via the dnnl_memory_map_data() call.
///
/// @note
///     The dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are
///     mainly provided for debug and testing purposes, and their performance
///     may be suboptimal.
///
/// @param memory Memory object.
/// @param mapped_ptr Pointer to the mapped buffer that must have been
///     obtained using the dnnl_memory_map_data() function.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_unmap_data(
        const_dnnl_memory_t memory, void *mapped_ptr);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Unmaps a memory object and writes back any changes made to the previously
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
/// via the dnnl_memory_map_data() call. The buffer corresponds to the given
/// index.
///
/// @note
///     The dnnl_memory_map_data() and dnnl_memory_unmap_data() functions are
///     mainly provided for debug and testing purposes, and their performance
///     may be suboptimal.
///
/// @param memory Memory object.
/// @param mapped_ptr Pointer to the mapped buffer that must have been
///     obtained using the dnnl_memory_map_data() function.
/// @param index Index of the buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_unmap_data_v2(
        const_dnnl_memory_t memory, void *mapped_ptr, int index);
#endif

/// Returns memory object's data handle.
///
/// @param memory Memory object.
/// @param handle Output data handle. For the CPU engine, the data handle is a
///     pointer to the actual data. For OpenCL it is a cl_mem.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_get_data_handle(
        const_dnnl_memory_t memory, void **handle);

/// Sets the underlying memory buffer.
///
/// @param memory Memory object.
/// @param handle Data handle. For the CPU engine or when USM is used, the
///     memory buffer is a pointer to the actual data. For OpenCL it is a
///     `cl_mem`.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_set_data_handle(
        dnnl_memory_t memory, void *handle);

#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns an underlying memory buffer that corresponds to the given index.
///
/// @param memory Memory object.
/// @param handle Data handle. For the CPU engine or when USM is used, the
///     memory buffer is a pointer to the actual data. For OpenCL it is a
///     `cl_mem`.
/// @param index Index of the buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_get_data_handle_v2(
        const_dnnl_memory_t memory, void **handle, int index);

/// Sets an underlying memory buffer that corresponds to the given index.
///
/// @param memory Memory object.
/// @param handle Data handle. For the CPU engine or when USM is used, the
///     memory buffer is a pointer to the actual data. For OpenCL it is a
///     `cl_mem`.
/// @param index Index of the buffer.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_set_data_handle_v2(
        dnnl_memory_t memory, void *handle, int index);
#endif

/// Destroys a memory object.
///
/// @param memory Memory object to destroy.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_memory_destroy(dnnl_memory_t memory);

/// @} dnnl_api_memory

/// @addtogroup dnnl_api_primitives
/// @{

/// @addtogroup dnnl_api_reorder
/// @{

/// Creates a primitive descriptor for a reorder primitive.
///
/// @param reorder_primitive_desc Output primitive descriptor.
/// @param src_desc Source memory descriptor.
/// @param src_engine Engine on which the source memory object will be
///     located.
/// @param dst_desc Destination memory descriptor.
/// @param dst_engine Engine on which the destination memory object
///     will be located.
/// @param attr Primitive attributes to use (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_reorder_primitive_desc_create(
        dnnl_primitive_desc_t *reorder_primitive_desc,
        const_dnnl_memory_desc_t src_desc, dnnl_engine_t src_engine,
        const_dnnl_memory_desc_t dst_desc, dnnl_engine_t dst_engine,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_reorder

/// @addtogroup dnnl_api_concat
/// @{

/// Creates a primitive descriptor for an out-of-place concatenation
/// primitive.
///
/// @param concat_primitive_desc Output primitive descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param n Number of source parameters.
/// @param concat_dimension Source tensors will be concatenated over
///     dimension with this index. Note that order of dimensions does
///     not depend on memory format.
/// @param src_descs Array of source memory descriptors with @p n elements.
/// @param attr Primitive attributes to use (can be NULL).
/// @param engine Engine to use.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_concat_primitive_desc_create(
        dnnl_primitive_desc_t *concat_primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t dst_desc, int n, int concat_dimension,
        const_dnnl_memory_desc_t const *src_descs,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_concat

/// @addtogroup dnnl_api_sum
/// @{

/// Creates a primitive descriptor for an (out-of-place) sum primitive.
///
/// @param sum_primitive_desc Output primitive descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param n Number of source parameters.
/// @param scales Vector of scales to multiply data in each source
///     memory by.
/// @param src_descs Array of source memory descriptors having @p n elements.
/// @param attr Primitive attributes to use (can be NULL).
/// @param engine Engine to use.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_sum_primitive_desc_create(
        dnnl_primitive_desc_t *sum_primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t dst_desc, int n, const float *scales,
        const_dnnl_memory_desc_t const *src_descs,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_sum

/// @addtogroup dnnl_api_binary
/// @{

/// Creates a primitive descriptor for a binary primitive.
///
/// @note
///     Memory descriptors @p src1_desc and @p dst_desc are allowed to be
///     initialized with #dnnl_format_tag_any or with format_kind set to
///     #dnnl_format_kind_any.
///
/// @note
///     Both memory descriptors must have the same number of dimensions.
///     Element broadcasting is supported for memory descriptor @p src1_desc
///     and are applied to @p src1_desc dimensions that have size equal to 1.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Algorithm kind. Valid values are #dnnl_binary_add,
///     #dnnl_binary_mul, #dnnl_binary_max, #dnnl_binary_min, #dnnl_binary_div,
///     #dnnl_binary_sub, #dnnl_binary_ge, #dnnl_binary_gt, #dnnl_binary_le,
///     #dnnl_binary_lt, #dnnl_binary_eq and #dnnl_binary_ne.
/// @param src0_desc Source 0 memory descriptor.
/// @param src1_desc Source 1 memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_binary_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src0_desc,
        const_dnnl_memory_desc_t src1_desc, const_dnnl_memory_desc_t dst_desc,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a binary primitive with support of
/// ternary operators.
///
/// @note
///     Memory descriptors @p src1_desc, @p src2_desc and @p dst_desc are
///     allowed to be initialized with #dnnl_format_tag_any or with format_kind
///     set to #dnnl_format_kind_any.
///
/// @note
///     All memory descriptors must have the same number of dimensions.
///     Element broadcasting is supported for memory descriptor @p src1_desc
///     and is applied to @p src1_desc dimensions that have a size equal to 1.
///     There is no broadcasting support for @p src2_desc.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Algorithm kind.
/// @param src0_desc Source 0 memory descriptor.
/// @param src1_desc Source 1 memory descriptor.
/// @param src2_desc Source memory descriptor for ternary operations. Might
///     be empty.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_binary_primitive_desc_create_v2(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src0_desc,
        const_dnnl_memory_desc_t src1_desc, const_dnnl_memory_desc_t src2_desc,
        const_dnnl_memory_desc_t dst_desc, const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_binary

/// @addtogroup dnnl_api_convolution
/// @{

/// Creates a primitive descriptor for a convolution forward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind Convolution algorithm. Possible values are
///     #dnnl_convolution_direct, #dnnl_convolution_winograd,
///     #dnnl_convolution_auto.
/// @param src_desc Source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param bias_desc Bias memory descriptor. Passing NULL, a zero memory
///     descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param dst_desc Destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_convolution_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t bias_desc, const_dnnl_memory_desc_t dst_desc,
        const dnnl_dims_t strides, const dnnl_dims_t dilates,
        const dnnl_dims_t padding_l, const dnnl_dims_t padding_r,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a convolution backward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Convolution algorithm. Possible values are
///     #dnnl_convolution_direct, #dnnl_convolution_winograd,
///     #dnnl_convolution_auto.
/// @param diff_src_desc Diff source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_convolution_backward_data_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t diff_dst_desc, const dnnl_dims_t strides,
        const dnnl_dims_t dilates, const dnnl_dims_t padding_l,
        const dnnl_dims_t padding_r, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a convolution weights gradient primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Convolution algorithm. Possible values are
///     #dnnl_convolution_direct, #dnnl_convolution_winograd,
///     #dnnl_convolution_auto.
/// @param src_desc Source memory descriptor.
/// @param diff_weights_desc Diff weights memory descriptor.
/// @param diff_bias_desc Diff bias memory descriptor. Passing NULL, a zero
///     memory descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_convolution_backward_weights_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t diff_weights_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_desc, const dnnl_dims_t strides,
        const dnnl_dims_t dilates, const dnnl_dims_t padding_l,
        const dnnl_dims_t padding_r, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_convolution

/// @addtogroup dnnl_api_deconvolution
/// @{

/// Creates a primitive descriptor for a deconvolution forward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind Deconvolution algorithm. Possible values are
///     #dnnl_deconvolution_direct, #dnnl_deconvolution_winograd.
/// @param src_desc Source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param bias_desc Bias memory descriptor. Passing NULL, a zero memory
///     descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param dst_desc Destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_deconvolution_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t bias_desc, const_dnnl_memory_desc_t dst_desc,
        const dnnl_dims_t strides, const dnnl_dims_t dilates,
        const dnnl_dims_t padding_l, const dnnl_dims_t padding_r,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a deconvolution backward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Deconvolution algorithm. Possible values are
///     #dnnl_deconvolution_direct, #dnnl_deconvolution_winograd.
/// @param diff_src_desc Diff source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_deconvolution_backward_data_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t diff_dst_desc, const dnnl_dims_t strides,
        const dnnl_dims_t dilates, const dnnl_dims_t padding_l,
        const dnnl_dims_t padding_r, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a deconvolution weights gradient
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// Arrays @p strides, @p dilates, @p padding_l, and @p padding_r contain
/// values for spatial dimensions only and hence must have the same number of
/// elements as there are spatial dimensions. The order of values is the same
/// as in the tensor: depth (for 3D tensors), height (for 3D and 2D tensors),
/// and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Deconvolution algorithm. Possible values are
///     #dnnl_deconvolution_direct, #dnnl_deconvolution_winograd.
/// @param src_desc Source memory descriptor.
/// @param diff_weights_desc Diff weights memory descriptor.
/// @param diff_bias_desc Diff bias memory descriptor. Passing NULL, a zero
///     memory descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param dilates Array of dilations for spatial dimension. A zero value
///     means no dilation in the corresponding dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API
dnnl_deconvolution_backward_weights_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t diff_weights_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_desc, const dnnl_dims_t strides,
        const dnnl_dims_t dilates, const dnnl_dims_t padding_l,
        const dnnl_dims_t padding_r, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_deconvolution

/// @addtogroup dnnl_api_shuffle
/// @{

/// Creates a primitive descriptor for a shuffle forward propagation primitive
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param axis The axis along which the data is shuffled.
/// @param group_size Shuffle group size.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_shuffle_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, int axis, dnnl_dim_t group_size,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a shuffle backward propagation primitive
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param axis The axis along which the data is shuffled.
/// @param group_size Shuffle group size.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_shuffle_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc, int axis, dnnl_dim_t group_size,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_shuffle

/// @addtogroup dnnl_api_eltwise
/// @{

/// Creates a primitive descriptor for an eltwise forward propagation primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind Elementwise algorithm kind.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param alpha The alpha parameter for the elementwise operation. Specific
///     meaning depends on the algorithm.
/// @param beta The beta parameter for the elementwise operation. Specific
///     meaning depends on the algorithm.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_eltwise_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t dst_desc,
        float alpha, float beta, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an eltwise backward propagation
///     primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Elementwise algorithm kind.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param data_desc Destination memory descriptor if one of the
///     "use_dst_for_bwd" algorithms are used (such as
///     #dnnl_eltwise_relu_use_dst_for_bwd), source memory descriptor otherwise.
/// @param alpha The alpha parameter for the elementwise operation. Specific
///     meaning depends on the algorithm.
/// @param beta The beta parameter for the elementwise operation. Specific
///     meaning depends on the algorithm.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_eltwise_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t data_desc, float alpha, float beta,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_eltwise

/// @addtogroup dnnl_api_softmax
/// @{

/// Creates a primitive descriptor for a softmax forward propagation primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind Softmax algorithm kind: either #dnnl_softmax_accurate, or
///     #dnnl_softmax_log.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param softmax_axis Axis over which softmax is computed.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_softmax_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t dst_desc,
        int softmax_axis, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a softmax backward propagation primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Softmax algorithm kind: either #dnnl_softmax_accurate, or
///     #dnnl_softmax_log.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param softmax_axis Axis over which softmax is computed.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_softmax_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t dst_desc, int softmax_axis,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_softmax

/// @addtogroup dnnl_api_pooling
/// @{

/// Creates a primitive descriptor for a pooling forward propagation
///     primitive.
///
/// Arrays @p strides, @p kernel, @p dilation, @p padding_l and @p padding_r
/// contain values for spatial dimensions only and hence must have the same
/// number of elements as there are spatial dimensions. The order of values
/// is the same as in the tensor: depth (for 3D tensors),
/// height (for 3D and 2D tensors), and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind Pooling algorithm kind: either #dnnl_pooling_max,
///     #dnnl_pooling_avg_include_padding, or #dnnl_pooling_avg_exclude_padding.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param kernel Array of kernel spatial dimensions.
/// @param dilation Array of dilations for spatial dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_pooling_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t dst_desc,
        const dnnl_dims_t strides, const dnnl_dims_t kernel,
        const dnnl_dims_t dilation, const dnnl_dims_t padding_l,
        const dnnl_dims_t padding_r, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a pooling backward propagation
///     primitive.
///
/// Arrays @p strides, @p kernel, @p dilation, @p padding_l and @p padding_r
/// contain values for spatial dimensions only and hence must have the same
/// number of elements as there are spatial dimensions. The order of values
/// is the same as in the tensor: depth (for 3D tensors),
/// height (for 3D and 2D tensors), and width.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind Pooling algorithm kind: either #dnnl_pooling_max,
///     #dnnl_pooling_avg_include_padding, or #dnnl_pooling_avg_exclude_padding.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param strides Array of strides for spatial dimension.
/// @param kernel Array of kernel spatial dimensions.
/// @param dilation Array of dilations for spatial dimension.
/// @param padding_l Array of padding values for low indices for each spatial
///     dimension `([[front,] top,] left)`.
/// @param padding_r Array of padding values for high indices for each spatial
///     dimension `([[back,] bottom,] right)`. Can be NULL in which case
///     padding is considered to be symmetrical.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_pooling_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc, const dnnl_dims_t strides,
        const dnnl_dims_t kernel, const dnnl_dims_t dilation,
        const dnnl_dims_t padding_l, const dnnl_dims_t padding_r,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_pooling

/// @addtogroup dnnl_api_prelu
/// @{

/// Creates a primitive descriptor for a PReLU (leaky ReLU with trainable
///     alpha parameter) forward propagation primitive.
///
/// @note
///     weights descriptor is allowed to be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param weights_desc Alpha parameters memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_prelu_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t dst_desc, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a PReLU (leaky ReLU with trainable
///     alpha parameter) backward propagation primitive.
///
/// @note
///     weights descriptor and diff_weights descriptor are allowed
///     to be initialized with #dnnl_format_tag_any or with format_kind
///     set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param src_desc Source memory descriptor.
/// @param weights_desc Alpha parameters memory descriptor.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_weights_desc Diff alpha parameters memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_prelu_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_weights_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_prelu

/// @addtogroup dnnl_api_lrn
/// @{

/// Creates a primitive descriptor for an LRN forward propagation primitive.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind LRN algorithm kind: either #dnnl_lrn_across_channels or
///     #dnnl_lrn_within_channel.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param local_size Regularization local size.
/// @param alpha The alpha regularization parameter.
/// @param beta The beta regularization parameter.
/// @param k The k regularization parameter.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lrn_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t dst_desc,
        dnnl_dim_t local_size, float alpha, float beta, float k,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an LRN backward propagation primitive.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param alg_kind LRN algorithm kind: either #dnnl_lrn_across_channels or
///     #dnnl_lrn_within_channel.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param src_desc Source memory descriptor.
/// @param local_size Regularization local size.
/// @param alpha The alpha regularization parameter.
/// @param beta The beta regularization parameter.
/// @param k The k regularization parameter.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lrn_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t src_desc, dnnl_dim_t local_size, float alpha,
        float beta, float k, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_lrn

/// @addtogroup dnnl_api_batch_normalization
/// @{

/// Creates a primitive descriptor for a batch normalization forward propagation
///     primitive.
///
/// @note
///     In-place operation is supported: the dst can refer to the same memory
///     as the src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param epsilon Batch normalization epsilon parameter.
/// @param flags Batch normalization flags (@ref dnnl_normalization_flags_t).
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_batch_normalization_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, float epsilon, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a batch normalization backward
///     propagation primitive.
///
/// @note
///     In-place operation is supported: the diff_dst can refer to the same
///     memory as the diff_src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_backward_data and #dnnl_backward (diffs for all parameters are
///     computed in this case).
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param src_desc Source memory descriptor.
/// @param epsilon Batch normalization epsilon parameter.
/// @param flags Batch normalization flags (@ref dnnl_normalization_flags_t).
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_batch_normalization_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t src_desc, float epsilon, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_batch_normalization

/// @addtogroup dnnl_api_group_normalization
/// @{

/// Creates a primitive descriptor for a group normalization forward propagation
///     primitive.
///
/// @note
///     In-place operation is supported: the dst can refer to the same memory
///     as the src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param groups Group normalization groups parameter.
/// @param epsilon Group normalization epsilon parameter.
/// @param flags Group normalization flags (@ref dnnl_normalization_flags_t).
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_group_normalization_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, dnnl_dim_t groups, float epsilon,
        unsigned flags, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a group normalization backward
///     propagation primitive.
///
/// @note
///     In-place operation is supported: the diff_dst can refer to the same
///     memory as the diff_src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_backward_data and #dnnl_backward (diffs for all parameters are
///     computed in this case).
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param src_desc Source memory descriptor.
/// @param groups Group normalization groups parameter.
/// @param epsilon Group normalization epsilon parameter.
/// @param flags Group normalization flags (@ref dnnl_normalization_flags_t).
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_group_normalization_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t src_desc, dnnl_dim_t groups, float epsilon,
        unsigned flags, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_group_normalization

/// @addtogroup dnnl_api_layer_normalization
/// @{

/// Creates a primitive descriptor for a layer normalization forward propagation
///     primitive.
///
/// @note
///     In-place operation is supported: the dst can refer to the same memory
///     as the src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param stat_desc Memory descriptor for mean and variance. If this
///     parameter is NULL, a zero memory descriptor, or a memory descriptor
///     with format_kind set to #dnnl_format_kind_undef, then the memory
///     descriptor for stats is derived from @p src_desc by removing the last
///     dimension.
/// @param epsilon Layer normalization epsilon parameter.
/// @param flags Layer normalization flags (@ref dnnl_normalization_flags_t).
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_layer_normalization_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, const_dnnl_memory_desc_t stat_desc,
        float epsilon, unsigned flags, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a layer normalization backward
///     propagation primitive.
///
/// @note
///     In-place operation is supported: the diff_dst can refer to the same
///     memory as the diff_src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_backward_data and #dnnl_backward (diffs for all parameters are
///     computed in this case).
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param src_desc Source memory descriptor.
/// @param stat_desc Memory descriptor for mean and variance. If this
///     parameter is NULL, a zero memory descriptor, or a memory descriptor
///     with format_kind set to #dnnl_format_kind_undef, then the memory
///     descriptor for stats is derived from @p src_desc by removing the last
///     dimension.
/// @param epsilon Layer normalization epsilon parameter.
/// @param flags Layer normalization flags (@ref dnnl_normalization_flags_t).
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_layer_normalization_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t stat_desc,
        float epsilon, unsigned flags, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a layer normalization forward propagation
///     primitive with a user-provided data type for the scale and shift
///     memory objects.
///
/// @note
///     In-place operation is supported: the dst can refer to the same memory
///     as the src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param stat_desc Memory descriptor for mean and variance. If this
///     parameter is NULL, a zero memory descriptor, or a memory descriptor
///     with format_kind set to #dnnl_format_kind_undef, then the memory
///     descriptor for stats is derived from @p src_desc by removing the last
///     dimension.
/// @param scale_shift_data_type Data type of scale and shift memory. If neither scale
///     nor shift flag are specified the parameter is ignored.
/// @param epsilon Layer normalization epsilon parameter.
/// @param flags Layer normalization flags (@ref dnnl_normalization_flags_t).
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API
dnnl_layer_normalization_forward_primitive_desc_create_v2(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, const_dnnl_memory_desc_t stat_desc,
        dnnl_data_type_t scale_shift_data_type, float epsilon, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a layer normalization backward
///     propagation primitive with a user-provided data type for the
///     scale and shift memory objects.
///
/// @note
///     In-place operation is supported: the diff_dst can refer to the same
///     memory as the diff_src.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_backward_data and #dnnl_backward (diffs for all parameters are
///     computed in this case).
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param src_desc Source memory descriptor.
/// @param stat_desc Memory descriptor for mean and variance. If this
///     parameter is NULL, a zero memory descriptor, or a memory descriptor
///     with format_kind set to #dnnl_format_kind_undef, then the memory
///     descriptor for stats is derived from @p src_desc by removing the last
///     dimension.
/// @param diff_scale_shift_data_type Data type of diff scale and shift memory. If neither scale
///     nor shift flag are specified the parameter is ignored.
/// @param scale_shift_data_type Data type of scale and shift memory. If neither scale
///     nor shift flag are specified the parameter is ignored.
/// @param epsilon Layer normalization epsilon parameter.
/// @param flags Layer normalization flags (@ref dnnl_normalization_flags_t).
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API
dnnl_layer_normalization_backward_primitive_desc_create_v2(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_memory_desc_t src_desc, const_dnnl_memory_desc_t stat_desc,
        dnnl_data_type_t diff_scale_shift_data_type,
        dnnl_data_type_t scale_shift_data_type, float epsilon, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_layer_normalization

/// @addtogroup dnnl_api_inner_product
/// @{

/// Creates a primitive descriptor for an inner product forward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param src_desc Source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param bias_desc Bias memory descriptor. Passing NULL, a zero memory
///     descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_inner_product_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t bias_desc, const_dnnl_memory_desc_t dst_desc,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an inner product backward propagation
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param diff_src_desc Diff source memory descriptor.
/// @param weights_desc Weights memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_inner_product_backward_data_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an inner product  weights gradient
///     primitive.
///
/// @note
///     Memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive_descriptor.
/// @param engine Engine to use.
/// @param src_desc Source memory descriptor.
/// @param diff_weights_desc Diff weights memory descriptor.
/// @param diff_bias_desc Diff bias memory descriptor. Passing NULL, a zero
///     memory descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API
dnnl_inner_product_backward_weights_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t diff_weights_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_inner_product

/// @addtogroup dnnl_api_attributes
/// @{

/// Set quantization scale and shift parameters for RNN data tensors.
///
/// For performance reasons, the low-precision configuration of the RNN
/// primitives expects input activations to have the unsigned 8-bit integer
/// data type. The scale and shift parameters are used to quantize
/// floating-point data to unsigned integer and must be passed to the RNN
/// primitive using attributes.
///
/// The quantization formula is `scale * data + shift`.
///
/// @note
///     Quantization scale and shift are common for src_layer, src_iter,
///     dst_iter, and dst_layer.
///
/// Example usage:
/// @code
///     // RNN parameters
///     int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
///     // Activations quantization parameters
///     float scale = 63.f, shift = 64.f;
///
///     dnnl_primitive_attr_t rnn_attr;
///     // Create default attributes
///     dnnl_primitive_attr_create(&rnn_attr);
///
///     // Set scale and shift for int8 quantization of activation
///     dnnl_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
///
///     // Create an RNN primitive descriptor.
///     dnnl_primitive_desc_t rnn_pd;
///     dnnl_vanilla_rnn_forward_primitive_desc_create(&rnn_pd,
///             engine, /* arguments */, attr);
/// @endcode
///
/// @param attr Primitive attributes.
/// @param scale The value to scale the data by.
/// @param shift The value to shift the data by.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_data_qparams(
        dnnl_primitive_attr_t attr, const float scale, const float shift);

/// Returns the quantization scale and shift parameters for RNN data tensors.
///
/// @note
///     Quantization scale and shift are common for src_layer, src_iter,
///     dst_iter, and dst_layer.
///
/// @param attr Primitive attributes.
/// @param scale The value to scale the data by.
/// @param shift The value to shift the data by.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_data_qparams(
        const_dnnl_primitive_attr_t attr, float *scale, float *shift);

/// Sets quantization scaling factors for RNN weights tensors. The
/// low-precision configuration of the RNN primitives expects input weights to
/// use the signed 8-bit integer data type. The scaling factors are used to
/// quantize floating-point data to signed integer and must be passed to RNN
/// primitives using attributes.
///
/// @note
///     The dimension order is always native and does not depend on the actual
///     layout used. For example, five-dimensional weights always have (l, d,
///     i, g, o) logical dimension ordering.
///
/// @note
///     Quantization scales are common for weights_layer and weights_iteration
///
/// @param attr Primitive attributes.
/// @param count Number of elements in the @p scales array.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the output tensor dimensions and the @p
///     scales vector. The set i-th bit indicates that a dedicated scaling
///     factor should be used for each index along that dimension. Set the
///     mask to 0 to use a common scaling factor for the whole output
///     tensor.
/// @param scales Array of output scaling factors that must contain @p count
///     values and the following equality must hold:
///     \f[count = \prod\limits_{d \in mask} weights.dims[d].\f]
///     Violations can only be detected when the attributes are used to create
///     a primitive descriptor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_weights_qparams(
        dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask,
        const float *scales);

/// Returns the quantization scaling factors for RNN weights tensors.
///
/// @param attr Primitive attributes.
/// @param count Number of elements in the @p scales array.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the output tensor dimensions and the @p
///     scales vector. The set i-th bit indicates that a dedicated scaling
///     factor should be used for each index along that dimension. Set the
///     mask to 0 to use a common scaling factor for the whole output
///     tensor.
/// @param scales Array of output scaling factors that contain @p count
///     values and the following equality must hold:
///     \f[count = \prod\limits_{d \in mask} weights.dims[d].\f]
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_weights_qparams(
        const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask,
        const float **scales);

/// Sets quantization scaling factors for RNN projection weights tensors. The
/// low-precision configuration of the RNN primitives expects input weights to
/// use the signed 8-bit integer data type. The scaling factors are used to
/// quantize floating-point data to signed integer and must be passed to RNN
/// primitives using attributes.
///
/// @note
///     The dimension order is always native and does not depend on the actual
///     layout used. For example, five-dimensional weights always have (l, d,
///     i, g, o) logical dimension ordering.
///
/// @param attr Primitive attributes.
/// @param count Number of elements in the @p scales array.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the output tensor dimensions and the @p
///     scales vector. The set i-th bit indicates that a dedicated scaling
///     factor should be used for each index along that dimension. Set the
///     mask to 0 to use a common scaling factor for the whole output
///     tensor.
/// @param scales Array of output scaling factors that must contain @p count
///     values and the following equality must hold:
///     \f[count = \prod\limits_{d \in mask} weights.dims[d].\f]
///     Violations can only be detected when the attributes are used to create
///     a primitive descriptor.
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_weights_projection_qparams(
        dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask,
        const float *scales);

/// Returns the quantization scaling factors for RNN projection weights tensors.
///
/// @param attr Primitive attributes.
/// @param count Number of elements in the @p scales array.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the output tensor dimensions and the @p
///     scales vector. The set i-th bit indicates that a dedicated scaling
///     factor should be used for each index along that dimension. Set the
///     mask to 0 to use a common scaling factor for the whole output
///     tensor.
/// @param scales Array of output scaling factors that contain @p count
///     values and the following equality must hold:
///     \f[count = \prod\limits_{d \in mask} weights.dims[d].\f]
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_weights_projection_qparams(
        const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask,
        const float **scales);

/// @} dnnl_api_attributes

/// @addtogroup dnnl_api_rnn
/// @{

/// Creates a primitive descriptor for vanilla RNN forward propagation
///     primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc.
///
/// This would then indicate that the RNN forward propagation primitive should
/// not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param activation Activation kind. Possible values are #dnnl_eltwise_relu,
///     #dnnl_eltwise_tanh or #dnnl_eltwise_logistic.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param flags Unused.
/// @param alpha Negative slope if activation is #dnnl_eltwise_relu.
/// @param beta Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_vanilla_rnn_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const dnnl_alg_kind_t activation,
        const dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc, unsigned flags, float alpha,
        float beta, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for vanilla RNN backward propagation
///     primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p diff_src_iter_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p diff_dst_iter_desc.
///
/// This would then indicate that the RNN backward propagation primitive should
/// not use the respective data and should use zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param activation Activation kind. Possible values are #dnnl_eltwise_relu,
///     #dnnl_eltwise_tanh or #dnnl_eltwise_logistic.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param flags Unused.
/// @param alpha Negative slope if activation is #dnnl_eltwise_relu.
/// @param beta Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_vanilla_rnn_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, const dnnl_alg_kind_t activation,
        const dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc, unsigned flags,
        float alpha, float beta, const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an LSTM forward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p src_iter_c_desc,
/// - @p weights_peephole_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc together with @p dst_iter_c_desc.
///
/// This would then indicate that the LSTM forward propagation primitive should
/// not use them and should default to zero values instead.
///
/// The @p weights_projection_desc could either be @c NULL or point to a zero
/// memory descriptor. This would then indicate that the LSTM doesn't have
/// recurrent projection layer.
///
/// @note
///     All memory descriptors can be initialized with #dnnl_format_tag_any or
///     with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param src_iter_c_desc Memory descriptor for the input recurrent cell
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param weights_peephole_desc Memory descriptor for the weights applied to
///     the cell states (according to the Peephole LSTM formula).
/// @param weights_projection_desc Memory descriptor for the weights applied to
///     the hidden states to get the recurrent projection (according to the
///     Projection LSTM formula).
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param dst_iter_c_desc Memory descriptor for the output recurrent cell
///     state vector.
/// @param flags Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lstm_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t src_iter_c_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t weights_peephole_desc,
        const_dnnl_memory_desc_t weights_projection_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t dst_iter_c_desc, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for an LSTM backward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p src_iter_c_desc, @p diff_src_iter_desc,
///   and @p diff_src_iter_c_desc,
/// - @p weights_peephole_desc together with @p diff_weights_peephole_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p dst_iter_c_desc, @p diff_dst_iter_desc,
///   and @p diff_dst_iter_c_desc.
///
/// This would then indicate that the LSTM backward propagation primitive
/// should not use them and should default to zero values instead.
///
/// The @p weights_projection_desc together with @p
/// diff_weights_projection_desc could either be @c NULL or point to a zero
/// memory descriptor. This would then indicate that the LSTM doesn't have
/// recurrent projection layer.
///
/// @note
///     All memory descriptors can be initialized with #dnnl_format_tag_any or
///     with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param src_iter_c_desc Memory descriptor for the input recurrent cell
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param weights_peephole_desc Memory descriptor for the weights applied to
///     the cell states (according to the Peephole LSTM formula).
/// @param weights_projection_desc Memory descriptor for the weights applied to
///     the hidden states to get the recurrent projection (according to the
///     Projection LSTM formula).
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param dst_iter_c_desc Memory descriptor for the output recurrent cell
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_src_iter_c_desc Memory descriptor for the diff of input
/// recurrent cell state vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_weights_peephole_desc Memory descriptor for the diff of weights
///     applied to the cell states (according to the Peephole LSTM formula).
/// @param diff_weights_projection_desc Memory descriptor for the diff of
///     weights applied to the hidden states to get the recurrent projection
///     (according to the Projection LSTM formula).
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param diff_dst_iter_c_desc Memory descriptor for the diff of output
///     recurrent cell state vector.
/// @param flags Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lstm_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t src_iter_c_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t weights_peephole_desc,
        const_dnnl_memory_desc_t weights_projection_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t dst_iter_c_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_src_iter_c_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_weights_peephole_desc,
        const_dnnl_memory_desc_t diff_weights_projection_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc,
        const_dnnl_memory_desc_t diff_dst_iter_c_desc, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for GRU forward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc.
///
/// This would then indicate that the GRU forward propagation primitive should
/// not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param flags Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_gru_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for GRU backward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p diff_src_iter_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p diff_dst_iter_desc.
///
/// This would then indicate that the GRU backward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param flags Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_gru_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a descriptor for LBR GRU forward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc.
///
/// This would then indicate that the LBR GRU forward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param flags Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lbr_gru_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for LBR GRU backward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p diff_src_iter_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p diff_dst_iter_desc.
///
/// This would then indicate that the LBR GRU backward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param flags Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lbr_gru_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for AUGRU forward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc.
///
/// This would then indicate that the AUGRU forward propagation primitive should
/// not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param attention_desc Memory descriptor for the attention vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param flags Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_augru_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t attention_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for AUGRU backward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p diff_src_iter_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p diff_dst_iter_desc.
///
/// This would then indicate that the AUGRU backward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param attention_desc Memory descriptor for the attention vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_attention_desc Memory descriptor for the diff of attention vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param flags Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_augru_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t attention_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_attention_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for LBR AUGRU forward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc,
/// - @p bias_desc,
/// - @p dst_iter_desc.
///
/// This would then indicate that the LBR AUGRU forward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param attention_desc Memory descriptor for the attention vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param flags Unused.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lbr_augru_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t attention_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc, unsigned flags,
        const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for LBR AUGRU backward propagation primitive.
///
/// The following arguments may either be @c NULL or point to a zero memory
/// descriptor:
/// - @p src_iter_desc together with @p diff_src_iter_desc,
/// - @p bias_desc together with @p diff_bias_desc,
/// - @p dst_iter_desc together with @p diff_dst_iter_desc.
///
/// This would then indicate that the LBR AUGRU backward propagation primitive
/// should not use them and should default to zero values instead.
///
/// @note
///     All memory descriptors can be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Must be #dnnl_backward.
/// @param direction RNN direction. See @ref dnnl_rnn_direction_t for more
///     info.
/// @param src_layer_desc Memory descriptor for the input vector.
/// @param src_iter_desc Memory descriptor for the input recurrent hidden
///     state vector.
/// @param attention_desc Memory descriptor for the attention vector.
/// @param weights_layer_desc Memory descriptor for the weights applied to the
///     layer input.
/// @param weights_iter_desc Memory descriptor for the weights applied to the
///     recurrent input.
/// @param bias_desc Bias memory descriptor.
/// @param dst_layer_desc Memory descriptor for the output vector.
/// @param dst_iter_desc Memory descriptor for the output recurrent hidden
///     state vector.
/// @param diff_src_layer_desc Memory descriptor for the diff of input vector.
/// @param diff_src_iter_desc Memory descriptor for the diff of input recurrent
///     hidden state vector.
/// @param diff_attention_desc Memory descriptor for the diff of attention vector.
/// @param diff_weights_layer_desc Memory descriptor for the diff of weights
///     applied to the layer input.
/// @param diff_weights_iter_desc Memory descriptor for the diff of weights
///     applied to the recurrent input.
/// @param diff_bias_desc Diff bias memory descriptor.
/// @param diff_dst_layer_desc Memory descriptor for the diff of output
///     vector.
/// @param diff_dst_iter_desc Memory descriptor for the diff of output
///     recurrent hidden state vector.
/// @param flags Unused.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_lbr_augru_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_rnn_direction_t direction,
        const_dnnl_memory_desc_t src_layer_desc,
        const_dnnl_memory_desc_t src_iter_desc,
        const_dnnl_memory_desc_t attention_desc,
        const_dnnl_memory_desc_t weights_layer_desc,
        const_dnnl_memory_desc_t weights_iter_desc,
        const_dnnl_memory_desc_t bias_desc,
        const_dnnl_memory_desc_t dst_layer_desc,
        const_dnnl_memory_desc_t dst_iter_desc,
        const_dnnl_memory_desc_t diff_src_layer_desc,
        const_dnnl_memory_desc_t diff_src_iter_desc,
        const_dnnl_memory_desc_t diff_attention_desc,
        const_dnnl_memory_desc_t diff_weights_layer_desc,
        const_dnnl_memory_desc_t diff_weights_iter_desc,
        const_dnnl_memory_desc_t diff_bias_desc,
        const_dnnl_memory_desc_t diff_dst_layer_desc,
        const_dnnl_memory_desc_t diff_dst_iter_desc, unsigned flags,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_rnn

/// @addtogroup dnnl_api_matmul
/// @{

/// Creates a primitive descriptor for a matrix multiplication primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param src_desc Source memory descriptor (matrix A)
/// @param weights_desc Weights memory descriptor (matrix B)
/// @param bias_desc Bias memory descriptor. Passing NULL, a zero memory
///     descriptor, or a memory descriptor with format_kind set to
///     #dnnl_format_kind_undef disables the bias term.
/// @param dst_desc Destination memory descriptor (matrix C).
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_matmul_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t weights_desc,
        const_dnnl_memory_desc_t bias_desc, const_dnnl_memory_desc_t dst_desc,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_matmul

/// @addtogroup dnnl_api_resampling Resampling
/// @{

/// Creates a primitive descriptor for a resampling forward propagation
///     primitive.
///
/// @note
///     Destination memory descriptor is allowed to be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param prop_kind Propagation kind. Possible values are
///     #dnnl_forward_training and #dnnl_forward_inference.
/// @param alg_kind resampling algorithm kind: either #dnnl_resampling_nearest,
///     or #dnnl_resampling_linear.
/// @param factors Array of scaling factors for spatial dimension.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_resampling_forward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind,
        const float *factors, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, const_dnnl_primitive_attr_t attr);

/// Creates a primitive descriptor for a resampling backward propagation
///     primitive.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind resamplinging algorithm kind: either
///     #dnnl_resampling_nearest, or #dnnl_resampling_linear.
/// @param diff_src_desc Diff source memory descriptor.
/// @param diff_dst_desc Diff destination memory descriptor.
/// @param factors Array of scaling factors for spatial dimension.
/// @param hint_fwd_pd Primitive descriptor for a respective forward propagation
///     primitive.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
///
dnnl_status_t DNNL_API dnnl_resampling_backward_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const float *factors,
        const_dnnl_memory_desc_t diff_src_desc,
        const_dnnl_memory_desc_t diff_dst_desc,
        const_dnnl_primitive_desc_t hint_fwd_pd,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_resampling

/// @addtogroup dnnl_api_reduction Reduction
/// @{

/// Creates a primitive descriptor for a reduction primitive.
///
/// @note
///     Destination memory descriptor is allowed to be initialized with
///     #dnnl_format_tag_any or with format_kind set to #dnnl_format_kind_any.
///
/// @param primitive_desc Output primitive descriptor.
/// @param engine Engine to use.
/// @param alg_kind reduction algorithm kind. Possible values:
///     #dnnl_reduction_max, #dnnl_reduction_min, #dnnl_reduction_sum,
///     #dnnl_reduction_mul, #dnnl_reduction_mean, #dnnl_reduction_norm_lp_max,
///     #dnnl_reduction_norm_lp_sum, #dnnl_reduction_norm_lp_power_p_max,
///     #dnnl_reduction_norm_lp_power_p_sum.
/// @param p Algorithm specific parameter.
/// @param eps Algorithm specific parameter.
/// @param src_desc Source memory descriptor.
/// @param dst_desc Destination memory descriptor.
/// @param attr Primitive attributes (can be NULL).
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_reduction_primitive_desc_create(
        dnnl_primitive_desc_t *primitive_desc, dnnl_engine_t engine,
        dnnl_alg_kind_t alg_kind, const_dnnl_memory_desc_t src_desc,
        const_dnnl_memory_desc_t dst_desc, float p, float eps,
        const_dnnl_primitive_attr_t attr);

/// @} dnnl_api_reduction

/// @} dnnl_api_primitives

/// @addtogroup dnnl_api_primitive_cache
/// @{

/// Returns the number of primitives that can be held in the primitive cache
/// at the same time.
///
/// @param capacity Primitive cache capacity to query. Concurrently
/// accessing @p capacity is safe.
/// @returns #dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the
///     @p capacity value is invalid, and #dnnl_success/#dnnl::status::success on
///     success.
dnnl_status_t DNNL_API dnnl_get_primitive_cache_capacity(int *capacity);

/// Sets a number of primitives that can be held in the primitive cache
/// at a time.
///
/// @param capacity Primitive cache capacity to set. If a new @p capacity is
/// less than a number of primitives that the primitive cache already has
/// then the excess entries will be evicted. Setting the @p capacity to 0
/// clears the primitive cache and disables it. Concurrently modifying
/// @p capacity is safe.
/// @returns #dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the
///     @p capacity value is invalid, and #dnnl_success/#dnnl::status::success on
///     success.
dnnl_status_t DNNL_API dnnl_set_primitive_cache_capacity(int capacity);

/// @} dnnl_api_primitive_cache

/// @addtogroup dnnl_api_service
/// @{

/// Configures dumping of JIT-generated code.
///
/// @note
///     This setting overrides the DNNL_JIT_DUMP environment variable.
///
/// @param enable Flag value. Set to 0 to disable and set to 1 to enable.
/// @returns #dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the
///     @p flag value is invalid, and #dnnl_success/#dnnl::status::success on
///     success.
dnnl_status_t DNNL_API dnnl_set_jit_dump(int enable);

/// Sets library profiling flags. The flags define which profilers are
/// supported.
///
/// @note
///     This setting overrides DNNL_JIT_PROFILE environment variable.
///
/// @sa @ref dev_guide_profilers
///
/// @param flags Profiling flags that can contain the following bits:
///     - @ref DNNL_JIT_PROFILE_VTUNE -- integration with VTune Profiler
///         (on by default)
///     - @ref DNNL_JIT_PROFILE_LINUX_JITDUMP -- produce Linux-specific
///         jit-pid.dump output (off by default). The location of the output
///         is controlled via JITDUMPDIR environment variable or via
///         dnnl_set_jit_profiling_jitdumpdir() function.
///     - @ref DNNL_JIT_PROFILE_LINUX_PERFMAP -- produce Linux-specific
///         perf-pid.map output (off by default). The output is always placed
///         into /tmp.
///
///     Passing @ref DNNL_JIT_PROFILE_NONE disables profiling completely.
///
/// @returns #dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the
///     @p flags value is invalid, and #dnnl_success/#dnnl::status::success on
///     success.
dnnl_status_t DNNL_API dnnl_set_jit_profiling_flags(unsigned flags);

/// Sets JIT dump output path. Only applicable to Linux and is only
/// used when profiling flags have DNNL_JIT_PROFILE_LINUX_PERF bit set.
///
/// After the first JIT kernel is generated, the jitdump output will be placed
/// into temporary directory created using the mkdtemp template
/// 'dir/.debug/jit/dnnl.XXXXXX'.
///
/// @sa @ref dev_guide_profilers
///
/// @note
///     This setting overrides JITDUMPDIR environment variable.  If
///     JITDUMPDIR is not set, and this function is never called, the path
///     defaults to HOME. Passing NULL reverts the value to default.
///
/// @note
///     The directory is accessed only when the first JIT kernel is being
///     created. JIT profiling will be disabled in case of any errors
///     accessing or creating this directory.
///
/// @param dir JIT dump output path.
/// @returns #dnnl_success/#dnnl::status::success if the
///     output directory was set correctly and an error status otherwise.
/// @returns #dnnl_unimplemented/#dnnl::status::unimplemented on Windows.
dnnl_status_t DNNL_API dnnl_set_jit_profiling_jitdumpdir(const char *dir);

/// Sets the maximal ISA the library can dispatch to on the CPU. See
/// #dnnl_cpu_isa_t and #dnnl::cpu_isa for the list of the values accepted by
/// the C and C++ API functions respectively.
///
/// This function has effect only once, and returns an error on subsequent
/// calls. It should also be invoked before any other oneDNN API call, otherwise
/// it may return an error.
///
/// This function overrides the DNNL_MAX_CPU_ISA environment variable. The
/// environment variable can be set to the desired maximal ISA name in upper
/// case and with dnnl_cpu_isa prefix removed. For example:
/// `DNNL_MAX_CPU_ISA=AVX2`.
///
/// @note
///     The ISAs are only partially ordered:
///         - SSE41 < AVX < AVX2 < AVX2_VNNI < AVX2_VNNI_2,
///         - AVX2 < AVX512_CORE < AVX512_CORE_VNNI < AVX512_CORE_BF16
///           < AVX10_1_512 < AVX10_1_512_AMX < AVX10_1_512_AMX_FP16,
///         - AVX2_VNNI < AVX10_1_512.
///     Aliases:
///         - AVX512_CORE_FP16 = AVX10_1_512
///         - AVX512_CORE_AMX = AVX10_1_512_AMX
///         - AVX512_CORE_AMX_FP16 = AVX10_1_512_AMX_FP16
///
/// @sa @ref dev_guide_cpu_dispatcher_control for more details
///
/// @param isa Maximal ISA the library should dispatch to. Pass
///     #dnnl_cpu_isa_default/#dnnl::cpu_isa::isa_default to remove ISA restrictions
///     (except for ISAs with initial support in the library).
/// @returns #dnnl_success/#dnnl::status::success on success and a
///     #dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the @p isa
///     parameter is invalid or the ISA cannot be changed at this time.
/// @returns #dnnl_unimplemented/#dnnl::status::unimplemented if the feature
///     was disabled at build time (see @ref dev_guide_build_options for more
///     details).
dnnl_status_t DNNL_API dnnl_set_max_cpu_isa(dnnl_cpu_isa_t isa);

/// Gets the maximal ISA the library can dispatch to on the CPU. See
/// #dnnl_cpu_isa_t and #dnnl::cpu_isa for the list of the values returned by
/// the C and C++ API functions respectively.
///
/// @sa @ref dev_guide_cpu_dispatcher_control for more details
///
/// @returns #dnnl_cpu_isa_t value reflecting the maximal ISA the library may
///     dispatch to.
dnnl_cpu_isa_t DNNL_API dnnl_get_effective_cpu_isa(void);

/// Sets the hints flag for the CPU ISA. See #dnnl_cpu_isa_hints_t and
/// #dnnl::cpu_isa_hints for the list of the values accepted by the C and C++
/// API functions respectively.
///
/// This function has effect only once, and returns an error on subsequent
/// calls. It should also be invoked before any other oneDNN API call, otherwise
/// it may return an error.
///
/// This function overrides the DNNL_CPU_ISA_HINTS environment variable.
/// @sa @ref dev_guide_cpu_isa_hints for more details
///
/// @param isa_hints CPU ISA hints to be passed over to the implementation.
///     Pass #dnnl_cpu_isa_no_hints/#dnnl::cpu_isa_hints::no_hints to use
///     default features i.e. no hints.
/// @returns #dnnl_success/#dnnl::status::success on success and a
///     #dnnl_runtime_error/#dnnl::status::runtime_error if the ISA hints cannot
///     be specified at the current time.
/// @returns #dnnl_unimplemented/#dnnl::status::unimplemented if the feature
///     was disabled at build time (see @ref dev_guide_build_options for more
///     details).
dnnl_status_t DNNL_API dnnl_set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints);

/// Gets the ISA specific hints that library can follow. See
/// #dnnl_cpu_isa_hints_t and #dnnl::cpu_isa_hints for the list of the values
///  returned by the C and C++ API functions respectively.
///
/// @sa @ref dev_guide_cpu_isa_hints for more details
///
/// @returns #dnnl_cpu_isa_hints_t value reflecting the ISA specific hints the
/// library can follow.
dnnl_cpu_isa_hints_t DNNL_API dnnl_get_cpu_isa_hints(void);

/// @} dnnl_api_service

#ifdef DNNL_EXPERIMENTAL_PROFILING

/// @addtogroup dnnl_api_profiling Profiling
/// @{

/// Resets a profiler's state.
///
/// @param stream Stream associated with the profiler.
///
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_reset_profiling(dnnl_stream_t stream);

/// Queries profiling data. The profiling data accumulates for each primitive
/// execution. The @p num_entries will be equal to the number of executions
/// since the last `dnnl_reset_profiling` call. In order to query the
/// @p num_entries the @p data parameter should be NULL. When @p data is NULL
/// then the @p data_kind parameter is ignored.
///
/// The profiling data can be reset by calling #dnnl_reset_profiling.
///
/// @note
///     It is required to wait for all submitted primitives to complete
///     using #dnnl_stream_wait prior to querying profiling data.
///
/// @param stream Stream that was used for executing a primitive that
/// is being profiled.
/// @param data_kind Profiling data kind to query.
/// @param num_entries Number of profiling data entries.
/// @param data Profiling data.
///
/// @returns #dnnl_success on success and a status describing the error
///     otherwise.
dnnl_status_t DNNL_API dnnl_query_profiling_data(dnnl_stream_t stream,
        dnnl_profiling_data_kind_t data_kind, int *num_entries, uint64_t *data);

/// @} dnnl_api_profiling
#endif

/// @addtogroup dnnl_api_blas
/// @{

/// Performs single-precision matrix-matrix multiply.
///
/// The operation is defined as:
///
/// `C := alpha * op( A ) * op( B ) + beta * C`
///
/// where
///  - `op( X ) = X` or `op( X ) = X**T`,
///  - `alpha` and `beta` are scalars, and
///  - `A`, `B`, and `C` are matrices:
///     - `op( A )` is an `MxK` matrix,
///     - `op( B )` is an `KxN` matrix,
///     - `C` is an `MxN` matrix.
///
/// The matrices are assumed to be stored in row-major order (the elements in
/// each of the matrix rows are contiguous in memory).
///
/// @note
///     This API does not support XERBLA. Instead, unlike the standard BLAS
///     functions, this one returns a dnnl_status_t value to allow error
///     handling.
///
/// @param transa Transposition flag for matrix A: 'N' or 'n' means A is not
///     transposed, and 'T' or 't' means that A is transposed.
/// @param transb Transposition flag for matrix B: 'N' or 'n' means B is not
///     transposed, and 'T' or 't' means that B is transposed.
/// @param M The M dimension.
/// @param N The N dimension.
/// @param K The K dimension.
/// @param alpha The alpha parameter that is used to scale the product of
///     matrices A and B.
/// @param A A pointer to the A matrix data.
/// @param lda The leading dimension for the matrix A.
/// @param B A pointer to the B matrix data.
/// @param ldb The leading dimension for the matrix B.
/// @param beta The beta parameter that is used to scale the matrix C.
/// @param C A pointer to the C matrix data.
/// @param ldc The leading dimension for the matrix C.
/// @returns #dnnl_success/#dnnl::status::success on success and a status
///     describing the error otherwise.
dnnl_status_t DNNL_API dnnl_sgemm(char transa, char transb, dnnl_dim_t M,
        dnnl_dim_t N, dnnl_dim_t K, float alpha, const float *A, dnnl_dim_t lda,
        const float *B, dnnl_dim_t ldb, float beta, float *C, dnnl_dim_t ldc);

/// Performs integer matrix-matrix multiply on 8-bit unsigned matrix A, 8-bit
/// signed matrix B, and 32-bit signed resulting matrix C.
///
/// The operation is defined as:
///
/// `C := alpha * (op(A) - A_offset) * (op(B) - B_offset) + beta * C + C_offset`
///
/// where
///  - `op( X ) = X` or `op( X ) = X**T`,
///  - `alpha` and `beta` are scalars, and
///  - `A`, `B`, and `C` are matrices:
///     - `op( A )` is an `MxK` matrix,
///     - `op( B )` is an `KxN` matrix,
///     - `C` is an `MxN` matrix.
///  - `A_offset` is an `MxK` matrix with every element equal the `ao` value,
///  - `B_offset` is an `KxN` matrix with every element equal the `bo` value,
///  - `C_offset` is an `MxN` matrix which is defined by the `co` array of size `len`:
///    - if `offsetc = F`: the `len` must be at least `1`,
///    - if `offsetc = C`: the `len` must be at least `max(1, m)`,
///    - if `offsetc = R`: the `len` must be at least `max(1, n)`,
///
/// The matrices are assumed to be stored in row-major order (the elements in
/// each of the matrix rows are contiguous in memory).
///
/// @note
///     This API does not support XERBLA. Instead, unlike the standard BLAS
///     functions, this one returns a dnnl_status_t value to allow error
///     handling.
///
/// @warning
///     On some architectures saturation may happen during intermediate
///     computations, which would lead to unexpected results. For more
///     details, refer to @ref dev_guide_int8_computations.
///
/// @param transa Transposition flag for matrix A: 'N' or 'n' means A is not
///     transposed, and 'T' or 't' means that A is transposed.
/// @param transb Transposition flag for matrix B: 'N' or 'n' means B is not
///     transposed, and 'T' or 't' means that B is transposed.
/// @param offsetc Flag specifying how offsets should be applied to matrix C:
///     - 'F' means that the same offset will be applied to each element of
///         the matrix C,
///     - 'C' means that individual offset will be applied to each element
///         within each column,
///     - 'R' means that individual offset will be applied to each element
///         within each row.
/// @param M The M dimension.
/// @param N The N dimension.
/// @param K The K dimension.
/// @param alpha The alpha parameter that is used to scale the product of
///     matrices A and B.
/// @param A A pointer to the A matrix data.
/// @param lda The leading dimension for the matrix A.
/// @param ao The offset value for the matrix A.
/// @param B A pointer to the B matrix data.
/// @param ldb The leading dimension for the matrix B.
/// @param bo The offset value for the matrix B.
/// @param beta The beta parameter that is used to scale the matrix C.
/// @param C A pointer to the C matrix data.
/// @param ldc The leading dimension for the matrix C.
/// @param co An array of offset values for the matrix C. The number of
///     elements in the array depends on the value of @p offsetc.
/// @returns #dnnl_success/#dnnl::status::success on success and a status
///     describing the error otherwise.
dnnl_status_t DNNL_API dnnl_gemm_u8s8s32(char transa, char transb, char offsetc,
        dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, float alpha, const uint8_t *A,
        dnnl_dim_t lda, uint8_t ao, const int8_t *B, dnnl_dim_t ldb, int8_t bo,
        float beta, int32_t *C, dnnl_dim_t ldc, const int32_t *co);

/// Performs integer matrix-matrix multiply on 8-bit signed matrix A, 8-bit
/// signed matrix B, and 32-bit signed resulting matrix C.
///
/// The operation is defined as:
///
/// `C := alpha * (op(A) - A_offset) * (op(B) - B_offset) + beta * C + C_offset`
///
/// where
///  - `op( X ) = X` or `op( X ) = X**T`,
///  - `alpha` and `beta` are scalars, and
///  - `A`, `B`, and `C` are matrices:
///     - `op( A )` is an `MxK` matrix,
///     - `op( B )` is an `KxN` matrix,
///     - `C` is an `MxN` matrix.
///  - `A_offset` is an `MxK` matrix with every element equal the `ao` value,
///  - `B_offset` is an `KxN` matrix with every element equal the `bo` value,
///  - `C_offset` is an `MxN` matrix which is defined by the `co` array of size `len`:
///    - if `offsetc = F`: the `len` must be at least `1`,
///    - if `offsetc = C`: the `len` must be at least `max(1, m)`,
///    - if `offsetc = R`: the `len` must be at least `max(1, n)`,
///
/// The matrices are assumed to be stored in row-major order (the elements in
/// each of the matrix rows are contiguous in memory).
///
/// @note
///     This API does not support XERBLA. Instead, unlike the standard BLAS
///     functions, this one returns a dnnl_status_t value to allow error
///     handling.
///
/// @warning
///     On some architectures saturation may happen during intermediate
///     computations, which would lead to unexpected results. For more
///     details, refer to @ref dev_guide_int8_computations.
///
/// @param transa Transposition flag for matrix A: 'N' or 'n' means A is not
///     transposed, and 'T' or 't' means that A is transposed.
/// @param transb Transposition flag for matrix B: 'N' or 'n' means B is not
///     transposed, and 'T' or 't' means that B is transposed.
/// @param offsetc Flag specifying how offsets should be applied to matrix C:
///     - 'F' means that the same offset will be applied to each element of
///         the matrix C,
///     - 'C' means that individual offset will be applied to each element
///         within each column,
///     - 'R' means that individual offset will be applied to each element
///         within each row.
/// @param M The M dimension.
/// @param N The N dimension.
/// @param K The K dimension.
/// @param alpha The alpha parameter that is used to scale the product of
///     matrices A and B.
/// @param A A pointer to the A matrix data.
/// @param lda The leading dimension for the matrix A.
/// @param ao The offset value for the matrix A.
/// @param B A pointer to the B matrix data.
/// @param ldb The leading dimension for the matrix B.
/// @param bo The offset value for the matrix B.
/// @param beta The beta parameter that is used to scale the matrix C.
/// @param C A pointer to the C matrix data.
/// @param ldc The leading dimension for the matrix C.
/// @param co An array of offset values for the matrix C. The number of
///     elements in the array depends on the value of @p offsetc.
/// @returns #dnnl_success/#dnnl::status::success on success and a status
///     describing the error otherwise.
dnnl_status_t DNNL_API dnnl_gemm_s8s8s32(char transa, char transb, char offsetc,
        dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, float alpha, const int8_t *A,
        dnnl_dim_t lda, int8_t ao, const int8_t *B, dnnl_dim_t ldb, int8_t bo,
        float beta, int32_t *C, dnnl_dim_t ldc, const int32_t *co);

/// @} dnnl_api_blas

/// @} dnnl_api

#ifdef __cplusplus
}
#endif

#endif /* ONEAPI_DNNL_DNNL_H */