#pragma once #include #include #if AT_KLEIDIAI_ENABLED() namespace at::native::kleidiai { /** * @brief Rearranges the quantized weight to support kleidiai inference * @param bl Groupsize for quantization should be multiple of 32 */ void kai_pack_int4_rhs( const Tensor& weight_packed, const Tensor& weight, const Tensor& scales, const std::optional& bias, const int64_t n, const int64_t k, const int64_t bl); /** * @brief Outputs the buffer size for the packed weights * @param bl Groupsize for quantization. 32 for groupwise , 0 for channelwise */ size_t kai_pack_rhs_int4_size( const int64_t n, const int64_t k, const int64_t bl); /** * @brief Run 2 operations ( Input quantize and pack -> 4 bit Matmul ) */ void kai_quant_pack_lhs_int4_mm( const Tensor& output, const Tensor& input, const Tensor& weight, const int64_t m, const int64_t n, const int64_t k, const int64_t bl); } // namespace at::native::kleidiai #endif