#pragma once #include #include namespace at::native::xpu { class QConvoneDNNXPU final { public: C10_API static at::Tensor run_pointwise( at::Tensor act, double act_scale, int64_t act_zero_point, at::Tensor weight, at::Tensor weight_scales, at::Tensor weight_zero_points, std::optional bias, torch::List stride, torch::List padding, torch::List dilation, int64_t groups, double inv_output_scale, int64_t output_zero_point, std::optional output_dtype, std::string_view attr, torch::List> scalars, std::optional algorithm); C10_API static at::Tensor run_pointwise_tensor( at::Tensor act, at::Tensor act_scale, at::Tensor act_zero_point, at::Tensor weight, at::Tensor weight_scales, at::Tensor weight_zero_points, std::optional bias, torch::List stride, torch::List padding, torch::List dilation, int64_t groups, double output_scale, int64_t output_zero_point, std::optional output_dtype, std::string_view attr, torch::List> scalars, std::optional algorithm); C10_API static at::Tensor run_pointwise_binary( at::Tensor act, double act_scale, int64_t act_zero_point, at::Tensor weight, at::Tensor weight_scales, at::Tensor weight_zero_points, at::Tensor accum, std::optional bias, torch::List stride, torch::List padding, torch::List dilation, int64_t groups, double output_scale, int64_t output_zero_point, std::optional output_dtype, double accum_scale, int64_t accum_zero_point, std::string_view binary_attr, std::optional alpha, std::optional unary_attr, torch::List> unary_scalars, std::optional unary_algorithm); C10_API static at::Tensor run_pointwise_binary_tensor( at::Tensor act, at::Tensor act_scale, at::Tensor act_zero_point, at::Tensor weight, at::Tensor weight_scales, at::Tensor weight_zero_points, at::Tensor accum, std::optional bias, torch::List stride, torch::List padding, torch::List dilation, int64_t groups, double output_scale, int64_t output_zero_point, std::optional output_dtype, double accum_scale, int64_t accum_zero_point, std::string_view binary_attr, std::optional alpha, std::optional unary_attr, torch::List> unary_scalars, std::optional unary_algorithm); static inline c10::ScalarType qconv_decide_out_dtype( const at::Tensor& act, const std::optional output_dtype); static at::Tensor qconv_prepack_xpu( at::Tensor weight, at::Tensor weight_scales, double input_scale, int64_t input_zero_point, torch::List stride, torch::List padding, torch::List dilation, int64_t groups, std::optional> input_shape); }; } // namespace at::native::xpu