/******************************************************************************* * Copyright 2024-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ /// @file /// Graph OpenCL interop API #ifndef ONEAPI_DNNL_DNNL_GRAPH_OCL_HPP #define ONEAPI_DNNL_DNNL_GRAPH_OCL_HPP /// @cond DO_NOT_DOCUMENT_THIS #include #include #include "oneapi/dnnl/dnnl_graph.hpp" #include "oneapi/dnnl/dnnl_graph_ocl.h" #include "oneapi/dnnl/dnnl_ocl.hpp" /// @endcond /// @addtogroup dnnl_api /// @{ namespace dnnl { /// @addtogroup dnnl_graph_api /// @{ namespace graph { /// @addtogroup dnnl_graph_api_interop Runtime interoperability API /// API extensions to interact with the underlying run-time. /// @{ /// @addtogroup dnnl_graph_api_ocl_interop OpenCL interoperability API /// API extensions to interact with the underlying OpenCL run-time. /// @{ /// OpenCL interoperability namespace namespace ocl_interop { /// Constructs an allocator from OpenCL malloc and free function pointer. OpenCL /// allocator should be used for OpenCL GPU runtime. Currently, only device USM /// allocator is supported. /// /// @param ocl_malloc The pointer to OpenCL malloc function /// @param ocl_free The pointer to OpenCL free function /// @returns Created allocator inline allocator make_allocator(dnnl_graph_ocl_allocate_f ocl_malloc, dnnl_graph_ocl_deallocate_f ocl_free) { dnnl_graph_allocator_t c_allocator = nullptr; error::wrap_c_api(dnnl_graph_ocl_interop_allocator_create( &c_allocator, ocl_malloc, ocl_free), "could not create allocator for opencl device"); return allocator(c_allocator); } /// Constructs an engine from an OpenCL device, an OpenCL context, and an /// allocator. /// /// @param device A valid OpenCL device to construct the engine /// @param context A valid OpenCL context to construct the engine /// @param alloc An allocator to associate with the engine /// @returns Created engine inline engine make_engine_with_allocator( cl_device_id device, cl_context context, const allocator &alloc) { dnnl_engine_t c_engine; error::wrap_c_api(dnnl_graph_ocl_interop_make_engine_with_allocator( &c_engine, device, context, alloc.get()), "could not make an engine with allocator"); return engine(c_engine); } /// Constructs an engine from an OpenCL device, an OpenCL context, an /// allocator, and a serialized engine cache blob. /// /// @param device A valid OpenCL device to construct the engine /// @param context A valid OpenCL context to construct the engine /// @param alloc An allocator to associate with the engine /// @param cache_blob Cache blob serialized beforehand /// @returns Created engine inline engine make_engine_with_allocator(cl_device_id device, cl_context context, const allocator &alloc, const std::vector &cache_blob) { dnnl_engine_t c_engine; error::wrap_c_api( dnnl_graph_ocl_interop_make_engine_from_cache_blob_with_allocator( &c_engine, device, context, alloc.get(), cache_blob.size(), cache_blob.data()), "could not make an engine with allocator from cache blob"); return engine(c_engine); } /// Executes a compiled partition in a specified stream and returns a OpenCL /// event. /// /// @param c_partition Compiled partition to execute. /// @param astream Stream object to run over /// @param inputs Arguments map. /// @param outputs Arguments map. /// @param deps Optional vector with `cl_event` dependencies. /// @returns Output event. inline cl_event execute(compiled_partition &c_partition, stream &astream, const std::vector &inputs, std::vector &outputs, const std::vector &deps = {}) { std::vector c_inputs; c_inputs.reserve(inputs.size()); for (auto &in : inputs) { c_inputs.push_back(in.get()); } std::vector c_outputs; c_outputs.reserve(outputs.size()); for (auto &out : outputs) { c_outputs.push_back(out.get()); } const cl_event *c_deps = deps.empty() ? nullptr : deps.data(); cl_event ocl_event; error::wrap_c_api( dnnl_graph_ocl_interop_compiled_partition_execute(c_partition.get(), astream.get(), c_inputs.size(), c_inputs.data(), c_outputs.size(), c_outputs.data(), c_deps, (int)deps.size(), &ocl_event), "could not execute the compiled_partition on a specified opencl " "stream"); return ocl_event; } } // namespace ocl_interop /// @} dnnl_graph_api_ocl_interop /// @} dnnl_graph_api_interop } // namespace graph /// @} dnnl_graph_api } // namespace dnnl /// @} dnnl_api #endif