28 #ifndef SPLA_CL_VECTOR_HPP
29 #define SPLA_CL_VECTOR_HPP
59 return "parallel vector element-wise add on opencl device";
63 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd>();
68 return execute_dn2dn(ctx);
71 return execute_dn2dn(ctx);
78 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd>();
84 std::shared_ptr<CLProgram> program;
91 auto* p_cl_r = r->template get<CLDenseVec<T>>();
92 const auto* p_cl_u = u->template get<CLDenseVec<T>>();
93 const auto* p_cl_v = v->template get<CLDenseVec<T>>();
94 auto* p_cl_acc = get_acc_cl();
95 auto& queue = p_cl_acc->get_queue_default();
97 const uint n = r->get_n_rows();
99 auto kernel = program->make_kernel(
"dense_to_dense");
100 kernel.setArg(0, p_cl_r->Ax);
101 kernel.setArg(1, p_cl_u->Ax);
102 kernel.setArg(2, p_cl_v->Ax);
105 cl::NDRange global(p_cl_acc->get_default_wgs() * div_up_clamp(n, p_cl_acc->get_default_wgs(), 1u, 1024u));
106 cl::NDRange local(p_cl_acc->get_default_wgs());
107 queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);
112 bool ensure_kernel(
const ref_ptr<TOpBinary<T, T, T>>& op, std::shared_ptr<CLProgram>& program) {
113 CLProgramBuilder program_builder;
115 .set_name(
"vector_eadd")
116 .add_type(
"TYPE", get_ttype<T>().
template as<Type>())
117 .add_op(
"OP_BINARY", op.template as<OpBinary>())
118 .set_source(source_vector_eadd)
121 program = program_builder.get_program();
Status of library operation execution.
Definition: cl_v_eadd.hpp:50
std::string get_description() override
Definition: cl_v_eadd.hpp:58
Status execute(const DispatchContext &ctx) override
Definition: cl_v_eadd.hpp:62
~Algo_v_eadd_cl() override=default
std::string get_name() override
Definition: cl_v_eadd.hpp:54
Algorithm suitable for processing a schedule task, based on the task's string key.
Definition: registry.hpp:66
Automates reference counting and behaves as a shared smart pointer.
Definition: ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92