28 #ifndef SPLA_CL_V_COUNT_MF_HPP
29 #define SPLA_CL_V_COUNT_MF_HPP
59 return "parallel opencl vector count mf";
63 auto t = ctx.
task.template cast_safe<ScheduleTask_v_count_mf>();
67 return execute_sp(ctx);
69 return execute_dn(ctx);
71 return execute_sp(ctx);
76 auto t = ctx.
task.template cast_safe<ScheduleTask_v_count_mf>();
78 CLCooVec<T>* dec_v = v->template get<CLCooVec<T>>();
80 t->r->set_uint(dec_v->values);
// Dense-vector path: launches the "count_mf" kernel over the CLDenseVec<T>
// storage of the task's vector and stores the resulting counter value in the
// task result.
//
// ctx: dispatch context whose `task` is cast to ScheduleTask_v_count_mf.
// Returns a Status; the return statement itself is not visible in this listing.
85 Status execute_dn(
const DispatchContext& ctx) {
// Recover the typed task, its typed vector, and the vector's CLDenseVec<T> storage.
86 auto t = ctx.task.template cast_safe<ScheduleTask_v_count_mf>();
87 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
88 CLDenseVec<T>* dec_v = v->template get<CLDenseVec<T>>();
// NOTE(review): `program` is dereferenced below, but the line that populates it
// (presumably a call to ensure_kernel) is missing from this listing -- confirm.
90 std::shared_ptr<CLProgram> program;
93 auto* cl_acc = get_acc_cl();
94 auto& queue = cl_acc->get_queue_default();
// Counter whose buffer is handed to the kernel; set to 0 through the queue
// before the launch.
96 CLCounterWrapper cl_count;
97 cl_count.set(queue, 0);
99 auto kernel = program->make_kernel(
"count_mf");
// Kernel arguments, in order: the dense vector's Ax, the counter buffer,
// the vector's row count, and the vector's fill value.
100 kernel.setArg(0, dec_v->Ax);
101 kernel.setArg(1, cl_count.buffer());
102 kernel.setArg(2, v->get_n_rows());
103 kernel.setArg(3, v->get_fill_value());
// Group count is derived from the row count and m_block_size; the trailing
// 1 and 1024 are presumably the clamp bounds -- confirm against div_up_clamp.
105 const uint n_groups = div_up_clamp(v->get_n_rows(), m_block_size, 1, 1024);
// Global size is n_groups groups of m_block_size work-items each.
107 cl::NDRange global(m_block_size * n_groups);
108 cl::NDRange local(m_block_size);
109 queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);
// Fetch the counter value through the queue and publish it as the task's
// unsigned result.
111 t->r->set_uint(cl_count.get(queue));
// Prepares the OpenCL program used by this algorithm and refreshes
// m_block_size.
//
// program: out-parameter that receives the program from the builder.
// Returns bool; the return statement is not visible in this listing, so the
// meaning of true/false should be confirmed against the full file.
116 bool ensure_kernel(std::shared_ptr<CLProgram>& program) {
// m_block_size is later used as the local range of the kernel launch in
// execute_dn (get_default_wgs presumably reports a work-group size -- confirm).
118 m_block_size = get_acc_cl()->get_default_wgs();
// Only part of the builder chain is visible here: it binds "TYPE" to the type
// descriptor obtained for T and sets source_count as the program source.
// NOTE(review): the head of the chain and any trailing calls are missing
// from this listing.
120 CLProgramBuilder program_builder;
123 .add_type(
"TYPE", get_ttype<T>().
template as<Type>())
124 .set_source(source_count)
// Hand the program held by the builder back through the out-parameter.
127 program = program_builder.get_program();
133 uint m_block_size = 0;
Status of library operation execution.
Definition: cl_v_count_mf.hpp:50
~Algo_v_count_mf_cl() override=default
std::string get_name() override
Definition: cl_v_count_mf.hpp:54
Status execute(const DispatchContext &ctx) override
Definition: cl_v_count_mf.hpp:62
std::string get_description() override
Definition: cl_v_count_mf.hpp:58
OpenCL list-of-coordinates sparse vector representation.
Definition: cl_formats.hpp:77
Algorithm suitable to process schedule task based on task string key.
Definition: registry.hpp:66
Automates reference counting and behaves as shared smart pointer.
Definition: ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48