28 #ifndef SPLA_CL_V_ASSIGN_HPP
29 #define SPLA_CL_V_ASSIGN_HPP
54 return "v_assign_masked";
58 return "parallel vector masked assignment on opencl device";
62 auto t = ctx.
task.template cast_safe<ScheduleTask_v_assign_masked>();
66 return execute_sp2dn(ctx);
68 return execute_dn2dn(ctx);
70 return execute_sp2dn(ctx);
77 auto t = ctx.
task.template cast_safe<ScheduleTask_v_assign_masked>();
79 auto r = t->r.template cast_safe<TVector<T>>();
80 auto mask = t->mask.template cast_safe<TVector<T>>();
81 auto value = t->value.template cast_safe<TScalar<T>>();
82 auto op_assign = t->op_assign.template cast_safe<TOpBinary<T, T, T>>();
83 auto op_select = t->op_select.template cast_safe<TOpSelect<T>>();
88 std::shared_ptr<CLProgram> program;
91 auto* p_cl_r_dense = r->template get<CLDenseVec<T>>();
92 const auto* p_cl_mask_dense = mask->template get<CLDenseVec<T>>();
93 auto* p_cl_acc = get_acc_cl();
94 auto& queue = p_cl_acc->get_queue_default();
96 auto kernel_dense_to_dense = program->make_kernel(
"assign_dense_to_dense");
97 kernel_dense_to_dense.setArg(0, p_cl_r_dense->Ax);
98 kernel_dense_to_dense.setArg(1, p_cl_mask_dense->Ax);
99 kernel_dense_to_dense.setArg(2, value->get_value());
100 kernel_dense_to_dense.setArg(3, r->get_n_rows());
102 uint n_groups_to_dispatch = div_up_clamp(r->get_n_rows(), m_block_size, 1, 256);
104 cl::NDRange global(m_block_size * n_groups_to_dispatch);
105 cl::NDRange local(m_block_size);
106 queue.enqueueNDRangeKernel(kernel_dense_to_dense, cl::NDRange(), global, local);
111 Status execute_sp2dn(
const DispatchContext& ctx) {
114 auto t = ctx.task.template cast_safe<ScheduleTask_v_assign_masked>();
116 auto r = t->r.template cast_safe<TVector<T>>();
117 auto mask = t->mask.template cast_safe<TVector<T>>();
118 auto value = t->value.template cast_safe<TScalar<T>>();
119 auto op_assign = t->op_assign.template cast_safe<TOpBinary<T, T, T>>();
120 auto op_select = t->op_select.template cast_safe<TOpSelect<T>>();
125 auto* p_cl_r_dense = r->template get<CLDenseVec<T>>();
126 const auto* p_cl_mask_coo = mask->template get<CLCooVec<T>>();
127 auto* p_cl_acc = get_acc_cl();
128 auto& queue = p_cl_acc->get_queue_default();
130 if (p_cl_mask_coo->values == 0) {
135 std::shared_ptr<CLProgram> program;
138 auto kernel_sparse_to_dense = program->make_kernel(
"assign_sparse_to_dense");
139 kernel_sparse_to_dense.setArg(0, p_cl_r_dense->Ax);
140 kernel_sparse_to_dense.setArg(1, p_cl_mask_coo->Ai);
141 kernel_sparse_to_dense.setArg(2, p_cl_mask_coo->Ax);
142 kernel_sparse_to_dense.setArg(3, value->get_value());
143 kernel_sparse_to_dense.setArg(4, p_cl_mask_coo->values);
145 uint n_groups_to_dispatch = div_up_clamp(p_cl_mask_coo->values, m_block_size, 1, 256);
147 cl::NDRange global(m_block_size * n_groups_to_dispatch);
148 cl::NDRange local(m_block_size);
149 queue.enqueueNDRangeKernel(kernel_sparse_to_dense, cl::NDRange(), global, local);
154 bool ensure_kernel(
const ref_ptr<TOpBinary<T, T, T>>& op_assign,
const ref_ptr<TOpSelect<T>>& op_select, std::shared_ptr<CLProgram>& program) {
155 m_block_size = get_acc_cl()->get_default_wgs();
157 CLProgramBuilder program_builder;
159 .set_name(
"vector_assign")
160 .add_type(
"TYPE", get_ttype<T>().
template as<Type>())
161 .add_op(
"OP_BINARY", op_assign.template as<OpBinary>())
162 .add_op(
"OP_SELECT", op_select.template as<OpSelect>())
163 .set_source(source_vector_assign)
166 program = program_builder.get_program();
172 uint m_block_size = 0;
Status of library operation execution.
Definition: cl_v_assign.hpp:49
std::string get_name() override
Definition: cl_v_assign.hpp:53
Status execute(const DispatchContext &ctx) override
Definition: cl_v_assign.hpp:61
std::string get_description() override
Definition: cl_v_assign.hpp:57
~Algo_v_assign_masked_cl() override=default
Algorithm suitable for processing a schedule task, based on the task's string key.
Definition: registry.hpp:66
Automates reference counting and behaves as a shared smart pointer.
Definition: ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
#define LOG_MSG(status, msg)
Definition: logger.hpp:66
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92