57 return "parallel matrix reduction on opencl device";
61 auto t = ctx.
task.template cast_safe<ScheduleTask_m_reduce>();
65 return execute_csr(ctx);
68 return execute_csr(ctx);
71 return execute_csr(ctx);
78 auto t = ctx.
task.template cast_safe<ScheduleTask_m_reduce>();
80 auto r = t->r.template cast_safe<TScalar<T>>();
81 auto s = t->s.template cast_safe<TScalar<T>>();
82 auto M = t->M.template cast_safe<TMatrix<T>>();
83 auto op_reduce = t->op_reduce.template cast_safe<TOpBinary<T, T, T>>();
87 const auto* p_cl_csr = M->template get<CLCsr<T>>();
88 auto* p_cl_acc = get_acc_cl();
89 auto& queue = p_cl_acc->get_queue_default();
91 cl_reduce<T>(queue, p_cl_csr->Ax, p_cl_csr->values, s->get_value(), op_reduce, r->get_value());
void cl_reduce(cl::CommandQueue &queue, const cl::Buffer &values, uint n, T init, const ref_ptr< TOpBinary< T, T, T > > &op_reduce, T &result)
Definition cl_reduce.hpp:38
Execution context of a single task.
Definition dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition dispatcher.hpp:48