57 return "parallel vector reduction on opencl device";
61 auto t = ctx.
task.template cast_safe<ScheduleTask_v_reduce>();
65 return execute_sp(ctx);
68 return execute_dn(ctx);
71 return execute_sp(ctx);
74 return execute_dn(ctx);
77 return execute_sp(ctx);
84 auto t = ctx.
task.template cast_safe<ScheduleTask_v_reduce>();
86 auto r = t->r.template cast_safe<TScalar<T>>();
87 auto s = t->s.template cast_safe<TScalar<T>>();
88 auto v = t->v.template cast_safe<TVector<T>>();
89 auto op_reduce = t->op_reduce.template cast_safe<TOpBinary<T, T, T>>();
93 const auto* p_cl_dense_vec = v->template get<CLDenseVec<T>>();
94 auto* p_cl_acc = get_acc_cl();
95 auto& queue = p_cl_acc->get_queue_default();
97 cl_reduce<T>(queue, p_cl_dense_vec->Ax, v->get_n_rows(), s->get_value(), op_reduce, r->get_value());
102 Status execute_sp(
const DispatchContext& ctx) {
105 auto t = ctx.task.template cast_safe<ScheduleTask_v_reduce>();
107 auto r = t->r.template cast_safe<TScalar<T>>();
108 auto s = t->s.template cast_safe<TScalar<T>>();
109 auto v = t->v.template cast_safe<TVector<T>>();
110 auto op_reduce = t->op_reduce.template cast_safe<TOpBinary<T, T, T>>();
114 const auto* p_cl_coo_vec = v->template get<CLCooVec<T>>();
115 auto* p_cl_acc = get_acc_cl();
116 auto& queue = p_cl_acc->get_queue_default();
118 cl_reduce<T>(queue, p_cl_coo_vec->Ax, p_cl_coo_vec->values, s->get_value(), op_reduce, r->get_value());
void cl_reduce(cl::CommandQueue &queue, const cl::Buffer &values, uint n, T init, const ref_ptr< TOpBinary< T, T, T > > &op_reduce, T &result)
Definition cl_reduce.hpp:38
Execution context of a single task.
Definition dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition dispatcher.hpp:48