57 return "parallel vector map on opencl device";
61 auto t = ctx.task.template cast_safe<ScheduleTask_v_map>();
65 return execute_sp(ctx);
68 return execute_dn(ctx);
71 return execute_sp(ctx);
74 return execute_dn(ctx);
77 return execute_sp(ctx);
84 auto t = ctx.task.template cast_safe<ScheduleTask_v_map>();
86 auto r = t->r.template cast_safe<TVector<T>>();
87 auto v = t->v.template cast_safe<TVector<T>>();
88 auto op = t->op.template cast_safe<TOpUnary<T, T>>();
93 auto* p_cl_dense_r = r->template get<CLDenseVec<T>>();
94 const auto* p_cl_dense_v = v->template get<CLDenseVec<T>>();
95 auto* p_cl_acc = get_acc_cl();
96 auto& queue = p_cl_acc->get_queue_default();
98 cl_map(queue, p_cl_dense_v->Ax, p_cl_dense_r->Ax, r->get_n_rows(), op);
103 Status execute_sp(const DispatchContext& ctx) {
106 auto t = ctx.task.template cast_safe<ScheduleTask_v_map>();
108 auto r = t->r.template cast_safe<TVector<T>>();
109 auto v = t->v.template cast_safe<TVector<T>>();
110 auto op = t->op.template cast_safe<TOpUnary<T, T>>();
115 auto* p_cl_coo_r = r->template get<CLCooVec<T>>();
116 const auto* p_cl_coo_v = v->template get<CLCooVec<T>>();
117 auto* p_cl_acc = get_acc_cl();
118 auto& queue = p_cl_acc->get_queue_default();
120 const uint N = p_cl_coo_v->values;
124 queue.enqueueCopyBuffer(p_cl_coo_v->Ai, p_cl_coo_r->Ai, 0, 0, sizeof(uint) * N);
125 cl_map(queue, p_cl_coo_v->Ax, p_cl_coo_r->Ax, N, op);
void cl_map(cl::CommandQueue &queue, const cl::Buffer &source, cl::Buffer &dest, uint n, const ref_ptr< TOpUnary< T, T > > &op)
Definition cl_map.hpp:38
Execution context of a single task.
Definition dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition dispatcher.hpp:48