52 return "sequential element-wise add vector operation";
56 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd_fdb>();
61 return execute_sp2dn(ctx);
64 return execute_dn2dn(ctx);
67 return execute_sp2dn(ctx);
74 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd_fdb>();
84 auto* p_r = r->template get<CpuDenseVec<T>>();
85 const auto* p_v = v->template get<CpuCooVec<T>>();
86 auto* p_fdb = fdb->template get<CpuCooVec<T>>();
87 const auto& function = op->function;
89 assert(p_fdb->values == 0);
91 for (
uint k = 0; k < p_v->values; k++) {
94 p_r->Ax[i] = function(prev, p_v->Ax[k]);
96 if (prev != p_r->Ax[i]) {
98 p_fdb->Ai.push_back(i);
99 p_fdb->Ax.push_back(p_r->Ax[i]);
106 Status execute_dn2dn(
const DispatchContext& ctx) {
109 auto t = ctx.task.template cast_safe<ScheduleTask_v_eadd_fdb>();
110 ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
111 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
112 ref_ptr<TVector<T>> fdb = t->fdb.template cast_safe<TVector<T>>();
113 ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
119 auto* p_r = r->template get<CpuDenseVec<T>>();
120 const auto* p_v = v->template get<CpuDenseVec<T>>();
121 auto* p_fdb = fdb->template get<CpuDenseVec<T>>();
122 const auto& function = op->function;
124 const uint N = v->get_n_rows();
125 const T fill_value = fdb->get_fill_value();
129 for (
uint i = 0; i < N; i++) {
131 p_r->Ax[i] = function(prev, p_v->Ax[i]);
133 if (prev != p_r->Ax[i]) {
134 p_fdb->Ax[i] = p_r->Ax[i];
Execution context of a single task.
Definition dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition dispatcher.hpp:48