28 #ifndef SPLA_CPU_V_EADD_FDB_HPP
29 #define SPLA_CPU_V_EADD_FDB_HPP
52 return "sequential element-wise add vector operation";
56 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd_fdb>();
61 return execute_sp2dn(ctx);
64 return execute_dn2dn(ctx);
67 return execute_sp2dn(ctx);
74 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd_fdb>();
84 auto* p_r = r->template get<CpuDenseVec<T>>();
85 const auto* p_v = v->template get<CpuCooVec<T>>();
86 auto* p_fdb = fdb->template get<CpuCooVec<T>>();
87 const auto&
function = op->function;
89 assert(p_fdb->values == 0);
91 for (
uint k = 0; k < p_v->values; k++) {
94 p_r->Ax[i] =
function(prev, p_v->Ax[k]);
96 if (prev != p_r->Ax[i]) {
98 p_fdb->Ai.push_back(i);
99 p_fdb->Ax.push_back(p_r->Ax[i]);
106 Status execute_dn2dn(
const DispatchContext& ctx) {
109 auto t = ctx.task.template cast_safe<ScheduleTask_v_eadd_fdb>();
110 ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
111 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
112 ref_ptr<TVector<T>> fdb = t->fdb.template cast_safe<TVector<T>>();
113 ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
119 auto* p_r = r->template get<CpuDenseVec<T>>();
120 const auto* p_v = v->template get<CpuDenseVec<T>>();
121 auto* p_fdb = fdb->template get<CpuDenseVec<T>>();
122 const auto&
function = op->function;
124 const uint N = v->get_n_rows();
125 const T fill_value = fdb->get_fill_value();
129 for (
uint i = 0; i < N; i++) {
131 p_r->Ax[i] =
function(prev, p_v->Ax[i]);
133 if (prev != p_r->Ax[i]) {
134 p_fdb->Ax[i] = p_r->Ax[i];
Status of library operation execution.
Definition: cpu_v_eadd_fdb.hpp:43
std::string get_description() override
Definition: cpu_v_eadd_fdb.hpp:51
Status execute(const DispatchContext &ctx) override
Definition: cpu_v_eadd_fdb.hpp:55
~Algo_v_eadd_fdb_cpu() override=default
std::string get_name() override
Definition: cpu_v_eadd_fdb.hpp:47
Algorithm suitable for processing a schedule task, based on the task's string key.
Definition: registry.hpp:66
Automates reference counting and behaves as a shared smart pointer.
Definition: ref.hpp:117
void cpu_dense_vec_fill(const T fill_value, CpuDenseVec< T > &vec)
Definition: cpu_format_dense_vec.hpp:48
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92