28 #ifndef SPLA_CPU_V_EADD_HPP
29 #define SPLA_CPU_V_EADD_HPP
52 return "sequential element-wise add vector operation";
56 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd>();
61 return execute_spNsp(ctx);
64 return execute_dnNdn(ctx);
67 return execute_spNsp(ctx);
74 auto t = ctx.
task.template cast_safe<ScheduleTask_v_eadd>();
85 const CpuCooVec<T>* p_u = u->template get<CpuCooVec<T>>();
86 const CpuCooVec<T>* p_v = v->template get<CpuCooVec<T>>();
87 const auto&
function = op->function;
89 const T u_fill_value = u->get_fill_value();
90 const T v_fill_value = v->get_fill_value();
92 assert(p_r->
Ax.empty());
94 const auto u_count = p_u->values;
95 const auto v_count = p_v->values;
99 while (u_iter < u_count && v_iter < v_count) {
100 if (p_u->Ai[u_iter] < p_v->Ai[v_iter]) {
101 p_r->
Ai.push_back(p_u->Ai[u_iter]);
102 p_r->
Ax.push_back(
function(p_u->Ax[u_iter], v_fill_value));
104 }
else if (p_v->Ai[v_iter] < p_u->Ai[u_iter]) {
105 p_r->
Ai.push_back(p_v->Ai[v_iter]);
106 p_r->
Ax.push_back(
function(u_fill_value, p_v->Ax[v_iter]));
109 p_r->
Ai.push_back(p_u->Ai[u_iter]);
110 p_r->
Ax.push_back(
function(p_u->Ax[u_iter], p_v->Ax[v_iter]));
116 while (u_iter < u_count) {
117 p_r->
Ai.push_back(p_u->Ai[u_iter]);
118 p_r->
Ax.push_back(
function(p_u->Ax[u_iter], v_fill_value));
122 while (v_iter < v_count) {
123 p_r->
Ai.push_back(p_v->Ai[v_iter]);
124 p_r->
Ax.push_back(
function(u_fill_value, p_v->Ax[v_iter]));
131 Status execute_dnNdn(
const DispatchContext& ctx) {
134 auto t = ctx.task.template cast_safe<ScheduleTask_v_eadd>();
135 ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
136 ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
137 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
138 ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
144 auto* p_r = r->template get<CpuDenseVec<T>>();
145 const auto* p_u = u->template get<CpuDenseVec<T>>();
146 const auto* p_v = v->template get<CpuDenseVec<T>>();
147 const auto&
function = op->function;
149 const uint N = r->get_n_rows();
151 for (
uint i = 0; i < N; i++) {
152 p_r->
Ax[i] =
function(p_u->Ax[i], p_v->Ax[i]);
Status of library operation execution.
Definition: cpu_v_eadd.hpp:43
Status execute(const DispatchContext &ctx) override
Definition: cpu_v_eadd.hpp:55
~Algo_v_eadd_cpu() override=default
std::string get_description() override
Definition: cpu_v_eadd.hpp:51
std::string get_name() override
Definition: cpu_v_eadd.hpp:47
CPU list-of-coordinates sparse vector representation.
Definition: cpu_formats.hpp:90
std::vector< uint > Ai
Definition: cpu_formats.hpp:96
std::vector< T > Ax
Definition: cpu_formats.hpp:97
Algorithm suitable for processing a schedule task based on the task's string key.
Definition: registry.hpp:66
uint values
Definition: tdecoration.hpp:58
Automates reference counting and behaves as a shared smart pointer.
Definition: ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92