28 #ifndef SPLA_CPU_V_EMULT_HPP
29 #define SPLA_CPU_V_EMULT_HPP
52 return "sequential element-wise mult vector operation";
56 auto t = ctx.
task.template cast_safe<ScheduleTask_v_emult>();
61 return execute_spNsp(ctx);
64 return execute_spNdn(ctx);
67 return execute_dnNsp(ctx);
70 return execute_spNsp(ctx);
77 auto t = ctx.
task.template cast_safe<ScheduleTask_v_emult>();
88 const CpuCooVec<T>* p_u = u->template get<CpuCooVec<T>>();
89 const CpuCooVec<T>* p_v = v->template get<CpuCooVec<T>>();
90 const auto&
function = op->function;
92 assert(p_r->
Ai.empty());
93 assert(p_r->
Ax.empty());
95 const auto u_count = p_u->values;
96 const auto v_count = p_v->values;
100 while (u_iter < u_count && v_iter < v_count) {
101 if (p_u->Ai[u_iter] < p_v->Ai[v_iter]) {
103 }
else if (p_v->Ai[v_iter] < p_u->Ai[u_iter]) {
107 p_r->
Ai.push_back(p_u->Ai[u_iter]);
108 p_r->
Ax.push_back(
function(p_u->Ax[u_iter], p_v->Ax[v_iter]));
/// Element-wise multiplication where `u` is read as a sparse coordinate (COO) vector,
/// `v` is read as a dense vector, and the result `r` is written as a COO vector.
///
/// For each stored entry of `u`, the value of `v` at the same index is compared with
/// `v`'s fill value; when it differs, the index and `op->function(u_value, v_value)`
/// are appended to `r`.
///
/// @param ctx Dispatch context; its task is cast to ScheduleTask_v_emult.
/// @return Status of the operation.
///
/// NOTE(review): this listing omits some lines of the original file (the embedded line
/// numbers jump), so the return statement and closing braces are not visible here.
116 Status execute_spNdn(
const DispatchContext& ctx) {
// Recover the typed task, its operands and the binary operator.
119 auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
120 ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
121 ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
122 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
123 ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
// Storage views: COO for the result and for u, dense for v.
129 CpuCooVec<T>* p_r = r->template get<CpuCooVec<T>>();
130 const CpuCooVec<T>* p_u = u->template get<CpuCooVec<T>>();
131 const CpuDenseVec<T>* p_v = v->template get<CpuDenseVec<T>>();
132 const auto&
function = op->function;
// Dense entries equal to v's fill value are skipped in the loop below.
133 const auto skip = v->get_fill_value();
// The result storage must be empty on entry; this is only checked when asserts are enabled.
135 assert(p_r->Ai.empty());
136 assert(p_r->Ax.empty());
// Walk the stored entries of u; `values` is used as the entry count.
138 for (
uint k = 0; k < p_u->values; k++) {
// Index of the k-th stored entry of u; also used to address the dense v directly.
139 const uint i = p_u->Ai[k];
// Emit a result entry only where v holds something other than its fill value.
141 if (p_v->Ax[i] != skip) {
// Result entries are appended in the order u's entries are visited.
143 p_r->Ai.push_back(i);
// Operand order is (u value, v value).
144 p_r->Ax.push_back(
function(p_u->Ax[k], p_v->Ax[i]));
/// Element-wise multiplication where `u` is read as a dense vector, `v` is read as a
/// sparse coordinate (COO) vector, and the result `r` is written as a COO vector.
///
/// For each stored entry of `v`, the value of `u` at the same index is compared with
/// `u`'s fill value; when it differs, the index and `op->function(u_value, v_value)`
/// are appended to `r`.
///
/// @param ctx Dispatch context; its task is cast to ScheduleTask_v_emult.
/// @return Status of the operation.
///
/// NOTE(review): this listing omits some lines of the original file (the embedded line
/// numbers jump), so the return statement and closing braces are not visible here.
150 Status execute_dnNsp(
const DispatchContext& ctx) {
// Recover the typed task, its operands and the binary operator.
153 auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
154 ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
155 ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
156 ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
157 ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
// Storage views: COO for the result and for v, dense for u.
163 CpuCooVec<T>* p_r = r->template get<CpuCooVec<T>>();
164 const CpuDenseVec<T>* p_u = u->template get<CpuDenseVec<T>>();
165 const CpuCooVec<T>* p_v = v->template get<CpuCooVec<T>>();
166 const auto&
function = op->function;
// Dense entries equal to u's fill value are skipped in the loop below.
167 const auto skip = u->get_fill_value();
// The result storage must be empty on entry; this is only checked when asserts are enabled.
169 assert(p_r->Ai.empty());
170 assert(p_r->Ax.empty());
// Walk the stored entries of v; `values` is used as the entry count.
172 for (
uint k = 0; k < p_v->values; k++) {
// Index of the k-th stored entry of v; also used to address the dense u directly.
173 const uint i = p_v->Ai[k];
// Emit a result entry only where u holds something other than its fill value.
175 if (p_u->Ax[i] != skip) {
// Result entries are appended in the order v's entries are visited.
177 p_r->Ai.push_back(i);
// Operand order stays (u value, v value) even though v drives the iteration.
178 p_r->Ax.push_back(
function(p_u->Ax[i], p_v->Ax[k]));
Status of library operation execution.
Definition: cpu_v_emult.hpp:43
~Algo_v_emult_cpu() override=default
std::string get_description() override
Definition: cpu_v_emult.hpp:51
Status execute(const DispatchContext &ctx) override
Definition: cpu_v_emult.hpp:55
std::string get_name() override
Definition: cpu_v_emult.hpp:47
CPU list-of-coordinates sparse vector representation.
Definition: cpu_formats.hpp:90
std::vector< uint > Ai
Definition: cpu_formats.hpp:96
std::vector< T > Ax
Definition: cpu_formats.hpp:97
Algorithm suitable to process schedule task based on task string key.
Definition: registry.hpp:66
uint values
Definition: tdecoration.hpp:58
Automates reference counting and behaves as shared smart pointer.
Definition: ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92