28 #ifndef SPLA_CPU_MXV_HPP
29 #define SPLA_CPU_MXV_HPP
53 return "sequential masked matrix-vector product on cpu";
59 auto t = ctx.
task.template cast_safe<ScheduleTask_mxv_masked>();
61 auto r = t->r.template cast_safe<TVector<T>>();
62 auto mask = t->mask.template cast_safe<TVector<T>>();
63 auto M = t->M.template cast_safe<TMatrix<T>>();
64 auto v = t->v.template cast_safe<TVector<T>>();
65 auto op_multiply = t->op_multiply.template cast_safe<TOpBinary<T, T, T>>();
66 auto op_add = t->op_add.template cast_safe<TOpBinary<T, T, T>>();
67 auto op_select = t->op_select.template cast_safe<TOpSelect<T>>();
68 auto init = t->init.template cast_safe<TScalar<T>>();
70 const uint DM = M->get_n_rows();
71 const T sum_init = init->get_value();
79 const CpuDenseVec<T>* p_dense_mask = mask->template get<CpuDenseVec<T>>();
80 const CpuDenseVec<T>* p_dense_v = v->template get<CpuDenseVec<T>>();
81 const CpuLil<T>* p_lil_M = M->template get<CpuLil<T>>();
82 auto early_exit = t->get_desc_or_default()->get_early_exit();
84 auto& func_multiply = op_multiply->function;
85 auto& func_add = op_add->function;
86 auto& func_select = op_select->function;
88 for (
uint i = 0; i < DM; ++i) {
91 if (func_select(p_dense_mask->Ax[i])) {
92 const auto& row = p_lil_M->
Ar[i];
94 for (
const auto& j_x : row) {
95 const uint j = j_x.first;
96 sum = func_add(sum, func_multiply(j_x.second, p_dense_v->Ax[j]));
98 if ((sum != sum_init) && early_exit)
break;
102 p_dense_r->
Ax[i] = sum;
Status of library operation execution.
Definition: cpu_mxv.hpp:44
std::string get_description() override
Definition: cpu_mxv.hpp:52
std::string get_name() override
Definition: cpu_mxv.hpp:48
Status execute(const DispatchContext &ctx) override
Definition: cpu_mxv.hpp:56
~Algo_mxv_masked_cpu() override=default
CPU one-dim array for dense vector representation.
Definition: cpu_formats.hpp:74
std::vector< T > Ax
Definition: cpu_formats.hpp:80
CPU list-of-list matrix format for fast incremental build.
Definition: cpu_formats.hpp:107
std::vector< Row > Ar
Definition: cpu_formats.hpp:117
Algorithm suitable to process a schedule task based on the task's string key.
Definition: registry.hpp:66
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
Execution context of a single task.
Definition: dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition: dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition: time_profiler.hpp:92