spla
Loading...
Searching...
No Matches
cl_v_assign.hpp
Go to the documentation of this file.
1/**********************************************************************************/
2/* This file is part of spla project */
3/* https://github.com/SparseLinearAlgebra/spla */
4/**********************************************************************************/
5/* MIT License */
6/* */
7/* Copyright (c) 2023 SparseLinearAlgebra */
8/* */
9/* Permission is hereby granted, free of charge, to any person obtaining a copy */
10/* of this software and associated documentation files (the "Software"), to deal */
11/* in the Software without restriction, including without limitation the rights */
12/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
13/* copies of the Software, and to permit persons to whom the Software is */
14/* furnished to do so, subject to the following conditions: */
15/* */
16/* The above copyright notice and this permission notice shall be included in all */
17/* copies or substantial portions of the Software. */
18/* */
19/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
20/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
21/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
22/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
23/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
24/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
25/* SOFTWARE. */
26/**********************************************************************************/
27
28#ifndef SPLA_CL_V_ASSIGN_HPP
29#define SPLA_CL_V_ASSIGN_HPP
30
32
33#include <core/dispatcher.hpp>
34#include <core/registry.hpp>
35#include <core/top.hpp>
36#include <core/tscalar.hpp>
37#include <core/ttype.hpp>
38#include <core/tvector.hpp>
39
40#include <opencl/cl_formats.hpp>
43
44#include <sstream>
45
46namespace spla {
47
48 template<typename T>
50 public:
// Compiler-generated destructor; `override` means it replaces a virtual destructor of the base class.
51 ~Algo_v_assign_masked_cl() override = default;
52
53 std::string get_name() override {
54 return "v_assign_masked";
55 }
56
57 std::string get_description() override {
58 return "parallel vector masked assignment on opencl device";
59 }
60
61 Status execute(const DispatchContext& ctx) override {
62 auto t = ctx.task.template cast_safe<ScheduleTask_v_assign_masked>();
63 ref_ptr<TVector<T>> mask = t->mask.template cast_safe<TVector<T>>();
64
65 if (mask->is_valid(FormatVector::AccCoo))
66 return execute_sp2dn(ctx);
67 if (mask->is_valid(FormatVector::AccDense))
68 return execute_dn2dn(ctx);
69
70 return execute_sp2dn(ctx);
71 }
72
73 private:
74 Status execute_dn2dn(const DispatchContext& ctx) {
75 TIME_PROFILE_SCOPE("opencl/vector_assign_dense2dense");
76
77 auto t = ctx.task.template cast_safe<ScheduleTask_v_assign_masked>();
78
79 auto r = t->r.template cast_safe<TVector<T>>();
80 auto mask = t->mask.template cast_safe<TVector<T>>();
81 auto value = t->value.template cast_safe<TScalar<T>>();
82 auto op_assign = t->op_assign.template cast_safe<TOpBinary<T, T, T>>();
83 auto op_select = t->op_select.template cast_safe<TOpSelect<T>>();
84
85 r->validate_rwd(FormatVector::AccDense);
86 mask->validate_rw(FormatVector::AccDense);
87
88 std::shared_ptr<CLProgram> program;
89 if (!ensure_kernel(op_assign, op_select, program)) return Status::CompilationError;
90
91 auto* p_cl_r_dense = r->template get<CLDenseVec<T>>();
92 const auto* p_cl_mask_dense = mask->template get<CLDenseVec<T>>();
93 auto* p_cl_acc = get_acc_cl();
94 auto& queue = p_cl_acc->get_queue_default();
95
96 auto kernel_dense_to_dense = program->make_kernel("assign_dense_to_dense");
97 kernel_dense_to_dense.setArg(0, p_cl_r_dense->Ax);
98 kernel_dense_to_dense.setArg(1, p_cl_mask_dense->Ax);
99 kernel_dense_to_dense.setArg(2, value->get_value());
100 kernel_dense_to_dense.setArg(3, r->get_n_rows());
101
102 uint n_groups_to_dispatch = div_up_clamp(r->get_n_rows(), m_block_size, 1, 256);
103
104 cl::NDRange global(m_block_size * n_groups_to_dispatch);
105 cl::NDRange local(m_block_size);
106 queue.enqueueNDRangeKernel(kernel_dense_to_dense, cl::NDRange(), global, local);
107
108 return Status::Ok;
109 }
110
111 Status execute_sp2dn(const DispatchContext& ctx) {
112 TIME_PROFILE_SCOPE("opencl/vector_assign_sparse2dense");
113
114 auto t = ctx.task.template cast_safe<ScheduleTask_v_assign_masked>();
115
116 auto r = t->r.template cast_safe<TVector<T>>();
117 auto mask = t->mask.template cast_safe<TVector<T>>();
118 auto value = t->value.template cast_safe<TScalar<T>>();
119 auto op_assign = t->op_assign.template cast_safe<TOpBinary<T, T, T>>();
120 auto op_select = t->op_select.template cast_safe<TOpSelect<T>>();
121
122 r->validate_rwd(FormatVector::AccDense);
123 mask->validate_rw(FormatVector::AccCoo);
124
125 auto* p_cl_r_dense = r->template get<CLDenseVec<T>>();
126 const auto* p_cl_mask_coo = mask->template get<CLCooVec<T>>();
127 auto* p_cl_acc = get_acc_cl();
128 auto& queue = p_cl_acc->get_queue_default();
129
130 if (p_cl_mask_coo->values == 0) {
131 LOG_MSG(Status::Ok, "nothing to do");
132 return Status::Ok;
133 }
134
135 std::shared_ptr<CLProgram> program;
136 if (!ensure_kernel(op_assign, op_select, program)) return Status::CompilationError;
137
138 auto kernel_sparse_to_dense = program->make_kernel("assign_sparse_to_dense");
139 kernel_sparse_to_dense.setArg(0, p_cl_r_dense->Ax);
140 kernel_sparse_to_dense.setArg(1, p_cl_mask_coo->Ai);
141 kernel_sparse_to_dense.setArg(2, p_cl_mask_coo->Ax);
142 kernel_sparse_to_dense.setArg(3, value->get_value());
143 kernel_sparse_to_dense.setArg(4, p_cl_mask_coo->values);
144
145 uint n_groups_to_dispatch = div_up_clamp(p_cl_mask_coo->values, m_block_size, 1, 256);
146
147 cl::NDRange global(m_block_size * n_groups_to_dispatch);
148 cl::NDRange local(m_block_size);
149 queue.enqueueNDRangeKernel(kernel_sparse_to_dense, cl::NDRange(), global, local);
150
151 return Status::Ok;
152 }
153
154 bool ensure_kernel(const ref_ptr<TOpBinary<T, T, T>>& op_assign, const ref_ptr<TOpSelect<T>>& op_select, std::shared_ptr<CLProgram>& program) {
155 m_block_size = get_acc_cl()->get_default_wgs();
156
157 CLProgramBuilder program_builder;
158 program_builder
159 .set_name("vector_assign")
160 .add_type("TYPE", get_ttype<T>().template as<Type>())
161 .add_op("OP_BINARY", op_assign.template as<OpBinary>())
162 .add_op("OP_SELECT", op_select.template as<OpSelect>())
163 .set_source(source_vector_assign)
164 .acquire();
165
166 program = program_builder.get_program();
167
168 return true;
169 }
170
171 private:
172 uint m_block_size = 0;
173 };
174
175}// namespace spla
176
177#endif//SPLA_CL_V_ASSIGN_HPP
Status of library operation execution.
Definition cl_v_assign.hpp:49
std::string get_name() override
Definition cl_v_assign.hpp:53
Status execute(const DispatchContext &ctx) override
Definition cl_v_assign.hpp:61
std::string get_description() override
Definition cl_v_assign.hpp:57
~Algo_v_assign_masked_cl() override=default
Algorithm suitable to process schedule task based on task string key.
Definition registry.hpp:66
Automates reference counting and behaves as shared smart pointer.
Definition ref.hpp:117
std::uint32_t uint
Library index and size type.
Definition config.hpp:56
#define LOG_MSG(status, msg)
Definition logger.hpp:66
Definition algorithm.hpp:37
Execution context of a single task.
Definition dispatcher.hpp:46
ref_ptr< ScheduleTask > task
Definition dispatcher.hpp:48
#define TIME_PROFILE_SCOPE(name)
Definition time_profiler.hpp:92