spla
cl_format_coo_vec.hpp
Go to the documentation of this file.
1 /**********************************************************************************/
2 /* This file is part of spla project */
3 /* https://github.com/SparseLinearAlgebra/spla */
4 /**********************************************************************************/
5 /* MIT License */
6 /* */
7 /* Copyright (c) 2023 SparseLinearAlgebra */
8 /* */
9 /* Permission is hereby granted, free of charge, to any person obtaining a copy */
10 /* of this software and associated documentation files (the "Software"), to deal */
11 /* in the Software without restriction, including without limitation the rights */
12 /* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
13 /* copies of the Software, and to permit persons to whom the Software is */
14 /* furnished to do so, subject to the following conditions: */
15 /* */
16 /* The above copyright notice and this permission notice shall be included in all */
17 /* copies or substantial portions of the Software. */
18 /* */
19 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
20 /* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
21 /* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
22 /* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
23 /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
24 /* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
25 /* SOFTWARE. */
26 /**********************************************************************************/
27 
28 #ifndef SPLA_CL_FORMAT_COO_VEC_HPP
29 #define SPLA_CL_FORMAT_COO_VEC_HPP
30 
31 #include <opencl/cl_debug.hpp>
32 #include <opencl/cl_fill.hpp>
33 #include <opencl/cl_formats.hpp>
36 
37 namespace spla {
38 
44  template<typename T>
45  void cl_coo_vec_init(const std::size_t n_values,
46  const uint* Ai,
47  const T* Ax,
48  CLCooVec<T>& storage) {
49  if (n_values == 0) {
50  LOG_MSG(Status::Ok, "nothing to do");
51 
52  storage.values = 0;
53  storage.Ai = cl::Buffer();
54  storage.Ax = cl::Buffer();
55  return;
56  }
57 
58  const std::size_t buffer_size_Ai = n_values * sizeof(uint);
59  const std::size_t buffer_size_Ax = n_values * sizeof(T);
60  const auto flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR;
61 
62  cl::Buffer buffer_Ai(get_acc_cl()->get_context(), flags, buffer_size_Ai, (void*) Ai);
63  cl::Buffer buffer_Ax(get_acc_cl()->get_context(), flags, buffer_size_Ax, (void*) Ax);
64 
65  storage.Ai = std::move(buffer_Ai);
66  storage.Ax = std::move(buffer_Ax);
67 
68  storage.values = n_values;
69  }
70 
71  template<typename T>
72  void cl_coo_vec_resize(const std::size_t n_values,
73  CLCooVec<T>& storage) {
74  if (n_values == 0) {
75  LOG_MSG(Status::Ok, "nothing to do");
76 
77  storage.values = 0;
78  storage.Ai = cl::Buffer();
79  storage.Ax = cl::Buffer();
80  return;
81  }
82 
83  const std::size_t buffer_size_Ai = n_values * sizeof(uint);
84  const std::size_t buffer_size_Ax = n_values * sizeof(T);
85  const auto flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS;
86 
87  cl::Buffer buffer_Ai(get_acc_cl()->get_context(), flags, buffer_size_Ai);
88  cl::Buffer buffer_Ax(get_acc_cl()->get_context(), flags, buffer_size_Ax);
89 
90  storage.Ai = std::move(buffer_Ai);
91  storage.Ax = std::move(buffer_Ax);
92  storage.values = n_values;
93  }
94 
95  template<typename T>
96  void cl_coo_vec_clear(CLCooVec<T>& storage) {
97  storage.Ai = cl::Buffer();
98  storage.Ax = cl::Buffer();
99  storage.values = 0;
100  }
101 
102  template<typename T>
103  void cl_coo_vec_read(const std::size_t n_values,
104  uint* Ai,
105  T* Ax,
106  const CLCooVec<T>& storage,
107  cl::CommandQueue& queue,
108  cl_mem_flags staging_flags = CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
109  bool blocking = true) {
110  if (n_values == 0) {
111  LOG_MSG(Status::Ok, "nothing to do");
112  return;
113  }
114 
115  const std::size_t buffer_size_Ai = n_values * sizeof(uint);
116  const std::size_t buffer_size_Ax = n_values * sizeof(T);
117 
118  cl::Buffer staging_Ai(get_acc_cl()->get_context(), staging_flags, buffer_size_Ai);
119  cl::Buffer staging_Ax(get_acc_cl()->get_context(), staging_flags, buffer_size_Ax);
120 
121  queue.enqueueCopyBuffer(storage.Ai, staging_Ai, 0, 0, buffer_size_Ai);
122  queue.enqueueCopyBuffer(storage.Ax, staging_Ax, 0, 0, buffer_size_Ax);
123  queue.enqueueReadBuffer(staging_Ai, blocking, 0, buffer_size_Ai, Ai);
124  queue.enqueueReadBuffer(staging_Ax, blocking, 0, buffer_size_Ax, Ax);
125  }
126 
127  template<typename T>
128  void cl_coo_vec_to_dense(const std::size_t n_rows,
129  const T fill_value,
130  const CLCooVec<T>& in,
131  CLDenseVec<T>& out,
132  cl::CommandQueue& queue) {
133  CLProgramBuilder builder;
134  builder.set_name("vector_format")
135  .add_type("TYPE", get_ttype<T>().template as<Type>())
136  .set_source(source_vector_formats)
137  .acquire();
138 
139  cl_fill_value<T>(queue, out.Ax, n_rows, fill_value);
140 
141  if (in.values == 0) {
142  LOG_MSG(Status::Ok, "nothing to do");
143  return;
144  }
145 
146  auto* acc = get_acc_cl();
147 
148  uint block_size = acc->get_default_wgs();
149  uint n_groups_to_dispatch = std::max(std::min(in.values / block_size, uint(1024)), uint(1));
150 
151  auto kernel = builder.make_kernel("sparse_to_dense");
152  kernel.setArg(0, in.Ai);
153  kernel.setArg(1, in.Ax);
154  kernel.setArg(2, out.Ax);
155  kernel.setArg(3, uint(in.values));
156 
157  cl::NDRange global(block_size * n_groups_to_dispatch);
158  cl::NDRange local(block_size);
159  queue.enqueueNDRangeKernel(kernel, cl::NDRange(), global, local);
160  CL_FINISH(queue);
161  }
162 
167 }// namespace spla
168 
169 #endif//SPLA_CL_FORMAT_COO_VEC_HPP
#define CL_FINISH(queue)
Definition: cl_debug.hpp:34
OpenCL list-of-coordinates sparse vector representation.
Definition: cl_formats.hpp:77
cl::Buffer Ax
Definition: cl_formats.hpp:84
cl::Buffer Ai
Definition: cl_formats.hpp:83
OpenCL one-dim array for dense vector representation.
Definition: cl_formats.hpp:61
cl::Buffer Ax
Definition: cl_formats.hpp:67
Runtime opencl program builder.
Definition: cl_program_builder.hpp:55
CLProgramBuilder & set_name(const char *name)
Definition: cl_program_builder.cpp:37
CLProgramBuilder & add_type(const char *alias, const ref_ptr< Type > &type)
Definition: cl_program_builder.cpp:45
cl::Kernel make_kernel(const char *name)
Definition: cl_program_builder.hpp:67
CLProgramBuilder & set_source(const char *source)
Definition: cl_program_builder.cpp:61
void acquire()
Definition: cl_program_builder.cpp:65
uint values
Definition: tdecoration.hpp:58
void cl_coo_vec_read(const std::size_t n_values, uint *Ai, T *Ax, const CLCooVec< T > &storage, cl::CommandQueue &queue, cl_mem_flags staging_flags=CL_MEM_READ_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, bool blocking=true)
Definition: cl_format_coo_vec.hpp:103
void cl_coo_vec_to_dense(const std::size_t n_rows, const T fill_value, const CLCooVec< T > &in, CLDenseVec< T > &out, cl::CommandQueue &queue)
Definition: cl_format_coo_vec.hpp:128
void cl_coo_vec_resize(const std::size_t n_values, CLCooVec< T > &storage)
Definition: cl_format_coo_vec.hpp:72
void cl_coo_vec_clear(CLCooVec< T > &storage)
Definition: cl_format_coo_vec.hpp:96
void cl_coo_vec_init(const std::size_t n_values, const uint *Ai, const T *Ax, CLCooVec< T > &storage)
Definition: cl_format_coo_vec.hpp:45
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
#define LOG_MSG(status, msg)
Definition: logger.hpp:66
Definition: algorithm.hpp:37
T min(T a, T b)
Definition: op.cpp:152
T max(T a, T b)
Definition: op.cpp:155