spla
cl_fill.hpp
Go to the documentation of this file.
1 /**********************************************************************************/
2 /* This file is part of spla project */
3 /* https://github.com/JetBrains-Research/spla */
4 /**********************************************************************************/
5 /* MIT License */
6 /* */
7 /* Copyright (c) 2023 SparseLinearAlgebra */
8 /* */
9 /* Permission is hereby granted, free of charge, to any person obtaining a copy */
10 /* of this software and associated documentation files (the "Software"), to deal */
11 /* in the Software without restriction, including without limitation the rights */
12 /* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
13 /* copies of the Software, and to permit persons to whom the Software is */
14 /* furnished to do so, subject to the following conditions: */
15 /* */
16 /* The above copyright notice and this permission notice shall be included in all */
17 /* copies or substantial portions of the Software. */
18 /* */
19 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
20 /* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
21 /* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
22 /* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
23 /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
24 /* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
25 /* SOFTWARE. */
26 /**********************************************************************************/
27 
28 #ifndef SPLA_CL_FILL_HPP
29 #define SPLA_CL_FILL_HPP
30 
34 
35 namespace spla {
36 
37  template<typename T>
38  void cl_fill_zero(cl::CommandQueue& queue, cl::Buffer& values, uint n) {
39  CLProgramBuilder builder;
40  builder.set_name("fill")
41  .add_type("TYPE", get_ttype<T>().template as<Type>())
42  .set_source(source_fill)
43  .acquire();
44 
45  auto fill_zero = builder.make_kernel("fill_zero");
46  auto* acc = get_acc_cl();
47 
48  uint block_size = acc->get_default_wgs();
49  uint n_groups_to_dispatch = std::max(std::min(n / block_size, uint(1024)), uint(1));
50 
51  cl::NDRange global(block_size * n_groups_to_dispatch);
52  cl::NDRange local(block_size);
53 
54  fill_zero.setArg(0, values);
55  fill_zero.setArg(1, n);
56  queue.enqueueNDRangeKernel(fill_zero, cl::NDRange(), global, local);
57  }
58 
59  template<typename T>
60  void cl_fill_value(cl::CommandQueue& queue, const cl::Buffer& values, uint n, T value) {
61  CLProgramBuilder builder;
62  builder.set_name("fill")
63  .add_type("TYPE", get_ttype<T>().template as<Type>())
64  .set_source(source_fill)
65  .acquire();
66 
67  auto fill_value = builder.make_kernel("fill_value");
68  auto* acc = get_acc_cl();
69 
70  uint block_size = acc->get_default_wgs();
71  uint n_groups_to_dispatch = std::max(std::min(n / block_size, uint(1024)), uint(1));
72 
73  cl::NDRange global(block_size * n_groups_to_dispatch);
74  cl::NDRange local(block_size);
75 
76  fill_value.setArg(0, values);
77  fill_value.setArg(1, n);
78  fill_value.setArg(2, value);
79  queue.enqueueNDRangeKernel(fill_value, cl::NDRange(), global, local);
80  }
81 
82 }// namespace spla
83 
84 #endif//SPLA_CL_FILL_HPP
Runtime opencl program builder.
Definition: cl_program_builder.hpp:55
CLProgramBuilder & set_name(const char *name)
Definition: cl_program_builder.cpp:37
CLProgramBuilder & add_type(const char *alias, const ref_ptr< Type > &type)
Definition: cl_program_builder.cpp:45
cl::Kernel make_kernel(const char *name)
Definition: cl_program_builder.hpp:67
CLProgramBuilder & set_source(const char *source)
Definition: cl_program_builder.cpp:61
void acquire()
Definition: cl_program_builder.cpp:65
std::uint32_t uint
Library index and size type.
Definition: config.hpp:56
Definition: algorithm.hpp:37
void cl_fill_value(cl::CommandQueue &queue, const cl::Buffer &values, uint n, T value)
Definition: cl_fill.hpp:60
void cl_fill_zero(cl::CommandQueue &queue, cl::Buffer &values, uint n)
Definition: cl_fill.hpp:38
T min(T a, T b)
Definition: op.cpp:152
T max(T a, T b)
Definition: op.cpp:155