spla — cl_debug.hpp
Source listing of this file (extracted from the generated project documentation).
1 /**********************************************************************************/
2 /* This file is part of spla project */
3 /* https://github.com/JetBrains-Research/spla */
4 /**********************************************************************************/
5 /* MIT License */
6 /* */
7 /* Copyright (c) 2023 SparseLinearAlgebra */
8 /* */
9 /* Permission is hereby granted, free of charge, to any person obtaining a copy */
10 /* of this software and associated documentation files (the "Software"), to deal */
11 /* in the Software without restriction, including without limitation the rights */
12 /* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
13 /* copies of the Software, and to permit persons to whom the Software is */
14 /* furnished to do so, subject to the following conditions: */
15 /* */
16 /* The above copyright notice and this permission notice shall be included in all */
17 /* copies or substantial portions of the Software. */
18 /* */
19 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
20 /* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
21 /* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
22 /* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
23 /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
24 /* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
25 /* SOFTWARE. */
26 /**********************************************************************************/
27 
#ifndef SPLA_CL_DEBUG_HPP
#define SPLA_CL_DEBUG_HPP

// NOTE(review): original line 31 (an #include) was lost in the documentation
// dump this file was recovered from -- presumably the header(s) providing
// TIME_PROFILE_SUBSCOPE / TIME_PROFILE_SUBLABEL and the OpenCL C++ bindings
// (cl::Event, cl::CommandQueue). TODO: confirm against the repository.

#ifndef SPLA_RELEASE
    /**
     * @brief Debug-only: block until every command previously enqueued on
     *        `queue` has completed. Expands to nothing in release builds.
     */
    #define CL_FINISH(queue) queue.finish()

    /**
     * @brief Dispatch an OpenCL kernel and record its timing into the active
     *        time-profile sub-label.
     *
     * Drains `queue` before and after the dispatch so the profiled interval
     * covers only this kernel. `...` forwards the NDRange arguments
     * (offset, global, local) of cl::CommandQueue::enqueueNDRangeKernel;
     * the wait-list slot is fixed to nullptr and the event is captured
     * internally for profiling queries.
     *
     * queued_nano   = COMMAND_END - COMMAND_QUEUED (total time in queue + run)
     * executed_nano = COMMAND_END - COMMAND_START  (device execution time)
     */
    #define CL_DISPATCH_PROFILED(name, queue, kernel, ...)                                                  \
        do {                                                                                                \
            CL_FINISH(queue);                                                                               \
            {                                                                                               \
                TIME_PROFILE_SUBSCOPE(name);                                                                \
                cl::Event spla_cl_event;                                                                    \
                queue.enqueueNDRangeKernel(kernel, __VA_ARGS__, nullptr, &spla_cl_event);                   \
                spla_cl_event.wait();                                                                       \
                cl_ulong spla_t_queued = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong spla_t_start  = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong spla_t_end    = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = spla_t_end - spla_t_queued;                           \
                TIME_PROFILE_SUBLABEL.executed_nano = spla_t_end - spla_t_start;                            \
                CL_FINISH(queue);                                                                           \
            }                                                                                               \
        } while (false)

    /**
     * @brief Read an OpenCL buffer and record the transfer timing into the
     *        active time-profile sub-label.
     *
     * Same drain/measure discipline as CL_DISPATCH_PROFILED. `...` forwards
     * the (blocking, offset, size, ptr) arguments of
     * cl::CommandQueue::enqueueReadBuffer.
     */
    #define CL_READ_PROFILED(name, queue, buffer, ...)                                                      \
        do {                                                                                                \
            CL_FINISH(queue);                                                                               \
            {                                                                                               \
                TIME_PROFILE_SUBSCOPE(name);                                                                \
                cl::Event spla_cl_event;                                                                    \
                queue.enqueueReadBuffer(buffer, __VA_ARGS__, nullptr, &spla_cl_event);                      \
                spla_cl_event.wait();                                                                       \
                cl_ulong spla_t_queued = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong spla_t_start  = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong spla_t_end    = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = spla_t_end - spla_t_queued;                           \
                TIME_PROFILE_SUBLABEL.executed_nano = spla_t_end - spla_t_start;                            \
                CL_FINISH(queue);                                                                           \
            }                                                                                               \
        } while (false)

    /**
     * @brief Fetch a device counter value into `value`, recording the timing
     *        of the transfer into the active time-profile sub-label.
     */
    #define CL_COUNTER_GET(name, queue, counter, value)                                                     \
        do {                                                                                                \
            CL_FINISH(queue);                                                                               \
            {                                                                                               \
                TIME_PROFILE_SUBSCOPE(name);                                                                \
                cl::Event spla_cl_event;                                                                    \
                (value) = (counter).get(queue, &spla_cl_event);                                             \
                spla_cl_event.wait();                                                                       \
                cl_ulong spla_t_queued = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong spla_t_start  = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong spla_t_end    = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = spla_t_end - spla_t_queued;                           \
                TIME_PROFILE_SUBLABEL.executed_nano = spla_t_end - spla_t_start;                            \
                CL_FINISH(queue);                                                                           \
            }                                                                                               \
        } while (false)

    /**
     * @brief Store `value` into a device counter, recording the timing of the
     *        transfer into the active time-profile sub-label.
     */
    #define CL_COUNTER_SET(name, queue, counter, value)                                                     \
        do {                                                                                                \
            CL_FINISH(queue);                                                                               \
            {                                                                                               \
                TIME_PROFILE_SUBSCOPE(name);                                                                \
                cl::Event spla_cl_event;                                                                    \
                (counter).set(queue, (value), &spla_cl_event);                                              \
                spla_cl_event.wait();                                                                       \
                cl_ulong spla_t_queued = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong spla_t_start  = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong spla_t_end    = spla_cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = spla_t_end - spla_t_queued;                           \
                TIME_PROFILE_SUBLABEL.executed_nano = spla_t_end - spla_t_start;                            \
                CL_FINISH(queue);                                                                           \
            }                                                                                               \
        } while (false)

    /**
     * @brief Open a profiled region for arbitrary user code on `queue`.
     *
     * Deliberately leaves a `do { ... {` scope open; it MUST be closed with a
     * matching CL_PROFILE_END() in the same enclosing scope. The queue is
     * aliased so CL_PROFILE_END() can drain the same queue after the region.
     */
    #define CL_PROFILE_BEGIN(name, queue)       \
        do {                                    \
            auto& spla_prf_queue = queue;       \
            CL_FINISH(spla_prf_queue);          \
            {                                   \
                TIME_PROFILE_SUBSCOPE(name);

    /**
     * @brief Close a region opened by CL_PROFILE_BEGIN, draining the captured
     *        queue before the scope exits.
     */
    #define CL_PROFILE_END()          \
        }                             \
        CL_FINISH(spla_prf_queue);    \
        }                             \
        while (false)
#else
    // Release builds: all profiling is compiled out; the dispatch/read/counter
    // macros collapse to the raw OpenCL / counter calls with no events, no
    // waits and no queue drains.
    #define CL_FINISH(queue)
    #define CL_DISPATCH_PROFILED(name, queue, kernel, ...) queue.enqueueNDRangeKernel(kernel, __VA_ARGS__)
    #define CL_READ_PROFILED(name, queue, buffer, ...)     queue.enqueueReadBuffer(buffer, __VA_ARGS__)
    #define CL_COUNTER_GET(name, queue, counter, value)    (value) = (counter).get(queue)
    #define CL_COUNTER_SET(name, queue, counter, value)    (counter).set(queue, (value))
    #define CL_PROFILE_BEGIN(name, queue)
    #define CL_PROFILE_END()
#endif

#endif//SPLA_CL_DEBUG_HPP