spla
Loading...
Searching...
No Matches
cl_debug.hpp
Go to the documentation of this file.
/**********************************************************************************/
/* This file is part of spla project                                              */
/* https://github.com/JetBrains-Research/spla                                     */
/**********************************************************************************/
/* MIT License                                                                    */
/*                                                                                */
/* Copyright (c) 2023 SparseLinearAlgebra                                         */
/*                                                                                */
/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
/* of this software and associated documentation files (the "Software"), to deal  */
/* in the Software without restriction, including without limitation the rights   */
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
/* copies of the Software, and to permit persons to whom the Software is          */
/* furnished to do so, subject to the following conditions:                       */
/*                                                                                */
/* The above copyright notice and this permission notice shall be included in all */
/* copies or substantial portions of the Software.                                */
/*                                                                                */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
/* SOFTWARE.                                                                      */
/**********************************************************************************/
#ifndef SPLA_CL_DEBUG_HPP
#define SPLA_CL_DEBUG_HPP

// NOTE(review): the documentation dump this file was recovered from omits its
// original line 31 — presumably an #include that provides TIME_PROFILE_SUBSCOPE /
// TIME_PROFILE_SUBLABEL and the OpenCL C++ bindings (cl::Event, cl_ulong).
// Confirm against the upstream repository before building.
//
// NOTE(review): the macro-local names below (__cl_event, __queued, __start,
// __end, __prfq) use the double-underscore prefix, which the C++ standard
// reserves for the implementation; kept as-is to match the original, but a
// rename (e.g. spla__cl_event) would be strictly conforming.

#ifndef SPLA_RELEASE
    /** Debug builds: block the host until every command enqueued on `queue` completes. */
    #define CL_FINISH(queue) queue.finish()

    /**
     * @brief Enqueue `kernel` on `queue` and profile its execution.
     *
     * Drains the queue first so the measurement is isolated from earlier work,
     * then runs the kernel inside a TIME_PROFILE_SUBSCOPE named `name`, waits on
     * the completion event, and stores the queued->end and start->end durations
     * (nanoseconds, read from the OpenCL event profiling counters) into
     * TIME_PROFILE_SUBLABEL. Trailing arguments are forwarded as the NDRange
     * arguments of enqueueNDRangeKernel.
     */
    #define CL_DISPATCH_PROFILED(name, queue, kernel, ...)                                          \
        do {                                                                                        \
            CL_FINISH(queue);                                                                       \
            {                                                                                       \
                TIME_PROFILE_SUBSCOPE(name);                                                        \
                cl::Event __cl_event;                                                               \
                queue.enqueueNDRangeKernel(kernel, __VA_ARGS__, nullptr, &__cl_event);              \
                __cl_event.wait();                                                                  \
                cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong __start  = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong __end    = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = __end - __queued;                             \
                TIME_PROFILE_SUBLABEL.executed_nano = __end - __start;                              \
                CL_FINISH(queue);                                                                   \
            }                                                                                       \
        } while (false)

    /**
     * @brief Read `buffer` from `queue` and profile the transfer.
     *
     * Same structure as CL_DISPATCH_PROFILED, but around enqueueReadBuffer;
     * trailing arguments are forwarded as the blocking/offset/size/ptr
     * arguments of the read call.
     */
    #define CL_READ_PROFILED(name, queue, buffer, ...)                                              \
        do {                                                                                        \
            CL_FINISH(queue);                                                                       \
            {                                                                                       \
                TIME_PROFILE_SUBSCOPE(name);                                                        \
                cl::Event __cl_event;                                                               \
                queue.enqueueReadBuffer(buffer, __VA_ARGS__, nullptr, &__cl_event);                 \
                __cl_event.wait();                                                                  \
                cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong __start  = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong __end    = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = __end - __queued;                             \
                TIME_PROFILE_SUBLABEL.executed_nano = __end - __start;                              \
                CL_FINISH(queue);                                                                   \
            }                                                                                       \
        } while (false)

    /**
     * @brief Fetch `counter`'s value into `value` via `queue`, profiled.
     *
     * `counter` is expected to expose get(queue, &event) returning the value
     * and emitting a completion event; the event's profiling counters are
     * folded into TIME_PROFILE_SUBLABEL as above.
     */
    #define CL_COUNTER_GET(name, queue, counter, value)                                             \
        do {                                                                                        \
            CL_FINISH(queue);                                                                       \
            {                                                                                       \
                TIME_PROFILE_SUBSCOPE(name);                                                        \
                cl::Event __cl_event;                                                               \
                (value) = (counter).get(queue, &__cl_event);                                        \
                __cl_event.wait();                                                                  \
                cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong __start  = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong __end    = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = __end - __queued;                             \
                TIME_PROFILE_SUBLABEL.executed_nano = __end - __start;                              \
                CL_FINISH(queue);                                                                   \
            }                                                                                       \
        } while (false)

    /**
     * @brief Store `value` into `counter` via `queue`, profiled.
     *
     * Mirror of CL_COUNTER_GET for counter.set(queue, value, &event).
     */
    #define CL_COUNTER_SET(name, queue, counter, value)                                             \
        do {                                                                                        \
            CL_FINISH(queue);                                                                       \
            {                                                                                       \
                TIME_PROFILE_SUBSCOPE(name);                                                        \
                cl::Event __cl_event;                                                               \
                (counter).set(queue, (value), &__cl_event);                                         \
                __cl_event.wait();                                                                  \
                cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();     \
                cl_ulong __start  = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();      \
                cl_ulong __end    = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();        \
                TIME_PROFILE_SUBLABEL.queued_nano   = __end - __queued;                             \
                TIME_PROFILE_SUBLABEL.executed_nano = __end - __start;                              \
                CL_FINISH(queue);                                                                   \
            }                                                                                       \
        } while (false)

    /**
     * @brief Open a profiled region on `queue`; MUST be paired with CL_PROFILE_END().
     *
     * Deliberately leaves a do{ and two scopes open — CL_PROFILE_END() closes
     * them. The queue is cached in __prfq so the END macro can drain it without
     * taking arguments, and the queue is drained on entry and on exit so only
     * the user's code in between is attributed to the TIME_PROFILE_SUBSCOPE.
     */
    #define CL_PROFILE_BEGIN(name, queue)  \
        do {                               \
            auto& __prfq = queue;          \
            CL_FINISH(__prfq);             \
            {                              \
                TIME_PROFILE_SUBSCOPE(name);

    /** @brief Close a region opened by CL_PROFILE_BEGIN (drains the cached queue). */
    #define CL_PROFILE_END() \
        }                    \
        CL_FINISH(__prfq);   \
        }                    \
        while (false)
#else
    /* Release builds: all profiling collapses to the bare, unprofiled OpenCL call. */
    #define CL_FINISH(queue)
    #define CL_DISPATCH_PROFILED(name, queue, kernel, ...) queue.enqueueNDRangeKernel(kernel, __VA_ARGS__)
    #define CL_READ_PROFILED(name, queue, buffer, ...)     queue.enqueueReadBuffer(buffer, __VA_ARGS__)
    #define CL_COUNTER_GET(name, queue, counter, value)    (value) = (counter).get(queue)
    #define CL_COUNTER_SET(name, queue, counter, value)    (counter).set(queue, (value))
    #define CL_PROFILE_BEGIN(name, queue)
    #define CL_PROFILE_END()
#endif

#endif//SPLA_CL_DEBUG_HPP