Go to the documentation of this file.
28 #ifndef SPLA_CL_DEBUG_HPP
29 #define SPLA_CL_DEBUG_HPP
// CL_FINISH(queue): expands to a call of finish() on the given queue.
// NOTE(review): a second, empty definition of CL_FINISH appears further down
// in this listing; the preprocessor conditional that selects between the two
// is not shown here (the embedded original line numbers skip values).
34 #define CL_FINISH(queue) queue.finish()
// CL_DISPATCH_PROFILED(name, queue, kernel, ...): enqueue `kernel` on `queue`
// and record event timings into the current profiling sub-label.
//
// Visible steps:
//   1. TIME_PROFILE_SUBSCOPE(name) is invoked with the caller-supplied name.
//   2. A local cl::Event is declared and its address is passed as the last
//      argument of queue.enqueueNDRangeKernel(kernel, <variadic args>, nullptr, &event).
//      The nullptr sits in the slot just before the event pointer
//      (presumably the wait-list argument - TODO confirm against the bindings).
//   3. The QUEUED, START and END profiling counters are read from the event.
//   4. TIME_PROFILE_SUBLABEL.queued_nano   = END - QUEUED
//      TIME_PROFILE_SUBLABEL.executed_nano = END - START
//
// NOTE(review): queued_nano is END - QUEUED, so it includes the execution time
// rather than only the time spent waiting (START - QUEUED) - confirm intended.
// NOTE(review): the original line numbers skip 37-39, 43 and 49-52, and the
// last visible line still ends with a continuation backslash, so the macro's
// opening/closing lines and whatever sits between the enqueue and the counter
// reads are not shown in this listing.
// NOTE(review): identifiers containing a double underscore (__cl_event,
// __queued, __start, __end) are reserved for the implementation in C++.
36 #define CL_DISPATCH_PROFILED(name, queue, kernel, ...) \
40 TIME_PROFILE_SUBSCOPE(name); \
41 cl::Event __cl_event; \
42 queue.enqueueNDRangeKernel(kernel, __VA_ARGS__, nullptr, &__cl_event); \
44 cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>(); \
45 cl_ulong __start = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); \
46 cl_ulong __end = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); \
47 TIME_PROFILE_SUBLABEL.queued_nano = __end - __queued; \
48 TIME_PROFILE_SUBLABEL.executed_nano = __end - __start; \
// CL_READ_PROFILED(name, queue, buffer, ...): enqueue a read of `buffer` on
// `queue` and record event timings into the current profiling sub-label.
//
// Visible steps:
//   1. TIME_PROFILE_SUBSCOPE(name) is invoked with the caller-supplied name.
//   2. A local cl::Event is declared and its address is passed as the last
//      argument of queue.enqueueReadBuffer(buffer, <variadic args>, nullptr, &event).
//   3. The QUEUED, START and END profiling counters are read from the event.
//   4. TIME_PROFILE_SUBLABEL.queued_nano   = END - QUEUED
//      TIME_PROFILE_SUBLABEL.executed_nano = END - START
//
// NOTE(review): queued_nano is END - QUEUED, so it includes the execution time
// rather than only the time spent waiting (START - QUEUED) - confirm intended.
// NOTE(review): the original line numbers skip 54-56, 60 and 66-69, and the
// last visible line still ends with a continuation backslash, so parts of the
// macro body (including its closing lines) are not shown in this listing.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++.
53 #define CL_READ_PROFILED(name, queue, buffer, ...) \
57 TIME_PROFILE_SUBSCOPE(name); \
58 cl::Event __cl_event; \
59 queue.enqueueReadBuffer(buffer, __VA_ARGS__, nullptr, &__cl_event); \
61 cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>(); \
62 cl_ulong __start = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); \
63 cl_ulong __end = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); \
64 TIME_PROFILE_SUBLABEL.queued_nano = __end - __queued; \
65 TIME_PROFILE_SUBLABEL.executed_nano = __end - __start; \
// CL_COUNTER_GET(name, queue, counter, value): assign the result of
// (counter).get(queue, &event) to `value` and record event timings into the
// current profiling sub-label.
//
// Visible steps:
//   1. TIME_PROFILE_SUBSCOPE(name) is invoked with the caller-supplied name.
//   2. A local cl::Event is declared and handed to counter.get() together
//      with the queue; the call's result is stored into `value`.
//   3. The QUEUED, START and END profiling counters are read from the event.
//   4. TIME_PROFILE_SUBLABEL.queued_nano   = END - QUEUED
//      TIME_PROFILE_SUBLABEL.executed_nano = END - START
//
// `counter` and `value` are parenthesised in the expansion; `queue` is not.
// NOTE(review): queued_nano is END - QUEUED, so it includes the execution time
// rather than only the time spent waiting (START - QUEUED) - confirm intended.
// NOTE(review): the original line numbers skip 71-73, 77 and 83-86, and the
// last visible line still ends with a continuation backslash, so parts of the
// macro body (including its closing lines) are not shown in this listing.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++.
70 #define CL_COUNTER_GET(name, queue, counter, value) \
74 TIME_PROFILE_SUBSCOPE(name); \
75 cl::Event __cl_event; \
76 (value) = (counter).get(queue, &__cl_event); \
78 cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>(); \
79 cl_ulong __start = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); \
80 cl_ulong __end = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); \
81 TIME_PROFILE_SUBLABEL.queued_nano = __end - __queued; \
82 TIME_PROFILE_SUBLABEL.executed_nano = __end - __start; \
// CL_COUNTER_SET(name, queue, counter, value): call
// (counter).set(queue, (value), &event) and record event timings into the
// current profiling sub-label.
//
// Visible steps:
//   1. TIME_PROFILE_SUBSCOPE(name) is invoked with the caller-supplied name.
//   2. A local cl::Event is declared and handed to counter.set() together
//      with the queue and the value to store.
//   3. The QUEUED, START and END profiling counters are read from the event.
//   4. TIME_PROFILE_SUBLABEL.queued_nano   = END - QUEUED
//      TIME_PROFILE_SUBLABEL.executed_nano = END - START
//
// `counter` and `value` are parenthesised in the expansion; `queue` is not.
// NOTE(review): queued_nano is END - QUEUED, so it includes the execution time
// rather than only the time spent waiting (START - QUEUED) - confirm intended.
// NOTE(review): the original line numbers skip 88-90, 94 and 100-103, and the
// last visible line still ends with a continuation backslash, so parts of the
// macro body (including its closing lines) are not shown in this listing.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++.
87 #define CL_COUNTER_SET(name, queue, counter, value) \
91 TIME_PROFILE_SUBSCOPE(name); \
92 cl::Event __cl_event; \
93 (counter).set(queue, (value), &__cl_event); \
95 cl_ulong __queued = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>(); \
96 cl_ulong __start = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); \
97 cl_ulong __end = __cl_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); \
98 TIME_PROFILE_SUBLABEL.queued_nano = __end - __queued; \
99 TIME_PROFILE_SUBLABEL.executed_nano = __end - __start; \
// CL_PROFILE_BEGIN(name, queue): binds a local reference named __prfq to
// `queue` and then invokes TIME_PROFILE_SUBSCOPE(name).
//
// The last visible line has no continuation backslash, so the definition ends
// there. The visible lines do not open a block of their own, so the reference
// and whatever TIME_PROFILE_SUBSCOPE declares appear to land in the scope that
// invokes this macro - presumably so that CL_PROFILE_END can use them;
// TODO confirm, since original lines 105 and 107-108 are not shown here.
// NOTE(review): __prfq contains a double underscore, which is reserved for
// the implementation in C++.
104 #define CL_PROFILE_BEGIN(name, queue) \
106 auto& __prfq = queue; \
109 TIME_PROFILE_SUBSCOPE(name);
// CL_PROFILE_END(): counterpart of CL_PROFILE_BEGIN, taking no arguments.
// NOTE(review): only the header line is visible; it ends with a continuation
// backslash and the original lines 112-116 that follow are missing from this
// listing, so the macro body cannot be described from here.
111 #define CL_PROFILE_END() \
// Alternative definitions of the same macro names without any event or
// timing code. The `name` argument is accepted but unused in every expansion.
// NOTE(review): the preprocessor conditional that chooses between these and
// the instrumented definitions earlier in the listing is not shown here.

// Expands to nothing.
117 #define CL_FINISH(queue)
// Plain kernel enqueue: forwards the kernel and the variadic arguments only.
118 #define CL_DISPATCH_PROFILED(name, queue, kernel, ...) queue.enqueueNDRangeKernel(kernel, __VA_ARGS__)
// Plain buffer read: forwards the buffer and the variadic arguments only.
119 #define CL_READ_PROFILED(name, queue, buffer, ...) queue.enqueueReadBuffer(buffer, __VA_ARGS__)
// Stores the result of counter.get(queue) into `value`; no event is passed.
120 #define CL_COUNTER_GET(name, queue, counter, value) (value) = (counter).get(queue)
// Calls counter.set(queue, value); no event is passed.
121 #define CL_COUNTER_SET(name, queue, counter, value) (counter).set(queue, (value))
// Expands to nothing.
122 #define CL_PROFILE_BEGIN(name, queue)
// Expands to nothing.
123 #define CL_PROFILE_END()