spla
Loading...
Searching...
No Matches
auto_reduce.hpp
Go to the documentation of this file.
1
2// Copyright (c) 2021 - 2023 SparseLinearAlgebra
3// Autogenerated file, do not modify
5
6#pragma once
7
8static const char source_reduce[] = R"(
9
10
// Runs one step of the work-group reduction kept in local memory.
//
// Every work-item whose local id is below block_size / 2 combines its own
// slot with the slot block_size / 2 positions above it, using OP_BINARY, and
// stores the result back into its own slot. The step does nothing when
// block_size is larger than BLOCK_SIZE.
//
//   block_size  width of the segment folded by this step
//   lid         local id of the calling work-item
//   s_sum       local scratch array holding the partial results
//
// TYPE, OP_BINARY and BLOCK_SIZE are not defined in this source; they are
// expected to be supplied when the program is built.
//
// The step ends with a work-group barrier, so it has to be called by all
// work-items of the work-group.
void reduction_group(uint block_size,
                     uint lid,
                     volatile __local TYPE* s_sum) {
    if (BLOCK_SIZE >= block_size) {
        const uint half_size = block_size / 2;

        if (lid < half_size) {
            s_sum[lid] = OP_BINARY(s_sum[lid], s_sum[lid + half_size]);
        }

        // Synchronize after every step, the narrow ones included. Local
        // memory writes are only guaranteed to be visible to the other
        // work-items of the group at a barrier, and nothing promises that
        // the work-items of one wave advance in lockstep; volatile alone
        // does not order accesses between work-items. The enclosing
        // condition is uniform as long as callers pass the same block_size
        // from every work-item, so the whole group reaches this barrier.
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
24
// Kernel: combines the first n elements of g_vec with OP_BINARY and stores
// one partial result per work-group in g_sum, indexed by the group id.
//
//   g_vec  input values
//   g_sum  output buffer, one slot written per work-group
//   init   starting value of each work-item's private accumulator
//   n      number of elements of g_vec to read
//
// NOTE(review): init seeds every work-item separately, so it presumably has
// to be the identity of OP_BINARY - confirm against the host code.
// NOTE(review): the scratch array has BLOCK_SIZE slots and is indexed by the
// local id, and the folding steps read slots up to BLOCK_SIZE - 1; this looks
// like it expects a launch with local size equal to BLOCK_SIZE - confirm.
__kernel void reduce(__global const TYPE* g_vec,
                     __global TYPE* g_sum,
                     const TYPE init,
                     const uint n) {
    const uint local_idx = get_local_id(0);
    const uint group_idx = get_group_id(0);
    const uint group_width = get_local_size(0);
    const uint stride = get_global_size(0);

    __local TYPE scratch[BLOCK_SIZE];

    // Phase 1: each work-item folds its own strided slice of the input
    // into a private accumulator.
    TYPE acc = init;
    uint pos = group_idx * group_width + local_idx;
    while (pos < n) {
        acc = OP_BINARY(acc, g_vec[pos]);
        pos += stride;
    }

    // Phase 2: publish the private results to the work-group.
    scratch[local_idx] = acc;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Phase 3: halve the active segment step by step; steps wider than
    // BLOCK_SIZE are skipped inside reduction_group.
    reduction_group(1024, local_idx, scratch);
    reduction_group(512, local_idx, scratch);
    reduction_group(256, local_idx, scratch);
    reduction_group(128, local_idx, scratch);
    reduction_group(64, local_idx, scratch);
    reduction_group(32, local_idx, scratch);
    reduction_group(16, local_idx, scratch);
    reduction_group(8, local_idx, scratch);
    reduction_group(4, local_idx, scratch);
    reduction_group(2, local_idx, scratch);

    // Phase 4: the first work-item writes the group's result.
    if (local_idx == 0) {
        g_sum[group_idx] = scratch[0];
    }
}
61)";