Go to the documentation of this file.
8 static const char source_sort_radix[] = R
"(
12 // Number of different values in a mask, equals 1 << bits count
14 // Mask used to fetch bits, mask equals (1 << bits count) - 1
// radix_sort_local: per work-group pass over one digit of the keys.
// For every digit value (slot) in [0, BITS_VALS) each work-item flags whether
// its key holds that digit, the flags are accumulated across the work-group in
// local memory, and two results are written to global memory:
//   g_offsets[gid]                      - accumulated count minus one for the
//                                         work-item whose digit equals slot
//   g_blocks_size[slot * ngroups + gpid] - last accumulated entry of the group
// Parameters visible here:
//   g_keys        - input keys, read at index gid
//   g_offsets     - output, one entry per key
//   g_blocks_size - output, indexed slot-major by work-group id
// NOTE(review): n and shift are used below but their declarations fall on
// listing lines that are not visible here; presumably they are the trailing
// kernel parameters (element count and bit shift of the digit) - confirm.
18 __kernel void radix_sort_local(__global const uint* g_keys,
19 __global uint* g_offsets,
20 __global uint* g_blocks_size,
23 const uint gid = get_global_id(0);
24 const uint gpid = get_group_id(0);
25 const uint ngroups = get_num_groups(0);
26 const uint lid = get_local_id(0);
// Work-group local scratch: the digit of each work-item and the running sums.
28 __local uint s_mask[BLOCK_SIZE];
29 __local uint s_offsets[BLOCK_SIZE];
// Extract the digit selected by shift and BITS_MASK; work-items past n store 0.
31 s_mask[lid] = gid < n ? (g_keys[gid] >> shift) & BITS_MASK : 0;
// One full accumulation pass per possible digit value.
33 for (uint slot = 0; slot < BITS_VALS; slot += 1) {
// Makes the s_mask stores (and the previous pass reads of s_offsets) complete
// before s_offsets is overwritten for this slot.
34 barrier(CLK_LOCAL_MEM_FENCE);
// Flag is 1 when this work-item digit equals the current slot.
// NOTE(review): the declaration of process_bit and any enclosing condition are
// not visible in this listing - confirm against the full file.
39 process_bit = s_mask[lid] == slot ? 1 : 0;
42 s_offsets[lid] = process_bit;
// Accumulate with doubling distance: each step adds the entry offset positions
// to the left into the current entry.
44 for (uint offset = 1; offset < BLOCK_SIZE; offset *= 2) {
45 barrier(CLK_LOCAL_MEM_FENCE);
46 uint value = s_offsets[lid];
// NOTE(review): the condition guarding this read is not visible here. lid is
// unsigned, so lid - offset wraps around when offset > lid; the guard must
// ensure lid >= offset - confirm.
49 value += s_offsets[lid - offset];
// All reads of the old values must finish before any entry is overwritten.
52 barrier(CLK_LOCAL_MEM_FENCE);
53 s_offsets[lid] = value;
// Wait for the final stores before reading the accumulated results.
56 barrier(CLK_LOCAL_MEM_FENCE);
// Only in-range work-items whose digit matches the slot publish an offset;
// the minus one turns the accumulated count into a zero-based position.
58 if (gid < n && process_bit) {
59 g_offsets[gid] = s_offsets[lid] - 1;
// The last entry holds the sum over the whole work-group for this slot.
// NOTE(review): a guard limiting this store to a single work-item is
// presumably on a listing line not visible here - confirm.
63 g_blocks_size[slot * ngroups + gpid] = s_offsets[BLOCK_SIZE - 1];
// radix_sort_scatter: moves each key/value pair to its destination index.
// The destination is the sum of a per-(digit, work-group) base read from
// g_blocks_offsets and the per-element offset read from g_offsets.
// Parameters visible here:
//   g_in_keys, g_in_values   - input pairs, read at index gid
//   g_out_keys, g_out_values - output pairs, written at the computed index
//   g_offsets                - per-element offset, read at index gid
//   g_blocks_offsets         - base offsets, indexed slot * ngroups + gpid
// NOTE(review): shift is used below but its declaration, the rest of the
// parameter list and the opening brace fall on listing lines not visible here.
// TYPE and BITS_MASK are also defined outside the visible lines - confirm.
68 __kernel void radix_sort_scatter(__global const uint* g_in_keys,
69 __global const TYPE* g_in_values,
70 __global uint* g_out_keys,
71 __global TYPE* g_out_values,
72 __global const uint* g_offsets,
73 __global const uint* g_blocks_offsets,
76 const uint gid = get_global_id(0);
77 const uint gpid = get_group_id(0);
78 const uint ngroups = get_num_groups(0);
// NOTE(review): no bounds check on gid is visible before these reads; a guard
// is presumably on a listing line not shown here - confirm.
82 const uint key = g_in_keys[gid];
83 const TYPE values = g_in_values[gid];
// Digit of the key for the current pass.
85 uint slot = (key >> shift) & BITS_MASK;
// Base for this digit and work-group plus the element offset.
86 uint offset = g_blocks_offsets[slot * ngroups + gpid] + g_offsets[gid];
// Key and value are written to the same index so pairs stay together.
88 g_out_keys[offset] = key;
89 g_out_values[offset] = values;