12 | int N,
| ^~~~~~
13 | int C,
| ~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:12:9: note: the first parameter in the range is 'N'
12 | int N,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:13:9: note: the last parameter in the range is 'C'
13 | int C,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:16:5: warning: 2 adjacent parameters of 'fusedPoolingAndReductionKernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
16 | int W,
| ^~~~~~
17 | int poolD,
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:16:9: note: the first parameter in the range is 'W'
16 | int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:17:9: note: the last parameter in the range is 'poolD'
17 | int poolD,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:26:14: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
26 | int nc = blockIdx.x; // in [0, N*C)
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:40:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
40 | for (int idx = threadIdx.x; idx < numPools; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:40:56: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
40 | for (int idx = threadIdx.x; idx < numPools; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:83:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
83 | int numWarps = (blockDim.x + 31) >> 5;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:95:33: warning: narrowing conversion from 'int' to 'float' [bugprone-narrowing-conversions]
95 | float avg = block_sum / numPools;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:109:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
109 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:113:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
113 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:115:19: warning: the parameter 'bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
115 | torch::Tensor bias
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:124:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
124 | int N = conv_out.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:125:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
125 | int C = conv_out.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:126:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
126 | int D = conv_out.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:127:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
127 | int H = conv_out.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:128:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
128 | int W = conv_out.size(4);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:133:25: warning: narrowing conversion from 'unsigned long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int sharedMemSize = ((threadsPerBlock + 31) / 32) * sizeof(float);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:147:9: warning: narrowing conversion from 'value_type' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
147 | pool_size[0], pool_size[1], pool_size[2],
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:147:23: warning: narrowing conversion from 'value_type' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
147 | pool_size[0], pool_size[1], pool_size[2],
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:147:37: warning: narrowing conversion from 'value_type' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
147 | pool_size[0], pool_size[1], pool_size[2],
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_8/b3_s1_fused_pooling_uniform/base/base.cu:149:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
149 | sum_dim,
| ^