9 | const float* __restrict__ x, // Input tensor: [B, C_in, H, W]
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 | const float* __restrict__ weight, // Convolution weight: [C_out, C_in, K, K]
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 | const float* __restrict__ bias, // Convolution bias: [C_out]
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:9:31: note: the first parameter in the range is 'x'
9 | const float* __restrict__ x, // Input tensor: [B, C_in, H, W]
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:11:31: note: the last parameter in the range is 'bias'
11 | const float* __restrict__ bias, // Convolution bias: [C_out]
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:13:5: warning: 2 adjacent parameters of 'optimized_conv_min_tanh_forward_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
13 | const int batch,
| ^~~~~~~~~~~~~~~~
14 | const int in_channels,
| ~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:13:15: note: the first parameter in the range is 'batch'
13 | const int batch,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:14:15: note: the last parameter in the range is 'in_channels'
14 | const int in_channels,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:16:5: warning: 2 adjacent parameters of 'optimized_conv_min_tanh_forward_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
16 | const int in_width,
| ^~~~~~~~~~~~~~~~~~~
17 | const int out_channels,
| ~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:16:15: note: the first parameter in the range is 'in_width'
16 | const int in_width,
| ^~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:17:15: note: the last parameter in the range is 'out_channels'
17 | const int out_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:18:5: warning: 2 adjacent parameters of 'optimized_conv_min_tanh_forward_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
18 | const int kernel_size,
| ^~~~~~~~~~~~~~~~~~~~~~
19 | const int out_height,
| ~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:18:15: note: the first parameter in the range is 'kernel_size'
18 | const int kernel_size,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:19:15: note: the last parameter in the range is 'out_height'
19 | const int out_height,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:27:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
27 | int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:28:24: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
28 | int blockThreads = blockDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:37:17: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
37 | int index = blockIdx.x * blockDim.x + tid;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:76:16: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
76 | at::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:77:16: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
77 | at::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:78:16: warning: the parameter 'conv_bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
78 | at::Tensor conv_bias,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:79:16: warning: the parameter 'output' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
79 | at::Tensor output) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:81:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
81 | const int batch = x.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:82:29: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
82 | const int in_channels = x.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:83:27: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
83 | const int in_height = x.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:84:26: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
84 | const int in_width = x.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:86:30: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
86 | const int out_channels = conv_weight.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:87:29: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
87 | const int kernel_size = conv_weight.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:96:30: warning: narrowing conversion from 'unsigned long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
96 | const int shared_bytes = (weight_elems + out_channels) * sizeof(float);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:120:16: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
120 | at::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:121:16: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
121 | at::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:122:16: warning: the parameter 'conv_bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
122 | at::Tensor conv_bias) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:128:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
128 | const int batch = x.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:129:27: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
129 | const int in_height = x.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:130:26: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
130 | const int in_width = x.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_25/b9_s0_warp_divergence_optimized_conv/base/base.cu:131:29: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
131 | const int kernel_size = conv_weight.size(2);
| ^