12 | const float* __restrict__ input,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 | const float* __restrict__ weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:12:31: note: the first parameter in the range is 'input'
12 | const float* __restrict__ input,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:13:31: note: the last parameter in the range is 'weight'
13 | const float* __restrict__ weight,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:15:5: warning: 3 adjacent parameters of 'conv2d_coalesced_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
15 | int batch_size,
| ^~~~~~~~~~~~~~~
16 | int in_channels,
| ~~~~~~~~~~~~~~~~
17 | int out_channels,
| ~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:15:9: note: the first parameter in the range is 'batch_size'
15 | int batch_size,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:17:9: note: the last parameter in the range is 'out_channels'
17 | int out_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:19:5: warning: 2 adjacent parameters of 'conv2d_coalesced_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
19 | int input_width,
| ^~~~~~~~~~~~~~~~
20 | int output_height,
| ~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:19:9: note: the first parameter in the range is 'input_width'
19 | int input_width,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:20:9: note: the last parameter in the range is 'output_height'
20 | int output_height,
| ^~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:21:5: warning: 3 adjacent parameters of 'conv2d_coalesced_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
21 | int output_width,
| ^~~~~~~~~~~~~~~~~
22 | int stride,
| ~~~~~~~~~~~
23 | int padding) {
| ~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:21:9: note: the first parameter in the range is 'output_width'
21 | int output_width,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:23:9: note: the last parameter in the range is 'padding'
23 | int padding) {
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:31:17: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
31 | int out_x = blockIdx.x * BLOCK_SIZE + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:32:17: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
32 | int out_y = blockIdx.y * BLOCK_SIZE + threadIdx.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:33:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
33 | int b = blockIdx.z;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:36:23: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
36 | int in_x_origin = blockIdx.x * BLOCK_SIZE - padding;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:37:23: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
37 | int in_y_origin = blockIdx.y * BLOCK_SIZE - padding;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:40:17: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
40 | int t_idx = threadIdx.y * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:41:29: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
41 | int threads_per_block = blockDim.x * blockDim.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:72:31: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
72 | int local_y = threadIdx.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:73:31: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
73 | int local_x = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:97:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
97 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:98:19: warning: the parameter 'weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
98 | torch::Tensor weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:101:5: warning: 3 adjacent parameters of 'forward' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
101 | int padding,
| ^~~~~~~~~~~~
102 | int dilation,
| ~~~~~~~~~~~~~
103 | int groups) {
| ~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:101:9: note: the first parameter in the range is 'padding'
101 | int padding,
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:103:9: note: the last parameter in the range is 'groups'
103 | int groups) {
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:113:22: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
113 | int batch_size = x.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:114:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
114 | int in_channels = x.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:115:24: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
115 | int input_height = x.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:116:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
116 | int input_width = x.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b10_s3_conv2d_coalesced_coalescing/base/base.cu:117:24: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
117 | int out_channels = weight.size(0);
| ^