13 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:14:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
14 | int stride = gridDim.x * blockDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:30:35: warning: 3 adjacent parameters of 'max_pool2d_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
30 | int batch_size, int channels, int height, int width, int stride) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:30:39: note: the first parameter in the range is 'height'
30 | int batch_size, int channels, int height, int width, int stride) {
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:30:62: note: the last parameter in the range is 'stride'
30 | int batch_size, int channels, int height, int width, int stride) {
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:35:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
35 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:36:24: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
36 | int stride_total = gridDim.x * blockDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:48:30: warning: result of multiplication in type 'int' is used as a pointer offset after an implicit widening conversion to type 'ptrdiff_t' [bugprone-implicit-widening-of-multiplication-result]
48 | const float* row0 = input + (base + h_offset) * width + w_offset;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:48:38: note: make conversion explicit to silence this warning
48 | const float* row0 = input + (base + h_offset) * width + w_offset;
| ^~~~~~~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:48:38: note: perform multiplication in a wider type
48 | const float* row0 = input + (base + h_offset) * width + w_offset;
| ^~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:58:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
58 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:59:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
59 | int stride = gridDim.x * blockDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:100:70: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
100 | relu_kernel<<<num_blocks, block_size>>>(conv1.data_ptr<float>(), conv1.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:105:10: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
105 | conv1.size(0), conv1.size(1), conv1.size(2), conv1.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:105:25: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
105 | conv1.size(0), conv1.size(1), conv1.size(2), conv1.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:105:40: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
105 | conv1.size(0), conv1.size(1), conv1.size(2), conv1.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:105:55: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
105 | conv1.size(0), conv1.size(1), conv1.size(2), conv1.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:109:70: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
109 | relu_kernel<<<num_blocks, block_size>>>(conv2.data_ptr<float>(), conv2.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:112:10: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
112 | conv2.size(0), conv2.size(1), conv2.size(2), conv2.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:112:25: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
112 | conv2.size(0), conv2.size(1), conv2.size(2), conv2.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:112:40: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
112 | conv2.size(0), conv2.size(1), conv2.size(2), conv2.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:112:55: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
112 | conv2.size(0), conv2.size(1), conv2.size(2), conv2.size(3), 2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:120:68: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
120 | relu_kernel<<<num_blocks, block_size>>>(fc1.data_ptr<float>(), fc1.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b9_s1_warp_uniform_optimized/base/base.cu:122:68: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
122 | relu_kernel<<<num_blocks, block_size>>>(fc2.data_ptr<float>(), fc2.numel());
| ^