16 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:17:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
17 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:47: warning: 4 adjacent parameters of 'max_pool' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:51: note: the first parameter in the range is 'height'
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:90: note: the last parameter in the range is 'in_w_start'
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:102: warning: 2 adjacent parameters of 'max_pool' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:106: note: the first parameter in the range is 'pool_height'
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:25:123: note: the last parameter in the range is 'pool_width'
25 | __device__ float max_pool(const float* input, int height, int width, int in_h_start, int in_w_start, int pool_height, int pool_width) {
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:44:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
44 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:45:26: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
45 | int stride_threads = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:56:33: warning: result of multiplication in type 'int' is used as a pointer offset after an implicit widening conversion to type 'ptrdiff_t' [bugprone-implicit-widening-of-multiplication-result]
56 | output[idx] = max_pool(&input[((b * channels + c) * height) * width], height, width, in_h_start, in_w_start, pool_height, pool_width);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:56:39: note: make conversion explicit to silence this warning
8 | output[idx] = max_pool(&input[((b * channels + c) * height) * width], height, width, in_h_start, in_w_start, pool_height, pool_width);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:56:39: note: perform multiplication in a wider type
56 | output[idx] = max_pool(&input[((b * channels + c) * height) * width], height, width, in_h_start, in_w_start, pool_height, pool_width);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:61:76: warning: 2 adjacent parameters of 'linear_transform' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters]
61 | __device__ float linear_transform(const float* input, const float* weight, float bias, int in_features) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:61:82: note: the first parameter in the range is 'bias'
61 | __device__ float linear_transform(const float* input, const float* weight, float bias, int in_features) {
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:61:92: note: the last parameter in the range is 'in_features'
61 | __device__ float linear_transform(const float* input, const float* weight, float bias, int in_features) {
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:61:88: note: 'float' and 'int' may be implicitly converted
61 | __device__ float linear_transform(const float* input, const float* weight, float bias, int in_features) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:75:5: warning: 2 adjacent parameters of 'linear_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
75 | int in_features, int out_features
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:75:9: note: the first parameter in the range is 'in_features'
75 | int in_features, int out_features
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:75:26: note: the last parameter in the range is 'out_features'
75 | int in_features, int out_features
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:77:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
77 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:78:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
78 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:81:48: warning: result of multiplication in type 'int' is used as a pointer offset after an implicit widening conversion to type 'ptrdiff_t' [bugprone-implicit-widening-of-multiplication-result]
81 | output[idx] = linear_transform(input, &weight[idx * in_features], bias[idx], in_features);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:81:55: note: make conversion explicit to silence this warning
81 | output[idx] = linear_transform(input, &weight[idx * in_features], bias[idx], in_features);
| ^~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:81:55: note: perform multiplication in a wider type
81 | output[idx] = linear_transform(input, &weight[idx * in_features], bias[idx], in_features);
| ^~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:112:70: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
112 | relu_kernel<<<max_blocks, block_size>>>(conv1.data_ptr<float>(), conv1.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:117:70: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
117 | relu_kernel<<<max_blocks, block_size>>>(conv2.data_ptr<float>(), conv2.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:125:68: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
125 | relu_kernel<<<max_blocks, block_size>>>(fc1.data_ptr<float>(), fc1.numel());
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_3/task_4/b6_s1_modular_device_functions_base/base/base.cu:129:68: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
129 | relu_kernel<<<max_blocks, block_size>>>(fc2.data_ptr<float>(), fc2.numel());
| ^