10 | const int size,
| ^~~~~~~~~~~~~~~
11 | const float scale_factor
| ~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:10:15: note: the first parameter in the range is 'size'
10 | const int size,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:11:17: note: the last parameter in the range is 'scale_factor'
11 | const float scale_factor
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:11:5: note: 'const int' and 'const float' may be implicitly converted: 'const int' (as 'int') -> 'const float' (as 'float'), 'const float' (as 'float') -> 'const int' (as 'int')
11 | const float scale_factor
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:15:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
15 | const int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:16:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
16 | const int bid = blockIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:43:5: warning: 2 adjacent parameters of 'module_fn_cuda' of similar type ('double') are easily swapped by mistake [bugprone-easily-swappable-parameters]
43 | double momentum,
| ^~~~~~~~~~~~~~~~
44 | double scale_factor,
| ~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:43:12: note: the first parameter in the range is 'momentum'
43 | double momentum,
| ^~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:44:12: note: the last parameter in the range is 'scale_factor'
44 | double scale_factor,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:45:19: warning: the parameter 'conv_transpose' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
45 | torch::Tensor conv_transpose,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:46:5: warning: 2 adjacent parameters of 'module_fn_cuda' of similar type ('torch::Tensor') are easily swapped by mistake [bugprone-easily-swappable-parameters]
46 | torch::Tensor conv_transpose_bias,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
47 | torch::Tensor bn_weight,
| ~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:46:19: note: the first parameter in the range is 'conv_transpose_bias'
46 | torch::Tensor conv_transpose_bias,
| ^~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:47:19: note: the last parameter in the range is 'bn_weight'
47 | torch::Tensor bn_weight,
| ^~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:28: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:38: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:38: note: make conversion explicit to silence this warning
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:38: note: perform multiplication in a wider type
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:55: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:55: note: make conversion explicit to silence this warning
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:68:55: note: perform multiplication in a wider type
68 | const int num_blocks = (x_size + TILE_SIZE - 1) / TILE_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:5:20: note: expanded from macro 'TILE_SIZE'
5 | #define TILE_SIZE (BLOCK_SIZE * 4)
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:4:20: note: expanded from macro 'BLOCK_SIZE'
4 | #define BLOCK_SIZE 256
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_77/b5_s2_77_convtranspose3d_scale_batchnorm_globalavgpool_shared_mem/base/base.cu:73:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
73 | x_size,
| ^