18 | float* output, const float* input,
| ^~~~~~~~~~~~~~~~~~~
19 | const float* weight, const float* bias,
| ~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:18:33: note: the first parameter in the range is 'input'
18 | float* output, const float* input,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:19:18: note: the last parameter in the range is 'weight'
19 | const float* weight, const float* bias,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:19:26: warning: 3 adjacent parameters of 'batch_norm_warp_kernel' of similar type ('const float *') are easily swapped by mistake [bugprone-easily-swappable-parameters]
19 | const float* weight, const float* bias,
| ^~~~~~~~~~~~~~~~~~
20 | const float* mean, const float* var,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:19:39: note: the first parameter in the range is 'bias'
19 | const float* weight, const float* bias,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:20:37: note: the last parameter in the range is 'var'
20 | const float* mean, const float* var,
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:24:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
24 | int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:25:22: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
25 | int global_tid = blockIdx.x * blockDim.x + tid;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:26:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
26 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:63:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
63 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:67:5: warning: 2 adjacent parameters of 'dense_layer_fn' of similar type ('torch::Tensor') are easily swapped by mistake [bugprone-easily-swappable-parameters]
67 | torch::Tensor bn_var,
| ^~~~~~~~~~~~~~~~~~~~~
68 | torch::Tensor conv_weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:67:19: note: the first parameter in the range is 'bn_var'
67 | torch::Tensor bn_var,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:68:19: note: the last parameter in the range is 'conv_weight'
68 | torch::Tensor conv_weight,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:68:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
68 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:72:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
72 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:72:27: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
72 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:72:41: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
72 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:72:55: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
72 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:104:62: warning: the parameter 'layer_params' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
104 | torch::Tensor dense_block_fn(torch::Tensor x, pybind11::list layer_params, bool is_training) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:124:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
124 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:128:5: warning: 2 adjacent parameters of 'transition_layer_fn' of similar type ('torch::Tensor') are easily swapped by mistake [bugprone-easily-swappable-parameters]
128 | torch::Tensor bn_var,
| ^~~~~~~~~~~~~~~~~~~~~
129 | torch::Tensor conv_weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:128:19: note: the first parameter in the range is 'bn_var'
128 | torch::Tensor bn_var,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:129:19: note: the last parameter in the range is 'conv_weight'
129 | torch::Tensor conv_weight,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:129:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
129 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:133:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:133:27: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:133:41: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:133:55: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:167:57: warning: the parameter 'params_obj' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
167 | torch::Tensor forward(torch::Tensor x, pybind11::object params_obj, bool is_training) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:183:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
183 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:183:27: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
183 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:183:41: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
183 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:183:55: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
183 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:215:28: warning: narrowing conversion from 'size_t' (aka 'unsigned long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
215 | int num_dense_blocks = dense_blocks.size();
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:239:9: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
239 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:239:23: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
239 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:239:37: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
239 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b4_s2_warp_optimized_densenet_op/base/base.cu:239:51: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
239 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^