13 | const float* __restrict__ input,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 | const float* __restrict__ weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:13:31: note: the first parameter in the range is 'input'
13 | const float* __restrict__ input,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:14:31: note: the last parameter in the range is 'weight'
14 | const float* __restrict__ weight,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:15:5: warning: 3 adjacent parameters of 'coalesced_bn_kernel' of similar type ('const float *__restrict') are easily swapped by mistake [bugprone-easily-swappable-parameters]
15 | const float* __restrict__ bias,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16 | const float* __restrict__ mean,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 | const float* __restrict__ var,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:15:31: note: the first parameter in the range is 'bias'
15 | const float* __restrict__ bias,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:17:31: note: the last parameter in the range is 'var'
17 | const float* __restrict__ var,
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:22:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
22 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:23:16: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
23 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:54:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
54 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:58:5: warning: 2 adjacent parameters of 'dense_layer_fn' of similar type ('torch::Tensor') are easily swapped by mistake [bugprone-easily-swappable-parameters]
58 | torch::Tensor bn_var,
| ^~~~~~~~~~~~~~~~~~~~~
59 | torch::Tensor conv_weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:58:19: note: the first parameter in the range is 'bn_var'
58 | torch::Tensor bn_var,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:59:19: note: the last parameter in the range is 'conv_weight'
59 | torch::Tensor conv_weight,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:59:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
59 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:63:11: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
63 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:63:25: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
63 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:63:39: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
63 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:63:53: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
63 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:95:62: warning: the parameter 'layer_params' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
95 | torch::Tensor dense_block_fn(torch::Tensor x, pybind11::list layer_params, bool is_training) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:115:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
115 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:119:5: warning: 2 adjacent parameters of 'transition_layer_fn' of similar type ('torch::Tensor') are easily swapped by mistake [bugprone-easily-swappable-parameters]
119 | torch::Tensor bn_var,
| ^~~~~~~~~~~~~~~~~~~~~
120 | torch::Tensor conv_weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:119:19: note: the first parameter in the range is 'bn_var'
119 | torch::Tensor bn_var,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:120:19: note: the last parameter in the range is 'conv_weight'
120 | torch::Tensor conv_weight,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:120:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
120 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:124:11: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
124 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:124:25: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
124 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:124:39: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
124 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:124:53: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
124 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:158:57: warning: the parameter 'params_obj' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
158 | torch::Tensor forward(torch::Tensor x, pybind11::object params_obj, bool is_training) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:174:11: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
174 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:174:25: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
174 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:174:39: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
174 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:174:53: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
174 | int N = sizes[0], C = sizes[1], H = sizes[2], W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:208:26: warning: narrowing conversion from 'size_t' (aka 'unsigned long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
208 | int num_dense_blocks = dense_blocks.size();
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:231:7: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
231 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:231:21: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
231 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:231:35: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
231 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_3/task_16/b5_s2_coalesced_densenet_bn/base/base.cu:231:49: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
231 | N = sizes[0]; C = sizes[1]; H = sizes[2]; W = sizes[3];
| ^