15 | int N, int C, int H, int W,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:15:9: note: the first parameter in the range is 'N'
15 | int N, int C, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:15:16: note: the last parameter in the range is 'C'
15 | int N, int C, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:15:26: warning: 2 adjacent parameters of 'fused_gelu_group_norm_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
15 | int N, int C, int H, int W,
| ^~~~~~
16 | int num_groups,
| ~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:15:30: note: the first parameter in the range is 'W'
15 | int N, int C, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:16:9: note: the last parameter in the range is 'num_groups'
16 | int num_groups,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:17:5: warning: 2 adjacent parameters of 'fused_gelu_group_norm_kernel' of similar type ('const float *__restrict') are easily swapped by mistake [bugprone-easily-swappable-parameters]
17 | const float* __restrict__ gn_weight,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | const float* __restrict__ gn_bias,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:17:31: note: the first parameter in the range is 'gn_weight'
17 | const float* __restrict__ gn_weight,
| ^~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:18:31: note: the last parameter in the range is 'gn_bias'
18 | const float* __restrict__ gn_bias,
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:22:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
22 | int group_id = blockIdx.x; // group index across all samples
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:36:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
36 | for (int idx = threadIdx.x; idx < group_elems; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:36:57: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
36 | for (int idx = threadIdx.x; idx < group_elems; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:46:14: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
46 | int lane = threadIdx.x & 31;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:47:16: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
47 | int warpId = threadIdx.x >> 5;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:65:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
65 | int num_warps = (blockDim.x + 31) / 32;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:78:26: warning: narrowing conversion from 'int' to 'float' [bugprone-narrowing-conversions]
78 | mean = group_sum / group_elems;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:79:28: warning: narrowing conversion from 'int' to 'float' [bugprone-narrowing-conversions]
79 | var = group_sum_sq / group_elems - mean * mean;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:88:18: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
88 | for (int idx = threadIdx.x; idx < group_elems; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:88:57: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
88 | for (int idx = threadIdx.x; idx < group_elems; idx += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:130:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
130 | int N = conv_out.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:131:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
131 | int C = conv_out.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:132:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
132 | int H = conv_out.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:133:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
133 | int W = conv_out.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_2/task_19/b1_s3_opt_convtrans_gelu_gn/base/base.cu:135:18: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
135 | int groups = num_groups;
| ^