10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:11:41: warning: macro argument should be enclosed in parentheses [bugprone-macro-parentheses]
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:37:5: warning: 2 adjacent parameters of 'warp_optimized_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
37 | int N, int C, int D, int H, int W,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:37:9: note: the first parameter in the range is 'N'
37 | int N, int C, int D, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:37:16: note: the last parameter in the range is 'C'
37 | int N, int C, int D, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:37:33: warning: 3 adjacent parameters of 'warp_optimized_kernel' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters]
37 | int N, int C, int D, int H, int W,
| ^~~~~~
38 | int groups,
| ~~~~~~~~~~~
39 | float eps
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:37:37: note: the first parameter in the range is 'W'
37 | int N, int C, int D, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:39:11: note: the last parameter in the range is 'eps'
39 | float eps
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:39:5: note: 'int' and 'float' may be implicitly converted
39 | float eps
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:44:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
44 | const int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:48:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
48 | const int n = blockIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:49:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
49 | const int g = blockIdx.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:98:23: warning: narrowing conversion from 'int' to 'float' [bugprone-narrowing-conversions]
98 | mean = mean / group_size;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:99:36: warning: narrowing conversion from 'int' to 'float' [bugprone-narrowing-conversions]
99 | float variance = inv_std / group_size - mean * mean;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:143:5: warning: 2 adjacent parameters of 'forward' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
143 | int padding,
| ^~~~~~~~~~~~
144 | int groups,
| ~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:143:9: note: the first parameter in the range is 'padding'
143 | int padding,
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:144:9: note: the last parameter in the range is 'groups'
144 | int groups,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:146:19: warning: the parameter 'conv_transpose' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
146 | torch::Tensor conv_transpose,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:148:19: warning: the parameter 'group_norm_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
148 | torch::Tensor group_norm_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:149:19: warning: the parameter 'group_norm_bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
149 | torch::Tensor group_norm_bias
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:170:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
170 | x.size(0), x.size(1), x.size(2), x.size(3), x.size(4),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:170:20: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
170 | x.size(0), x.size(1), x.size(2), x.size(3), x.size(4),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:170:31: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
170 | x.size(0), x.size(1), x.size(2), x.size(3), x.size(4),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:170:42: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
170 | x.size(0), x.size(1), x.size(2), x.size(3), x.size(4),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250203_optimize_b10_s4_e0_sweep/level_2/task_60/b10_s1_warp_optimized_fused_base/base/base.cu:170:53: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
170 | x.size(0), x.size(1), x.size(2), x.size(3), x.size(4),
| ^