9 | const float* __restrict__ input,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 | const float* __restrict__ weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:9:31: note: the first parameter in the range is 'input'
9 | const float* __restrict__ input,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:10:31: note: the last parameter in the range is 'weight'
10 | const float* __restrict__ weight,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:13:22: warning: 2 adjacent parameters of 'conv_transpose2d_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
13 | int C_in, int H, int W,
| ^~~~~~
14 | int C_out, int K, // square kernel
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:13:26: note: the first parameter in the range is 'W'
13 | int C_in, int H, int W,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:14:9: note: the last parameter in the range is 'C_out'
14 | int C_out, int K, // square kernel
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:14:16: warning: 2 adjacent parameters of 'conv_transpose2d_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
14 | int C_out, int K, // square kernel
| ^~~~~~~~~~~~~~~~~~~~~~~
15 | int stride,
| ~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:14:20: note: the first parameter in the range is 'K'
14 | int C_out, int K, // square kernel
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:15:9: note: the last parameter in the range is 'stride'
15 | int stride,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:16:5: warning: 2 adjacent parameters of 'conv_transpose2d_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
16 | int padding,
| ^~~~~~~~~~~~
17 | int H_out, int W_out) {
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:16:9: note: the first parameter in the range is 'padding'
16 | int padding,
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:17:9: note: the last parameter in the range is 'H_out'
17 | int H_out, int W_out) {
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:19:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
19 | int tid = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:56:5: warning: 2 adjacent parameters of 'add_bias_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
56 | int total_elements,
| ^~~~~~~~~~~~~~~~~~~
57 | int C_out,
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:56:9: note: the first parameter in the range is 'total_elements'
56 | int total_elements,
| ^~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:57:9: note: the last parameter in the range is 'C_out'
57 | int C_out,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:60:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
60 | int tid = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:74:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
74 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:75:19: warning: the parameter 'weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
75 | torch::Tensor weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:88:14: warning: the variable 'bias_val' is copy-constructed from a const reference but is only used as const reference; consider making it a const reference [performance-unnecessary-copy-initialization]
88 | auto bias_val = bias.value();
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:95:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
95 | int N = x_sizes[0];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:96:16: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
96 | int C_in = x_sizes[1];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:97:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
97 | int H = x_sizes[2];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:98:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
98 | int W = x_sizes[3];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:102:17: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
102 | int C_out = w_sizes[1];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:103:13: warning: narrowing conversion from 'long' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
103 | int K = w_sizes[2];
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:106:17: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
106 | int H_out = (H - 1) * stride - 2 * padding + K + output_padding;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:107:17: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
107 | int W_out = (W - 1) * stride - 2 * padding + K + output_padding;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:145:30: warning: result of multiplication in type 'int' is used as a pointer offset after an implicit widening conversion to type 'ptrdiff_t' [bugprone-implicit-widening-of-multiplication-result]
145 | const float* x_ptr = x.data_ptr<float>() + start * C_in * H * W;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:145:52: note: make conversion explicit to silence this warning
145 | const float* x_ptr = x.data_ptr<float>() + start * C_in * H * W;
| ^~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:145:52: note: perform multiplication in a wider type
145 | const float* x_ptr = x.data_ptr<float>() + start * C_in * H * W;
| ^~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:147:26: warning: result of multiplication in type 'int' is used as a pointer offset after an implicit widening conversion to type 'ptrdiff_t' [bugprone-implicit-widening-of-multiplication-result]
147 | float* out_ptr = output.data_ptr<float>() + start * C_out * H_out * W_out;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:147:53: note: make conversion explicit to silence this warning
147 | float* out_ptr = output.data_ptr<float>() + start * C_out * H_out * W_out;
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:147:53: note: perform multiplication in a wider type
147 | float* out_ptr = output.data_ptr<float>() + start * C_out * H_out * W_out;
| ^~~~~~~~~~~~~~~~~~~~~
| static_cast<ptrdiff_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:156:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
156 | stride,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:157:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
157 | padding,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_57/b8_s3_hybrid_conv_transpose2d/base/base.cu:170:14: warning: the variable 'bias_tensor' is copy-constructed from a const reference but is only used as const reference; consider making it a const reference [performance-unnecessary-copy-initialization]
170 | auto bias_tensor = bias.value();
| ^
| const &