12 | const float* __restrict__ input,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 | const float* __restrict__ weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:12:31: note: the first parameter in the range is 'input'
12 | const float* __restrict__ input,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:13:31: note: the last parameter in the range is 'weight'
13 | const float* __restrict__ weight,
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:15:5: warning: 3 adjacent parameters of 'conv2d_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
15 | const int batch_size,
| ^~~~~~~~~~~~~~~~~~~~~
16 | const int in_channels,
| ~~~~~~~~~~~~~~~~~~~~~~
17 | const int out_channels,
| ~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:15:15: note: the first parameter in the range is 'batch_size'
15 | const int batch_size,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:17:15: note: the last parameter in the range is 'out_channels'
17 | const int out_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:19:5: warning: 2 adjacent parameters of 'conv2d_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
19 | const int input_width,
| ^~~~~~~~~~~~~~~~~~~~~~
20 | const int output_height,
| ~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:19:15: note: the first parameter in the range is 'input_width'
19 | const int input_width,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:20:15: note: the last parameter in the range is 'output_height'
20 | const int output_height,
| ^~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:21:5: warning: 3 adjacent parameters of 'conv2d_kernel' of similar type ('const int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
21 | const int output_width,
| ^~~~~~~~~~~~~~~~~~~~~~~
22 | const int stride,
| ~~~~~~~~~~~~~~~~~
23 | const int padding) {
| ~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:21:15: note: the first parameter in the range is 'output_width'
21 | const int output_width,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:23:15: note: the last parameter in the range is 'padding'
23 | const int padding) {
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:28:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
28 | const int tx = threadIdx.x % BLOCK_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:29:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
29 | const int ty = threadIdx.y % BLOCK_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:30:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
30 | const int tid = threadIdx.y * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:31:15: warning: Value stored to 'warp_id' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
31 | const int warp_id = tid / 32;
| ^~~~~~~ ~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:31:15: note: Value stored to 'warp_id' during its initialization is never read
31 | const int warp_id = tid / 32;
| ^~~~~~~ ~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:33:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
33 | const int bx = blockIdx.x * (BLOCK_SIZE * TILE_SIZE);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:34:20: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
34 | const int by = blockIdx.y * (BLOCK_SIZE * TILE_SIZE);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:35:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
35 | const int b = blockIdx.z / ((out_channels + WARPS_PER_BLOCK - 1) / WARPS_PER_BLOCK);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:36:26: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
36 | const int oc_block = (blockIdx.z % ((out_channels + WARPS_PER_BLOCK - 1) / WARPS_PER_BLOCK)) * WARPS_PER_BLOCK;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:53:51: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
53 | for (int i = 0; i < SHARED_SIZE; i += blockDim.y) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:55:55: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
55 | for (int j = 0; j < SHARED_SIZE; j += blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:117:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
117 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:118:19: warning: the parameter 'weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
118 | torch::Tensor weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:121:5: warning: 3 adjacent parameters of 'forward' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
121 | int padding,
| ^~~~~~~~~~~~
122 | int dilation,
| ~~~~~~~~~~~~~
123 | int groups) {
| ~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:121:9: note: the first parameter in the range is 'padding'
121 | int padding,
| ^~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:123:9: note: the last parameter in the range is 'groups'
123 | int groups) {
| ^~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:134:42: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
134 | auto output_height = (input_height + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:134:42: note: make conversion explicit to silence this warning
134 | auto output_height = (input_height + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^~~~~~~~~~~
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:134:42: note: perform multiplication in a wider type
134 | auto output_height = (input_height + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:135:40: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
135 | auto output_width = (input_width + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:135:40: note: make conversion explicit to silence this warning
135 | auto output_width = (input_width + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^~~~~~~~~~~
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:135:40: note: perform multiplication in a wider type
135 | auto output_width = (input_width + 2 * padding - KERNEL_SIZE) / stride + 1;
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:25: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:25: note: make conversion explicit to silence this warning
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:25: note: perform multiplication in a wider type
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:56: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:56: note: make conversion explicit to silence this warning
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:142:56: note: perform multiplication in a wider type
142 | (output_width + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:26: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:26: note: make conversion explicit to silence this warning
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:26: note: perform multiplication in a wider type
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:57: warning: performing an implicit widening conversion to type 'int64_t' (aka 'long') of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:57: note: make conversion explicit to silence this warning
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:143:57: note: perform multiplication in a wider type
143 | (output_height + BLOCK_SIZE * TILE_SIZE - 1) / (BLOCK_SIZE * TILE_SIZE),
| ^
| static_cast<int64_t>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:5:20: note: expanded from macro 'BLOCK_SIZE'
5 | #define BLOCK_SIZE 16
| ^~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:151:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
151 | batch_size,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:152:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
152 | in_channels,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:153:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
153 | out_channels,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:154:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
154 | input_height,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:155:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
155 | input_width,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:156:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
156 | output_height,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_63/b9_s2_balanced_conv2d_cuda/base/base.cu:157:9: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
157 | output_width,
| ^