7 | const float* __restrict__ A,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
8 | const float* __restrict__ B,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:7:31: note: the first parameter in the range is 'A'
7 | const float* __restrict__ A,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:8:31: note: the last parameter in the range is 'B'
8 | const float* __restrict__ B,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:11:5: warning: 2 adjacent parameters of 'stride_loop_vectorized_diag_matmul_kernel' of similar type ('const int64_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
11 | const int64_t M,
| ^~~~~~~~~~~~~~~~
12 | const int64_t vec_total) {
| ~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:11:19: note: the first parameter in the range is 'M'
11 | const int64_t M,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:12:19: note: the last parameter in the range is 'vec_total'
12 | const int64_t vec_total) {
| ^~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:13:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
13 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:14:16: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
14 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:27:17: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
27 | int row = base_idx / M;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:48:5: warning: 2 adjacent parameters of 'stride_loop_scalar_diag_matmul_kernel' of similar type ('const int64_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
48 | const int64_t total,
| ^~~~~~~~~~~~~~~~~~~~
49 | const int64_t M) {
| ~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:48:19: note: the first parameter in the range is 'total'
48 | const int64_t total,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:49:19: note: the last parameter in the range is 'M'
49 | const int64_t M) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:50:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
50 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:51:16: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
51 | int stride = blockDim.x * gridDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:55:15: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
55 | int row = i / M;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:79:18: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
79 | int blocks = (vec_total + threads - 1) / threads;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250202_optimize_b10_s4_e0_sweep/level_1/task_12/b10_s3_stride_loop_diag_matmul/base/base.cu:83:18: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
83 | int blocks = (total + threads - 1) / threads;
| ^