10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:11:41: warning: macro argument should be enclosed in parentheses [bugprone-macro-parentheses]
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:20:5: warning: 2 adjacent parameters of 'combined_activation_bias_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
20 | int total_elements,
| ^~~~~~~~~~~~~~~~~~~
21 | int out_channels,
| ~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:20:9: note: the first parameter in the range is 'total_elements'
20 | int total_elements,
| ^~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:21:9: note: the last parameter in the range is 'out_channels'
21 | int out_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:24:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
24 | int tid = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:31:40: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
31 | for (int i = tid; i < numVec; i += gridDim.x * blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:59:62: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
59 | for (int i = scalarStart + tid; i < total_elements; i += gridDim.x * blockDim.x) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:74:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
74 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:75:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
75 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:77:19: warning: the parameter 'bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
77 | torch::Tensor bias
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:87:15: warning: Value stored to 'batch_size' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
87 | const int batch_size = output.size(0);
| ^~~~~~~~~~ ~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:87:15: note: Value stored to 'batch_size' during its initialization is never read
87 | const int batch_size = output.size(0);
| ^~~~~~~~~~ ~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:87:28: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
87 | const int batch_size = output.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:88:30: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
88 | const int out_channels = output.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:89:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
89 | const int depth = output.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:90:24: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
90 | const int height = output.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:91:23: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
91 | const int width = output.size(4);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_sweep_rag_optim/level_2/task_7/b8_s2_combined_activation_bias/base/base.cu:92:26: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
92 | int total_elements = output.numel();
| ^