6 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:7:41: warning: macro argument should be enclosed in parentheses [bugprone-macro-parentheses]
7 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
| ^
| ()
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:45:21: warning: 2 adjacent parameters of 'apply_activations_and_bias_kernel' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
45 | int batch_size, int out_channels, int depth, int height, int width
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:45:25: note: the first parameter in the range is 'out_channels'
45 | int batch_size, int out_channels, int depth, int height, int width
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:45:43: note: the last parameter in the range is 'depth'
45 | int batch_size, int out_channels, int depth, int height, int width
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:47:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
47 | const int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:48:15: warning: Value stored to 'lane_id' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
48 | const int lane_id = tid % WARP_SIZE;
| ^~~~~~~ ~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:48:15: note: Value stored to 'lane_id' during its initialization is never read
48 | const int lane_id = tid % WARP_SIZE;
| ^~~~~~~ ~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:49:15: warning: Value stored to 'warp_id' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
49 | const int warp_id = tid / WARP_SIZE;
| ^~~~~~~ ~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:49:15: note: Value stored to 'warp_id' during its initialization is never read
49 | const int warp_id = tid / WARP_SIZE;
| ^~~~~~~ ~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:50:30: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
50 | const int block_offset = blockIdx.x * BLOCK_SIZE;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:54:15: warning: Value stored to 'elements_per_channel' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
54 | const int elements_per_channel = spatial_size;
| ^~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:54:15: note: Value stored to 'elements_per_channel' during its initialization is never read
54 | const int elements_per_channel = spatial_size;
| ^~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:89:19: warning: the parameter 'x' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
89 | torch::Tensor x,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:90:19: warning: the parameter 'conv_weight' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
90 | torch::Tensor conv_weight,
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:92:19: warning: the parameter 'bias' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
92 | torch::Tensor bias
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:101:22: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
101 | int batch_size = output.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:102:24: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
102 | int out_channels = output.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:103:17: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
103 | int depth = output.size(2);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:104:18: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
104 | int height = output.size(3);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250213_optimize_b10_s4_e0_cross_no/level_2/task_7/b3_s3_coalesced_memory_activation_kernel_base/base/base.cu:105:17: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
105 | int width = output.size(4);
| ^