9 | int batch,
| ^~~~~~~~~~
10 | int group,
| ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:9:9: note: the first parameter in the range is 'batch'
9 | int batch,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:10:9: note: the last parameter in the range is 'group'
10 | int group,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:11:5: warning: 2 adjacent parameters of 'compute_group_mean_var' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters]
11 | int channels_per_group,
| ^~~~~~~~~~~~~~~~~~~~~~~
12 | int num_channels,
| ~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:11:9: note: the first parameter in the range is 'channels_per_group'
11 | int channels_per_group,
| ^~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:12:9: note: the last parameter in the range is 'num_channels'
12 | int num_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:13:5: warning: 2 adjacent parameters of 'compute_group_mean_var' of similar type ('scalar_t &') are easily swapped by mistake [bugprone-easily-swappable-parameters]
13 | scalar_t &mean,
| ^~~~~~~~~~~~~~~
14 | scalar_t &var) {
| ~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:13:15: note: the first parameter in the range is 'mean'
13 | scalar_t &mean,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:14:15: note: the last parameter in the range is 'var'
14 | scalar_t &var) {
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:36:5: warning: 2 adjacent parameters of 'group_norm_normalize' of similar type ('scalar_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
36 | scalar_t mean,
| ^~~~~~~~~~~~~~
37 | scalar_t var,
| ~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:36:14: note: the first parameter in the range is 'mean'
36 | scalar_t mean,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:37:14: note: the last parameter in the range is 'var'
37 | scalar_t var,
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:38:5: warning: 2 adjacent parameters of 'group_norm_normalize' of similar type ('scalar_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
38 | scalar_t eps,
| ^~~~~~~~~~~~~
39 | scalar_t gamma,
| ~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:38:14: note: the first parameter in the range is 'eps'
38 | scalar_t eps,
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:39:14: note: the last parameter in the range is 'gamma'
39 | scalar_t gamma,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:57:5: warning: 3 adjacent parameters of 'linear_forward_kernel' of similar type ('const scalar_t *__restrict') are easily swapped by mistake [bugprone-easily-swappable-parameters]
57 | const scalar_t* __restrict__ x,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 | const scalar_t* __restrict__ weight,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 | const scalar_t* __restrict__ bias,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:57:34: note: the first parameter in the range is 'x'
57 | const scalar_t* __restrict__ x,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:59:34: note: the last parameter in the range is 'bias'
59 | const scalar_t* __restrict__ bias,
| ^~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:61:5: warning: 2 adjacent parameters of 'linear_forward_kernel' of similar type ('size_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
61 | size_t batch_size,
| ^~~~~~~~~~~~~~~~~~
62 | size_t in_features,
| ~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:61:12: note: the first parameter in the range is 'batch_size'
61 | size_t batch_size,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:62:12: note: the last parameter in the range is 'in_features'
62 | size_t in_features,
| ^~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:65:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
65 | int row = blockIdx.y * blockDim.y + threadIdx.y; // batch index
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:66:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
66 | int col = blockIdx.x * blockDim.x + threadIdx.x; // output feature index
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:73:20: warning: narrowing conversion from 'size_t' (aka 'unsigned long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
73 | int numTiles = (in_features + TILE_DIM - 1) / TILE_DIM;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:75:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
75 | int x_idx = t * TILE_DIM + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:76:19: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
76 | int w_idx = t * TILE_DIM + threadIdx.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:93:5: warning: 2 adjacent parameters of 'group_norm_forward_kernel' of similar type ('const scalar_t *__restrict') are easily swapped by mistake [bugprone-easily-swappable-parameters]
93 | const scalar_t* __restrict__ x,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
94 | const scalar_t* __restrict__ gamma,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:93:34: note: the first parameter in the range is 'x'
93 | const scalar_t* __restrict__ x,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:94:34: note: the last parameter in the range is 'gamma'
94 | const scalar_t* __restrict__ gamma,
| ^~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:97:5: warning: 2 adjacent parameters of 'group_norm_forward_kernel' of similar type ('int64_t') are easily swapped by mistake [bugprone-easily-swappable-parameters]
97 | int64_t batch_size,
| ^~~~~~~~~~~~~~~~~~~
98 | int64_t num_channels,
| ~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:97:13: note: the first parameter in the range is 'batch_size'
97 | int64_t batch_size,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:98:13: note: the last parameter in the range is 'num_channels'
98 | int64_t num_channels,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:99:5: warning: 3 adjacent parameters of 'group_norm_forward_kernel' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters]
99 | int64_t num_groups,
| ^~~~~~~~~~~~~~~~~~~
100 | int64_t channels_per_group,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 | float eps = 1e-5f) {
| ~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:99:13: note: the first parameter in the range is 'num_groups'
99 | int64_t num_groups,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:101:11: note: the last parameter in the range is 'eps'
101 | float eps = 1e-5f) {
| ^~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:99:5: note:
99 | int64_t num_groups,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:101:5: note: 'int64_t' and 'float' may be implicitly converted: 'int64_t' (as 'long') -> 'float', 'float' -> 'int64_t' (as 'long')
101 | float eps = 1e-5f) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:103:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
103 | int batch = blockIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:104:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
104 | int group = blockIdx.y;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:111:21: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
111 | int channel = group * channels_per_group + c;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:127:13: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
127 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:150:3: warning: inside a lambda, '__func__' expands to the name of the function call operator; consider capturing the name of the enclosing function explicitly [bugprone-lambda-function-name]
150 | AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "linear_forward_cuda", ([&] {
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:237:34: note: expanded from macro 'AT_DISPATCH_FLOATING_TYPES'
237 | AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:233:3: note: expanded from macro 'AT_DISPATCH_CASE_FLOATING_TYPES'
233 | AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:74:3: note: expanded from macro 'AT_DISPATCH_CASE'
74 | AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__)
| ^
note: (skipping 1 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:58:7: note: expanded from macro 'AT_PRIVATE_CHECK_SELECTIVE_BUILD'
58 | AT_ERROR( \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:711:32: note: expanded from macro 'AT_ERROR'
711 | C10_EXPAND_MSVC_WORKAROUND(TORCH_CHECK(false, ::c10::str(__VA_ARGS__))); \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:536:9: note: expanded from macro 'TORCH_CHECK'
536 | __func__, \
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:179:3: warning: inside a lambda, '__func__' expands to the name of the function call operator; consider capturing the name of the enclosing function explicitly [bugprone-lambda-function-name]
179 | AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "group_norm_forward_cuda", ([&] {
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:237:34: note: expanded from macro 'AT_DISPATCH_FLOATING_TYPES'
237 | AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:233:3: note: expanded from macro 'AT_DISPATCH_CASE_FLOATING_TYPES'
233 | AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:74:3: note: expanded from macro 'AT_DISPATCH_CASE'
74 | AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__)
| ^
note: (skipping 1 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:58:7: note: expanded from macro 'AT_PRIVATE_CHECK_SELECTIVE_BUILD'
58 | AT_ERROR( \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:711:32: note: expanded from macro 'AT_ERROR'
711 | C10_EXPAND_MSVC_WORKAROUND(TORCH_CHECK(false, ::c10::str(__VA_ARGS__))); \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:536:9: note: expanded from macro 'TORCH_CHECK'
536 | __func__, \
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:204:22: warning: narrowing conversion from 'size_t' (aka 'unsigned long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
204 | const int blocks = (total_elements + threads - 1) / threads;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:205:3: warning: inside a lambda, '__func__' expands to the name of the function call operator; consider capturing the name of the enclosing function explicitly [bugprone-lambda-function-name]
205 | AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "hardtanh_forward_cuda", ([&] {
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:237:34: note: expanded from macro 'AT_DISPATCH_FLOATING_TYPES'
237 | AT_DISPATCH_SWITCH(TYPE, NAME, AT_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:233:3: note: expanded from macro 'AT_DISPATCH_CASE_FLOATING_TYPES'
233 | AT_DISPATCH_CASE(at::ScalarType::Double, __VA_ARGS__) \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:74:3: note: expanded from macro 'AT_DISPATCH_CASE'
74 | AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__)
| ^
note: (skipping 1 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/ATen/Dispatch.h:58:7: note: expanded from macro 'AT_PRIVATE_CHECK_SELECTIVE_BUILD'
58 | AT_ERROR( \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:711:32: note: expanded from macro 'AT_ERROR'
711 | C10_EXPAND_MSVC_WORKAROUND(TORCH_CHECK(false, ::c10::str(__VA_ARGS__))); \
| ^
/home/robert_sakana_ai/miniconda3/envs/llm2cuda/lib/python3.11/site-packages/torch/include/c10/util/Exception.h:536:9: note: expanded from macro 'TORCH_CHECK'
536 | __func__, \
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:226:5: warning: 2 adjacent parameters of 'module_fn_cuda_forward' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters]
226 | int64_t num_groups,
| ^~~~~~~~~~~~~~~~~~~
227 | float hardtanh_min,
| ~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:226:13: note: the first parameter in the range is 'num_groups'
226 | int64_t num_groups,
| ^~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:227:11: note: the last parameter in the range is 'hardtanh_min'
227 | float hardtanh_min,
| ^~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:226:5: note:
226 | int64_t num_groups,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:227:5: note: 'int64_t' and 'float' may be implicitly converted: 'int64_t' (as 'long') -> 'float', 'float' -> 'int64_t' (as 'long')
227 | float hardtanh_min,
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:238:11: warning: Value stored to 'in_features' during its initialization is never read [clang-analyzer-deadcode.DeadStores]
238 | int64_t in_features = x.size(1);
| ^~~~~~~~~~~ ~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250208_optimize_b5_s4_e1_sweep/level_2/task_30/b3_s1_sync_reduction_optim/base/base.cu:238:11: note: Value stored to 'in_features' during its initialization is never read
238 | int64_t in_features = x.size(1);
| ^~~~~~~~~~~ ~~~~~~~~~