11 | __global__ void cosine_similarity_loss_kernel_constant(const int N, const int D, float* output) {
| ^~~~~~~~~~~~~~~~~~~~~~~~
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:11:66: note: the first parameter in the range is 'N'
11 | __global__ void cosine_similarity_loss_kernel_constant(const int N, const int D, float* output) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:11:79: note: the last parameter in the range is 'D'
11 | __global__ void cosine_similarity_loss_kernel_constant(const int N, const int D, float* output) {
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:14:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
14 | int row = blockIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:15:15: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
15 | int tid = threadIdx.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:16:21: warning: narrowing conversion from 'unsigned int' to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
16 | int blockSize = blockDim.x;
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:88:60: warning: the parameter 'predictions' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
88 | torch::Tensor cosine_similarity_loss_forward(torch::Tensor predictions, torch::Tensor targets) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:88:87: warning: the parameter 'targets' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
88 | torch::Tensor cosine_similarity_loss_forward(torch::Tensor predictions, torch::Tensor targets) {
| ^
| const &
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:95:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
95 | int N = predictions.size(0);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:96:13: warning: narrowing conversion from 'int64_t' (aka 'long') to signed type 'int' is implementation-defined [bugprone-narrowing-conversions]
96 | int D = predictions.size(1);
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:101:74: warning: performing an implicit widening conversion to type 'unsigned long' of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
101 | cudaMemcpyToSymbol(const_predictions, predictions.data_ptr<float>(), N * D * sizeof(float));
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:101:74: note: make conversion explicit to silence this warning
101 | cudaMemcpyToSymbol(const_predictions, predictions.data_ptr<float>(), N * D * sizeof(float));
| ^~~~~
| static_cast<unsigned long>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:101:74: note: perform multiplication in a wider type
101 | cudaMemcpyToSymbol(const_predictions, predictions.data_ptr<float>(), N * D * sizeof(float));
| ^
| static_cast<long>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:102:66: warning: performing an implicit widening conversion to type 'unsigned long' of a multiplication performed in type 'int' [bugprone-implicit-widening-of-multiplication-result]
102 | cudaMemcpyToSymbol(const_targets, targets.data_ptr<float>(), N * D * sizeof(float));
| ^
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:102:66: note: make conversion explicit to silence this warning
102 | cudaMemcpyToSymbol(const_targets, targets.data_ptr<float>(), N * D * sizeof(float));
| ^~~~~
| static_cast<unsigned long>( )
/home/robert_sakana_ai/llm_cuda/experiments/20250212_optimize_b5_s4_e1_v2/level_1/task_97/b5_s1_97_cosine_similarity_loss_constant_memory/base/base.cu:102:66: note: perform multiplication in a wider type
102 | cudaMemcpyToSymbol(const_targets, targets.data_ptr<float>(), N * D * sizeof(float));
| ^
| static_cast<long>( )