The AI CUDA Engineer 👷

31_ELU

Level 1 • Task 31
import torch
import torch.nn as nn
import torch.nn.functional as F


def module_fn(x: torch.Tensor, alpha: float) -> torch.Tensor:
    """
    Applies ELU activation to the input tensor.

    Args:
        x (torch.Tensor): Input tensor of any shape.
        alpha (float): The alpha parameter for the ELU function.

    Returns:
        torch.Tensor: Output tensor with ELU applied, same shape as input.
    """
    return F.elu(x, alpha=alpha)


class Model(nn.Module):
    """
    Simple model that performs an ELU activation.
    """

    def __init__(self, alpha):
        """
        Initializes the ELU model.

        Args:
            alpha (float): The alpha parameter for the ELU function.
        """
        super(Model, self).__init__()
        self.alpha = alpha

    def forward(self, x: torch.Tensor, fn=module_fn) -> torch.Tensor:
        """
        Applies ELU activation to the input tensor.

        Args:
            x (torch.Tensor): Input tensor of any shape.
            fn (callable): Function that applies the activation; defaults to module_fn.
        Returns:
            torch.Tensor: Output tensor with ELU applied, same shape as input.
        """
        return fn(x, self.alpha)


batch_size = 16
dim = 16384
alpha = 1.0


def get_inputs():
    x = torch.randn(batch_size, dim)
    return [x]


def get_init_inputs():
    return [alpha]
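
A minimal sketch of how this harness is typically exercised, assuming the definitions above are in scope (the call pattern itself is an assumption, not part of the task definition):

model = Model(*get_init_inputs())   # Model(alpha=1.0)
x, = get_inputs()                   # one random (16, 16384) tensor
y = model(x)                        # forward defaults to fn=module_fn, i.e. F.elu(x, alpha)
assert y.shape == x.shape

The original PyTorch reference for the same task, which fixes alpha at construction time instead of threading it through a functional interface, follows.
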
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """
    Simple model that performs an ELU activation.
    """
    def __init__(self, alpha: float = 1.0):
        """
        Initializes the ELU model.

        Args:
            alpha (float, optional): The alpha parameter for the ELU function. Defaults to 1.0.
        """
        super(Model, self).__init__()
        self.alpha = alpha
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Applies ELU activation to the input tensor.

        Args:
            x (torch.Tensor): Input tensor of any shape.

        Returns:
            torch.Tensor: Output tensor with ELU applied, same shape as input.
        """
        return F.elu(x, alpha=self.alpha)

batch_size = 16
dim = 16384

def get_inputs():
    x = torch.randn(batch_size, dim)
    return [x]

def get_init_inputs():
    return [1.0]  # Provide alpha value for initialization

Kernel Information

Related Kernels (Level 1, Task 31 • 31_ELU)

Rank  Kernel Name  Runtime (ms)  Speedup (Native)  Speedup (Compile)
🥇 31_elu_shared_base 0.01 1.14 4.80
🥇 hybrid_elu_optimized_base 0.01 1.14 4.80
🥇 31_elu_vectorized_base 0.01 1.14 4.80
🥇 vec_shared_elu_base 0.01 1.14 4.80
🥇 31_elu_grid_stride_base_base 0.01 1.14 4.80
🥇 31_elu_vectorized_edit_1 0.01 1.14 4.80
🥇 elu_unroll_kernel_base 0.01 1.14 4.80
🥇 ldg_elu_128_base 0.01 1.14 4.80
9 31_ELU 0.01 0.97 4.12
9 31_elu_aligned_coalesced_base 0.01 0.97 4.12
9 hybrid_elu_base 0.01 0.97 4.12
9 31_elu_optimized_indexing_base 0.01 0.97 4.12
9 31_elu_reduced_divergence_base 0.01 0.97 4.12
9 elu_hybrid_base 0.01 0.97 4.12
9 31_elu_coalesced_base 0.01 0.97 4.12
9 31_elu_shared_mem_base 0.01 0.97 4.12
9 modular_elu_base 0.01 0.97 4.12
9 elu_vec4_shared_base 0.01 0.97 4.12
9 elu_tuned_blocksize_base 0.01 0.97 4.12
9 branchless_elu_vectorized_base 0.01 0.97 4.12
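
The Speedup (Native) and Speedup (Compile) columns are runtime ratios against PyTorch baselines (presumably eager execution and torch.compile, respectively), so a value of 1.14 means the kernel runs roughly 1.14× faster than the eager baseline. The leaderboard does not spell out its timing harness; the sketch below shows one standard way such numbers can be measured with CUDA events (the helper name and iteration counts are assumptions), given some compiled elu callable such as the extension built at the end of this page.

import torch

def time_ms(fn, x, iters=100, warmup=10):
    # Warm up to exclude one-time costs (context init, compilation, caches).
    for _ in range(warmup):
        fn(x)
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    for _ in range(iters):
        fn(x)
    end.record()
    torch.cuda.synchronize()                 # wait for all queued kernels to finish
    return start.elapsed_time(end) / iters   # average milliseconds per call

# speedup = time_ms(reference, x) / time_ms(candidate, x)

One kernel implementation for this task follows.
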
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <math.h>

#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

// Elementwise ELU: f(x) = x for x > 0, alpha * (exp(x) - 1) otherwise.
__global__ void elu_kernel(const float* x, float* out, float alpha, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        const float val = x[idx];  // load once from global memory
        out[idx] = (val > 0.0f) ? val : alpha * (expf(val) - 1.0f);
    }
}

torch::Tensor elu_cuda(torch::Tensor x, float alpha) {
    CHECK_INPUT(x);

    auto out = torch::empty_like(x);
    int n = x.numel();

    // One thread per element; round the block count up so all n elements are covered.
    const int threads = 256;
    const int blocks = (n + threads - 1) / threads;

    // data_ptr<float>() asserts the dtype, so non-float32 inputs fail loudly here.
    elu_kernel<<<blocks, threads>>>(x.data_ptr<float>(), out.data_ptr<float>(), alpha, n);

    return out;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("forward", &elu_cuda, "ELU activation (CUDA)");
}
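
A hedged sketch of compiling and sanity-checking this kernel from Python with torch.utils.cpp_extension.load_inline. The extension name elu_ext is arbitrary, and the PYBIND11_MODULE block is omitted because load_inline generates the bindings itself when functions= is passed.

import torch
import torch.nn.functional as F
from torch.utils.cpp_extension import load_inline

# The kernel and launcher from above, minus the PYBIND11_MODULE block.
cuda_src = r"""
#include <torch/extension.h>

__global__ void elu_kernel(const float* x, float* out, float alpha, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        const float val = x[idx];
        out[idx] = (val > 0.0f) ? val : alpha * (expf(val) - 1.0f);
    }
}

torch::Tensor elu_cuda(torch::Tensor x, float alpha) {
    TORCH_CHECK(x.is_cuda() && x.is_contiguous(), "x must be a contiguous CUDA float tensor");
    auto out = torch::empty_like(x);
    int n = x.numel();
    const int threads = 256;
    elu_kernel<<<(n + threads - 1) / threads, threads>>>(
        x.data_ptr<float>(), out.data_ptr<float>(), alpha, n);
    return out;
}
"""

ext = load_inline(
    name="elu_ext",
    cpp_sources="torch::Tensor elu_cuda(torch::Tensor x, float alpha);",
    cuda_sources=cuda_src,
    functions=["elu_cuda"],
)

x = torch.randn(16, 16384, device="cuda")
torch.testing.assert_close(ext.elu_cuda(x, 1.0), F.elu(x, alpha=1.0))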