Enable the JIT-compiled C launcher (cjit), opt-in via TRITON_TVM_FFI_ENABLE_JIT

Signed-off-by: Jinjie Liu <jjliu@baai.ac.cn>
2026-07-01 08:51:56 +08:00 · 2026-01-31 10:36:41 +08:00
parent f0739b9dca
commit ac7497b2c8
7 changed files with 180 additions and 38 deletions
@@ -1,9 +1,15 @@
 from __future__ import annotations

-from typing import Any, Callable, Final, List, Sequence, Type, Union
+from functools import cached_property
+import os
+from typing import Any, Callable, Final, List, Sequence, Type
+
+import jinja2
 from triton.backends.nvidia.driver import CudaDriver
 from triton.runtime import _allocation
-from . import TVMFFILauncherImpl, utils, string_to_type
+import tvm_ffi
+
+from . import TVMFFILauncherImpl, utils, string_to_type, type_to_ctype


 class TVMLauncher(object):
@@ -18,27 +24,53 @@ class TVMLauncher(object):
        self.profile_scratch_align: Final[int] = metadata.profile_scratch_align
        self.launch_cooperative_grid: Final[bool] = metadata.launch_cooperative_grid
        self.launch_pdl: Final[bool] = metadata.launch_pdl
-        self.impl: TVMFFILauncherImpl = TVMFFILauncherImpl(
-            [string_to_type(t) for t in self.signature],
-            self.launch_cooperative_grid,
-            self.launch_pdl,
+        self.enable_jit: Final[bool] = (
+            os.getenv("TRITON_TVM_FFI_ENABLE_JIT", None) is not None
        )
-        self.launch: Callable[
-            [
-                int,
-                int,
-                int,
-                int,
-                int,
-                tuple[int, int, int],
-                object,
-                object,
-                object,
-                object,
-                object,
-                Sequence[Union[Any]],
-            ]
-        ] = self.impl.launch
+        if self.enable_jit:
+            mod = tvm_ffi.cpp.load_inline(
+                "launch",
+                cpp_sources=self.codegen,
+                extra_ldflags=["-Wl,--no-as-needed", "-lcuda"],
+                extra_include_paths=[
+                    f"{tvm_ffi.cpp.extension._find_cuda_home()}/include"
+                ],
+            )
+            launch = mod.get_function("launch")
+            self.launch = launch
+        else:
+            self.impl: TVMFFILauncherImpl = TVMFFILauncherImpl(
+                [string_to_type(t) for t in self.signature],
+                self.launch_cooperative_grid,
+                self.launch_pdl,
+            )
+            self.launch = (
+                lambda grid_x,
+                grid_y,
+                grid_z,
+                stream,
+                function,
+                kernel_metadata,
+                launch_metadata,
+                launch_enter_hook,
+                launch_exit_hook,
+                global_scratch,
+                profile_scratch,
+                *args: self.impl.launch(
+                    grid_x,
+                    grid_y,
+                    grid_z,
+                    stream,
+                    function,
+                    kernel_metadata,
+                    launch_metadata,
+                    launch_enter_hook,
+                    launch_exit_hook,
+                    global_scratch,
+                    profile_scratch,
+                    args,
+                )
+            )

    def __call__(
        self,
@@ -72,22 +104,51 @@ class TVMLauncher(object):
        assert not self.launch_cooperative_grid
        assert not self.launch_pdl

-        # args: Sequence[TypedValue] = TypedValue.make_typed_values(self.signature, args)
+        if self.enable_jit:
+            (num_warps, num_ctas, shared_memory) = kernel_metadata
+            return self.launch(
+                gridX,
+                gridY,
+                gridZ,
+                stream,
+                function,
+                num_warps,
+                num_ctas,
+                shared_memory,
+                *args,
+            )
+        else:
+            return self.launch(
+                gridX,
+                gridY,
+                gridZ,
+                stream,
+                function,
+                kernel_metadata,
+                launch_metadata,
+                launch_enter_hook,
+                launch_exit_hook,
+                global_scratch,
+                profile_scratch,
+                *args,
+            )

-        return self.impl.launch(
-            gridX,
-            gridY,
-            gridZ,
-            stream,
-            function,
-            kernel_metadata,
-            launch_metadata,
-            launch_enter_hook,
-            launch_exit_hook,
-            global_scratch,
-            profile_scratch,
-            args,
+    @cached_property  # computed on first access, then stored on the instance
+    def codegen(self) -> str:  # renders the launcher source text from the "launch.c.j2" template
+        env: Final[jinja2.Environment] = jinja2.Environment(
+            loader=jinja2.PackageLoader("triton_tvm_ffi", "templates"),  # templates live in the "templates" dir of the triton_tvm_ffi package
+            trim_blocks=True,
+            lstrip_blocks=True,
        )
+        template = env.get_template("launch.c.j2")
+        signature = list(  # materialised as a list before being handed to the template
+            filter(
+                lambda t: t != "void",  # drop entries whose converted C type is "void"
+                map(lambda t: type_to_ctype(string_to_type(t)), self.signature),  # signature string -> type -> C type name
+            )
+        )
+        html = template.render(signature=signature)  # NOTE(review): despite the name this is not HTML; __init__ passes it to load_inline as cpp_sources
+        return html


 class TVMFFIDriver(CudaDriver):