[Dlight] Scheduling Low batch GEMM using GEMV-like rule (#16579)

* low batch
* fix
* fix lint
* do dequantize only once
* change default
* add test
* fix lint
* fix lint
apache · Feb 21, 2024 · ff0b99c · ff0b99c
1 parent bd79374
commit ff0b99c
Show file tree

Hide file tree

Showing 5 changed files with 876 additions and 3 deletions.
diff --git a/python/tvm/dlight/gpu/__init__.py b/python/tvm/dlight/gpu/__init__.py
@@ -19,6 +19,7 @@
 For CUDA/ROCm/Vulkan/Metal-specific rules, use `tvm.dlight.cuda/rocm/vulkan/metal` instead
 """
 from .gemv import GEMV
+from .low_batch_gemv import LowBatchGEMV
 from .fallback import Fallback
 from .matmul import Matmul
 from .reduction import Reduction