[microNPU][ETHOSU] Fix LUT size for int16 activations (#16680)
When passing the look-up table values to the TE graph, the table size for the int8 type was always used; now the required size is set depending on the input data type.
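
For illustration only (not part of the commit), a minimal sketch of the relationship the fix encodes; the names below are hypothetical, while the commit's actual helper is get_lut_expr in te/common.py:

    # Illustrative sketch: the LUT length expected for each IFM type, matching the
    # asserts added in get_lut_expr (256 entries for int8, 512 entries for int16).
    LUT_SIZE_BY_IFM_DTYPE = {"int8": 256, "int16": 512}

    def last_lut_index(ifm_dtype: str) -> int:
        # The TE "trick" expression reads the first and last LUT entries, so the
        # last index is no longer hard-coded to 255.
        return LUT_SIZE_BY_IFM_DTYPE[ifm_dtype] - 1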
Aleksei-grovety authored Mar 11, 2024
1 parent 596db03 commit 254e90a
Showing 7 changed files with 55 additions and 9 deletions.
4 changes: 2 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
@@ -22,7 +22,7 @@
from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher

from .dma import dma_ofm_compute, dma_ifm_compute
from .common import get_layout_transform_matrices
from .common import get_layout_transform_matrices, get_lut_expr


def binary_elementwise_compute(
@@ -180,7 +180,7 @@ def binary_elementwise_compute(

has_lut = activation in ("TANH", "LUT", "SIGMOID")
# This is a trick to insert the LUT tensor into the TE graph if LUT is present
lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0
lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0

# Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
if has_lut:
26 changes: 26 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/te/common.py
@@ -61,3 +61,29 @@ def get_layout_transform_matrices(ofm_channels: int) -> Tuple[List[List[float]],
]

return nhwc_to_nhcwb16, nhcwb16_to_nhwc


def get_lut_expr(lut, ifm_dtype):
    """Get the LUT expression to pass it to the TE graph.
    For information about the LUT see
    https://developer.arm.com/documentation/102420/0200/Functional-description/Functional-blocks-/Output-unit/tanh--sigmoid--and-LUT

    Parameters
    ----------
    lut : te.Tensor
        The look-up table values.
    ifm_dtype : str
        The type of Input Feature Map tensor (IFM).

    Returns
    -------
    lut_expr : tvm.tir.expr.Cast
        The LUT expression to pass it to the TE graph
    """
    assert ifm_dtype in ["int8", "int16"]
    if ifm_dtype == "int8":
        assert lut.shape[0] == 256
    if ifm_dtype == "int16":
        assert lut.shape[0] == 512
    lut_expr = (lut[0] + lut[lut.shape[0] - 1]).astype(ifm_dtype)
    return lut_expr
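
A possible usage sketch of the new helper (the placeholder below is assumed for illustration and is not part of this commit; it requires a TVM build with microNPU support):

    from tvm import te
    from tvm.relay.backend.contrib.ethosu.te.common import get_lut_expr

    # Hypothetical 512-entry LUT placeholder for an int16 IFM.
    lut = te.placeholder((512,), dtype="int16", name="lut")

    # Evaluates to (lut[0] + lut[511]).astype("int16"), which is enough to pull
    # the LUT tensor into the TE graph.
    lut_expr = get_lut_expr(lut, "int16")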
4 changes: 2 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -23,7 +23,7 @@
from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher

from .dma import dma_ofm_compute, dma_ifm_compute
from .common import get_layout_transform_matrices
from .common import get_layout_transform_matrices, get_lut_expr


def conv2d_compute(
@@ -155,7 +155,7 @@ def conv2d_compute(
has_lut = activation in ("TANH", "LUT", "SIGMOID")

# This is a trick to insert the LUT tensor into the TE graph if LUT is present
lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0
lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0

# Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
if has_lut:
4 changes: 2 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
@@ -23,7 +23,7 @@
from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher

from .dma import dma_ofm_compute, dma_ifm_compute
from .common import get_layout_transform_matrices
from .common import get_layout_transform_matrices, get_lut_expr


def depthwise_conv2d_compute(
@@ -147,7 +147,7 @@ def depthwise_conv2d_compute(
has_lut = activation in ("TANH", "LUT", "SIGMOID")

# This is a trick to insert the LUT tensor into the TE graph if LUT is present
lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0
lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0

# Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
if has_lut:
3 changes: 2 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/identity.py
@@ -20,6 +20,7 @@
from tvm import te
from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher

from .common import get_lut_expr
from .dma import read_compute, write_compute


@@ -72,7 +73,7 @@ def identity_compute(
has_lut = activation in ("TANH", "LUT", "SIGMOID")

# This is a trick to insert the LUT tensor into the TE graph if LUT is present
lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0
lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0

# Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
if has_lut:
4 changes: 2 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/pooling.py
@@ -23,7 +23,7 @@
from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher

from .dma import dma_ofm_compute, dma_ifm_compute
from .common import get_layout_transform_matrices
from .common import get_layout_transform_matrices, get_lut_expr


def pooling_compute(
@@ -147,7 +147,7 @@ def pooling_compute(
has_lut = activation in ("TANH", "LUT", "SIGMOID")

# This is a trick to insert the LUT tensor into the TE graph if LUT is present
lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0
lut_expr = get_lut_expr(lut, ifm.dtype) if has_lut else 0

# Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
if has_lut:
19 changes: 19 additions & 0 deletions tests/python/contrib/test_ethosu/test_scheduler.py
@@ -178,6 +178,25 @@ def test_copy_luts():
assert ".local" in sch.stages[10].op.name


# This test makes sure that the LUT has the correct size
@pytest.mark.parametrize("dtype,lut_size", [["int8", 256], ["int16", 512]])
def test_lut_size(dtype, lut_size):
    ifm_shape = (1, 2, 4, 8)
    ifm = relay.var("IFM", shape=ifm_shape, dtype=dtype)
    lut = relay.const([i for i in range(lut_size)], dtype=dtype)
    identity = make_ethosu_identity(ifm, lut=lut, activation="TANH")
    func = relay.Function(relay.analysis.free_vars(identity), identity)
    func = run_opt_pass(func, relay.transform.InferType())

    func, const_dict = extract_constants(func)
    te_graph = lower_to_te(func)

    sch = te.create_schedule([te_graph.outputs[0].op])
    copy_luts()(te_graph, const_dict, sch)

    assert sch.stages[3].all_iter_vars[0].dom == tvm.ir.expr.Range(0, lut_size)


def test_schedule_cache_reads():
a = te.placeholder((12, 12), dtype="uint8", name="a")
b = te.placeholder((12, 12), dtype="uint8", name="b")
