From 9e68531ac4846f6740bcd6d78f0c2fb6c0bb77d7 Mon Sep 17 00:00:00 2001
From: Jack Zhang
Date: Tue, 17 Dec 2024 21:37:55 -0800
Subject: [PATCH] Only add pass when vision model

Register the cache_pos mutable-buffer initialization pass only for
TorchTune-defined (vision) models instead of running it on every export:
replace the unconditional InitMutableBufferPass in
edge_to_executorch_passes with a CachePosToInitializedMutableBufferPass
that export_llama threads through LLMEdgeManager.to_executorch(), and
drop leftover debug prints from the emitter and method.cpp.

---
 examples/models/llama/export_llama_lib.py    | 12 ++++++++--
 .../models/llama3_2_vision/runner/native.py  |  2 --
 exir/emit/_emitter.py                        |  1 -
 exir/passes/init_mutable_buffer_pass.py      | 21 ------------------
 exir/program/_program.py                     |  2 --
 extension/llm/export/builder.py              | 22 +++++++++++--------
 runtime/executor/method.cpp                  |  5 -----
 7 files changed, 23 insertions(+), 42 deletions(-)
 delete mode 100644 exir/passes/init_mutable_buffer_pass.py

diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index ea4296cc52..65bc8991a8 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -23,6 +23,9 @@
 import torch
 
 from executorch.devtools.etrecord import generate_etrecord
+from executorch.exir.passes.cache_pos_init_mutable_pass import (
+    CachePosToInitializedMutableBufferPass,
+)
 
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
 
@@ -760,6 +763,9 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
     for partitioner in partitioners:
         logging.info(f"--> {partitioner.__class__.__name__}")
 
+    additional_passes = []
+    if args.model in TORCHTUNE_DEFINED_MODELS:
+        additional_passes = [CachePosToInitializedMutableBufferPass()]
     if args.generate_etrecord:
         if not builder_exported_to_edge.edge_manager:
             raise ValueError("Unable to generate etrecord due to missing edge manager.")
@@ -774,7 +780,9 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
             # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`.
             canonicalize_program(builder.edge_manager.exported_program())
 
-        builder = builder.to_executorch()
+        builder = builder.to_executorch(
+            passes=additional_passes,
+        )
 
         # Generate ETRecord
         if edge_manager_copy:
@@ -792,7 +800,7 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
             # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`.
             canonicalize_program(builder.edge_manager.exported_program())
 
-        builder = builder.to_executorch()
+        builder = builder.to_executorch(passes=additional_passes)
 
         if args.profile_memory:
             generate_memory_trace(builder.export_program, "memory_profile.json")
diff --git a/examples/models/llama3_2_vision/runner/native.py b/examples/models/llama3_2_vision/runner/native.py
index 8180f1abbf..105ddf2054 100644
--- a/examples/models/llama3_2_vision/runner/native.py
+++ b/examples/models/llama3_2_vision/runner/native.py
@@ -19,7 +19,6 @@
 )
 
 from executorch.extension.pybindings.portable_lib import (
-    _load_for_executorch,
     _load_for_executorch_from_buffer,
 )
 
@@ -50,7 +49,6 @@ def __init__(self, args):
         with open(args.pte, "rb") as f:
             self.model_bytes = f.read()
             self.model = _load_for_executorch_from_buffer(self.model_bytes)
-        # self.model = _load_for_executorch(args.pte)
         self.use_kv_cache = args.kv_cache
 
     def forward(
diff --git a/exir/emit/_emitter.py b/exir/emit/_emitter.py
index 2ee6bb60b6..119fee3cc6 100644
--- a/exir/emit/_emitter.py
+++ b/exir/emit/_emitter.py
@@ -1607,7 +1607,6 @@ def placeholder(
 
         if isinstance(target, str) and isinstance(spec, TensorSpec):
             fqn, is_mutable_buffer = self._find_fqn_for_placeholder(target, spec)
-            print(f"fqn: {fqn}, is_mutable_buffer: {is_mutable_buffer}")
 
         # If the placeholder has a constant_tag, it is external to the PTE file
         # and requires a fqn and location=TensorDataLocation.EXTERNAL
diff --git a/exir/passes/init_mutable_buffer_pass.py b/exir/passes/init_mutable_buffer_pass.py
deleted file mode 100644
index 688410cc2f..0000000000
--- a/exir/passes/init_mutable_buffer_pass.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import torch
-
-from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue
-from executorch.exir.passes.spec_prop_pass import make_spec
-
-
-class InitMutableBufferPass(ExportPass):
-    def __init__(self) -> None:
-        super().__init__()
-
-    def placeholder(self, name: str, arg, meta):
-        if "cache_pos" in name:
-            meta["et_init_buffer"] = True
-
-        return super().placeholder(name, arg, meta)
diff --git a/exir/program/_program.py b/exir/program/_program.py
index e6247231f0..fd1d0aca3d 100644
--- a/exir/program/_program.py
+++ b/exir/program/_program.py
@@ -34,7 +34,6 @@
     OpReplacePass,
 )
 from executorch.exir.passes.external_constants_pass import external_constants_pass
-from executorch.exir.passes.init_mutable_buffer_pass import InitMutableBufferPass
 from executorch.exir.passes.insert_write_back_for_buffers_pass import (
     insert_write_back_for_buffers_pass,
 )
@@ -707,7 +706,6 @@ def edge_to_executorch_passes(
     passes: List[PassType] = [
         *config.passes,
         SpecPropPass(),
-        InitMutableBufferPass(),
         # ExecuTorch backend ops are unable to handle unbacked symints. So after
         # this pass, passes cannot be Interpreter-based, because it will fail if
         # there exists an unbacked symint operation.
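
Note: the new exir/passes/cache_pos_init_mutable_pass.py module itself is not
part of this diff. Judging from the InitMutableBufferPass deleted above, a
minimal sketch of what it presumably contains (hypothetical; the actual file
may differ) is:

    # Hypothetical sketch of exir/passes/cache_pos_init_mutable_pass.py.
    from executorch.exir.pass_base import ExportPass


    class CachePosToInitializedMutableBufferPass(ExportPass):
        """Tag the `cache_pos` mutable buffer so the emitter treats it as an
        initialized buffer instead of leaving its contents undefined."""

        def placeholder(self, name: str, arg, meta):
            # Same "et_init_buffer" key the deleted pass set; the emitter
            # reads it when emitting mutable-buffer placeholders.
            if "cache_pos" in name:
                meta["et_init_buffer"] = True
            return super().placeholder(name, arg, meta)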
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index 8bb98ebeae..619d9782a7 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -25,6 +25,7 @@
 from executorch.exir.backend.utils import format_delegated_graph
 from executorch.exir.capture._config import EdgeCompileConfig, ExecutorchBackendConfig
 
+from executorch.exir.pass_manager import PassType
 from executorch.exir.passes import MemoryPlanningPass
 from executorch.exir.passes.quant_fusion_pass import QuantFusionPass
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
@@ -395,26 +396,29 @@ def to_backend(self, partitioners: Optional[List[Partitioner]]) -> "LLMEdgeManag
 
         return self
 
-    def to_executorch(self) -> "LLMEdgeManager":
+    def to_executorch(self, passes: Optional[List[PassType]] = None) -> "LLMEdgeManager":
         """
         Lower the model to executorch and get an ExecutorchProgram.
         """
         assert self.edge_manager, "Need to run export_to_edge() first"
+        to_executorch_passes = [
+            # If there are Linear operations left in the graph, let's execute
+            # them with the optimized op_linear rather than materializing a
+            # transpose followed by a regular op_mm.
+            ConvertToLinearPass(),
+            QuantFusionPass(),
+        ]
+        if passes:
+            to_executorch_passes.extend(passes)
+
         self.export_program = self.edge_manager.to_executorch(
             ExecutorchBackendConfig(
                 extract_delegate_segments=True,
-                passes=[
-                    # If there are Linear operations left in the graph, let's execute
-                    # them with the optimized op_linear rather than materializing a
-                    # transpose followed by a regular op_mm.
-                    ConvertToLinearPass(),
-                    QuantFusionPass(),
-                ],
+                passes=to_executorch_passes,
                 memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
             )
         )
-        print(self.export_program.dump_executorch_program(verbose=True))
         logging.info(
             "Required memory for activation in bytes: {}".format(
                 self.export_program._emitter_output.program.execution_plan[
diff --git a/runtime/executor/method.cpp b/runtime/executor/method.cpp
index 72110879b3..b1094ed122 100644
--- a/runtime/executor/method.cpp
+++ b/runtime/executor/method.cpp
@@ -11,7 +11,6 @@
 #include <cinttypes>  // @donotremove
 #include <cstdint>
 #include <cstdio>
-#include <executorch/extension/evalue_util/print_evalue.h>
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/evalue.h>
 
@@ -1181,10 +1180,6 @@ Error Method::execute_instruction() {
     if (err == Error::Ok) {
       step_state_.instr_idx = next_instr_idx;
     }
-
-    // TODO: Print an EValue.
-    std::cout << "(" << values_[1] << " ) Printing kv_cache k_cache: "
-              << executorch::extension::evalue_edge_items(9216) << values_[2] << std::endl;
     return err;
   }
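
With the default passes=None, existing callers of to_executorch() are
unaffected; only export_llama opts in for TorchTune-defined (vision) models.
A minimal sketch of the resulting call pattern, mirroring the
export_llama_lib.py hunks above:

    # Assumes args, TORCHTUNE_DEFINED_MODELS, and builder as in
    # examples/models/llama/export_llama_lib.py.
    additional_passes = []
    if args.model in TORCHTUNE_DEFINED_MODELS:
        additional_passes = [CachePosToInitializedMutableBufferPass()]

    # ConvertToLinearPass and QuantFusionPass always run; additional_passes
    # are appended after them inside to_executorch().
    builder = builder.to_executorch(passes=additional_passes)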