Forkless transformers [4.34.1] #2199

Merged · 19 commits · Mar 29, 2024

Changes from 15 commits
@@ -77,7 +77,7 @@ With the models downloaded, we will set up the Hugging Face `tokenizer`, `config
We instantiate these classes by passing the local path to the directory containing the `pytorch_model.bin`, `tokenizer.json`, and `config.json` files from the SparseZoo download.

```python
-from sparseml.transformers.utils import SparseAutoModel
+from sparseml.transformers import SparseAutoModel
from transformers import AutoModelForSequenceClassification, AutoConfig, AutoTokenizer

NUM_LABELS = 2
```
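For reference, here is a minimal, hedged sketch of how the instantiation described above typically continues, using only stock Hugging Face APIs; the `model_path` value is a placeholder for the local SparseZoo download directory and is not part of this diff:

```python
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

NUM_LABELS = 2
model_path = "./model"  # hypothetical local directory with pytorch_model.bin, tokenizer.json, config.json

# all three classes are instantiated from the same local directory
config = AutoConfig.from_pretrained(model_path, num_labels=NUM_LABELS)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
```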
3 changes: 1 addition & 2 deletions setup.py
@@ -79,8 +79,7 @@
"opencv-python<=4.6.0.66",
]
_transformers_deps = _pytorch_deps + [
f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}"
f"~={version_nm_deps}",
"transformers<3.35 ",
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
"datasets<=2.14.6",
"dvc",
"scikit-learn",
3 changes: 1 addition & 2 deletions src/sparseml/evaluation/integrations/perplexity.py
@@ -14,8 +14,7 @@

from typing import List, Optional, Union

-from sparseml.transformers.utils.sparse_model import SparseAutoModelForCausalLM
-from sparseml.transformers.utils.sparse_tokenizer import SparseAutoTokenizer
+from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer


try:
31 changes: 28 additions & 3 deletions src/sparseml/export/validators.py
@@ -17,9 +17,12 @@
import os.path
from collections import OrderedDict
from pathlib import Path
-from typing import Callable, List, Optional, Union
+from typing import Callable, List, Optional
+from typing import OrderedDict as OrderedDictType
+from typing import Union

import numpy
+import onnx

from sparseml.export.export_data import InputsNames, LabelNames, OutputsNames
from sparseml.export.helpers import ONNX_MODEL_NAME, onnx_data_files
@@ -164,8 +167,11 @@ def validate_correctness(

    sample_inputs_files = sorted(glob.glob(os.path.join(sample_inputs_path, "*")))
    sample_outputs_files = sorted(glob.glob(os.path.join(sample_outputs_path, "*")))

-    session = ort.InferenceSession(os.path.join(directory, onnx_model_name))
+    model_path = os.path.join(directory, onnx_model_name)
+    expected_input_names = [
+        inp.name for inp in onnx.load(model_path, load_external_data=False).graph.input
+    ]
+    session = ort.InferenceSession(model_path)

    validations = (
        []
@@ -180,6 +186,11 @@
        sample_input_with_batch_dim = OrderedDict(
            (key, numpy.expand_dims(value, 0)) for key, value in sample_input.items()
        )
+
+        sample_input_with_batch_dim = _potentially_rename_input(
+            sample_input_with_batch_dim, expected_input_names
+        )
+
        outputs = session.run(None, sample_input_with_batch_dim)
        if isinstance(outputs, list):
            validations_sample = []
@@ -205,3 +216,17 @@
f"Successfully validated the exported model on all {len(validations)} samples."
)
return True


+def _potentially_rename_input(
+    sample_input_with_batch_dim: OrderedDictType[str, numpy.ndarray],
+    expected_input_names: List[str],
+) -> OrderedDictType[str, numpy.ndarray]:
+    # if required, rename the input names of the sample to match
+    # the input names of the model
+    input_names = list(sample_input_with_batch_dim.keys())
+    if set(input_names) != set(expected_input_names):
+        return OrderedDict(
+            zip(expected_input_names, sample_input_with_batch_dim.values())
+        )
+    return sample_input_with_batch_dim
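The rename is positional: a `zip` of the model's graph input names with the sample's values, so it assumes the sample dict's insertion order already matches the model's input order. A small illustration of the behavior, assuming the helper above is in scope (the names and shapes are made up):

```python
from collections import OrderedDict

import numpy

# sample saved under generic names, in the same order the model expects
sample = OrderedDict(
    [("input_0", numpy.zeros((1, 128))), ("input_1", numpy.zeros((1, 128)))]
)

# the ONNX graph declares these input names, so the sample keys get remapped
renamed = _potentially_rename_input(sample, ["input_ids", "attention_mask"])
assert list(renamed.keys()) == ["input_ids", "attention_mask"]
```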
@@ -30,7 +30,7 @@
    ScheduledModifier,
    ScheduledUpdateModifier,
)
-from sparseml.pytorch.utils import get_layer, get_prunable_layers, replace_layer
+from sparseml.pytorch.utils import get_layer, get_prunable_layers, swap_modules
from sparseml.pytorch.utils.logger import BaseLogger
from sparseml.sparsification import SparsificationTypes
from sparseml.utils import ALL_PRUNABLE_TOKEN, ALL_TOKEN, validate_str_iterable
@@ -219,11 +219,11 @@ def _check_update_pruning(self, module: Module, epoch: float, steps_per_epoch: i
            epoch >= self.start_epoch or self.start_epoch == -1
        ):
            for name in list(self._layer_modules.keys()):
-                self._layer_modules[name] = replace_layer(module, name, Identity())
+                self._layer_modules[name] = swap_modules(module, name, Identity())
            self._layers_replaced = True

        if self._layers_replaced and (epoch >= self.end_epoch and self.end_epoch != -1):
            for name, replaced in self._layer_modules.items():
-                replace_layer(module, name, replaced)
+                swap_modules(module, name, replaced)
                self._layer_modules[name] = None
            self._layers_replaced = False
62 changes: 36 additions & 26 deletions src/sparseml/pytorch/utils/helpers.py
@@ -85,12 +85,12 @@
"tensor_sample",
"mask_difference",
"get_layer",
"replace_layer",
"get_terminal_layers",
"get_conv_layers",
"get_linear_layers",
"get_prunable_layers",
"get_quantizable_layers",
"swap_modules",
"get_named_layers_and_params_by_regex",
"any_str_or_regex_matches_param_name",
"NamedLayerParam",
@@ -725,31 +725,6 @@ def get_layer(name: str, module: Module) -> Module:
    return layer


-def replace_layer(
-    module: Module,
-    name: str,
-    replace: Module,
-) -> Module:
-    """
-    General function to replace a layer in a module with the given new one.
-
-    :param module: the module to replace the layer in
-    :param name: the name of the layer to replace the activation for
-    :param replace: the module to replace the layer with
-    :return: the original layer that was replaced
-    """
-    parent = module
-    sections = name.split(".")
-
-    for sec in sections[:-1]:
-        parent = parent.__getattr__(sec)
-
-    cur = parent.__getattr__(sections[-1])
-    parent.__setattr__(sections[-1], replace)
-
-    return cur


def get_terminal_layers(module: Module) -> Dict[str, Module]:
"""
:param module: the module to grab all terminal layers for
@@ -1248,3 +1223,38 @@ def _exe_input(_, inp, out):
    for h in handles:
        h.remove()
    return order


+def swap_modules(
+    module: torch.nn.Module, submodule_name: str, submodule_to_replace: torch.nn.Module
+) -> torch.nn.Module:
+    """
+    Iteratively unfold the submodules of the module according to the submodule_name
+    to eventually replace the leaf submodule (accessed from the module through the
+    submodule_name) with the submodule_to_replace.
+
+    E.g.
+    ```
+    swap_modules(module=Model,
+                 submodule_name="layers.0.sublayer",
+                 submodule_to_replace=ReplaceModule,
+                 )
+    ```
+    will iteratively traverse through the submodules
+    'layers' -> '0' to eventually replace 'sublayer' with ReplaceModule
+
+    :param module: the module whose submodule is to be replaced
+    :param submodule_name: the name of the submodule to replace
+    :param submodule_to_replace: the module to swap in for the submodule
+    :return: the original submodule that was replaced
+    """
+    parent = module
+    sections = submodule_name.split(".")
+
+    for sec in sections[:-1]:
+        parent = parent.__getattr__(sec)
+
+    cur = parent.__getattr__(sections[-1])
+    parent.__setattr__(sections[-1], submodule_to_replace)
+
+    return cur
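A self-contained sketch of the new helper's round-trip use, mirroring how `_check_update_pruning` above swaps layers out for `Identity` and later restores them (the toy model is illustrative):

```python
from torch.nn import Identity, Linear, Sequential

from sparseml.pytorch.utils import swap_modules

model = Sequential(Sequential(Linear(8, 8), Linear(8, 2)))

# swap the nested Linear(8, 2) out for an Identity; the original module is returned
original = swap_modules(model, "0.1", Identity())
assert isinstance(model[0][1], Identity)

# later, swap the original layer back in (as _check_update_pruning does at end_epoch)
swap_modules(model, "0.1", original)
assert isinstance(model[0][1], Linear)
```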
41 changes: 8 additions & 33 deletions src/sparseml/transformers/__init__.py
@@ -17,46 +17,21 @@
"""

# flake8: noqa

-import logging as _logging

from sparseml.analytics import sparseml_analytics as _analytics
+from sparseml.transformers.base import check_transformers_install


-try:
-    import datasets as _datasets
-    import transformers as _transformers
-except ImportError:
-    raise ImportError("Please install sparseml[transformers] to use this pathway")


+check_transformers_install()
_analytics.send_event("python__transformers__init")


-_LOGGER = _logging.getLogger(__name__)


-def _check_transformers_install():
-    # check for NM integration in transformers version
-    import transformers as _transformers
-
-    if not getattr(_transformers, "NM_INTEGRATED", False):
-        message = (
-            "****************************************************************\n"
-            "WARNING: It appears that the Neural Magic fork of Transformers is not installed!\n"
-            "This is CRITICAL for the proper application of quantization in SparseML flows.\n\n"
-            "To resolve this, please run: `pip uninstall transformers;pip install nm-transformers`\n"
-            "Failing to do so is UNSUPPORTED and may significantly affect model performance.\n"
-            "****************************************************************"
-        )
-        _LOGGER.warning(message)
-
-
-_check_transformers_install()

# isort: skip_file
# (import order matters for circular import avoidance)
from .utils import *
+from .sparsification import (
+    SparseAutoModel,
+    SparseAutoModelForCausalLM,
+    SparseAutoConfig,
+    SparseAutoTokenizer,
+)
from .export import *
from .finetune import *
from .compression import *
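With this restructuring, downstream code pulls the `Sparse*` classes straight from the package root, as the updated perplexity integration above does. A brief hedged sketch; the checkpoint path is a placeholder:

```python
from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer

checkpoint = "./my-sparse-llm"  # hypothetical local checkpoint directory
model = SparseAutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = SparseAutoTokenizer.from_pretrained(checkpoint)
```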
29 changes: 29 additions & 0 deletions src/sparseml/transformers/base.py
@@ -0,0 +1,29 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


_LOGGER = logging.getLogger(__name__)


def check_transformers_install():
    try:
        import transformers  # noqa F401
    except ImportError as transformers_err:
        _LOGGER.warning(
            "transformers dependency is not installed. "
            "To install, run `pip install sparseml[transformers]`"
        )
        raise transformers_err
3 changes: 1 addition & 2 deletions src/sparseml/transformers/export.py
@@ -88,9 +88,8 @@
from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.utils import export_onnx
-from sparseml.transformers import SparseAutoTokenizer
+from sparseml.transformers import SparseAutoModel, SparseAutoTokenizer
from sparseml.transformers.sparsification import Trainer
-from sparseml.transformers.utils import SparseAutoModel
from sparsezoo.utils.onnx import EXTERNAL_ONNX_DATA_NAME


5 changes: 4 additions & 1 deletion src/sparseml/transformers/finetune/text_generation.py
@@ -40,7 +40,10 @@
from sparseml.transformers.finetune.runner import StageRunner
from sparseml.transformers.finetune.trainer import Trainer
from sparseml.transformers.finetune.training_args import TrainingArguments
-from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
+from sparseml.transformers.sparsification.sparse_model import (
+    SparseAutoModel,
+    get_shared_tokenizer_src,
+)
from sparseml.transformers.utils.helpers import detect_last_checkpoint


8 changes: 6 additions & 2 deletions src/sparseml/transformers/masked_language_modeling.py
@@ -54,8 +54,12 @@
from transformers.utils.versions import require_version

from sparseml.pytorch.utils.distributed import record
-from sparseml.transformers.sparsification import Trainer, TrainingArguments
-from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
+from sparseml.transformers.sparsification import (
+    SparseAutoModel,
+    Trainer,
+    TrainingArguments,
+)
+from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


metadata_args = [
13 changes: 13 additions & 0 deletions src/sparseml/transformers/modify/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
3 changes: 2 additions & 1 deletion src/sparseml/transformers/question_answering.py
@@ -47,10 +47,11 @@
from sparseml.pytorch.utils.distributed import record
from sparseml.transformers.sparsification import (
    QuestionAnsweringTrainer,
+    SparseAutoModel,
    TrainingArguments,
    postprocess_qa_predictions,
)
-from sparseml.transformers.utils import SparseAutoModel, get_shared_tokenizer_src
+from sparseml.transformers.sparsification.sparse_model import get_shared_tokenizer_src


# You can also adapt this script on your own question answering task.
3 changes: 3 additions & 0 deletions src/sparseml/transformers/sparsification/__init__.py
@@ -20,5 +20,8 @@
# flake8: noqa

from .question_answering import *
+from .sparse_config import *
+from .sparse_model import *
+from .sparse_tokenizer import *
from .trainer import *
from .training_args import *
21 changes: 21 additions & 0 deletions src/sparseml/transformers/sparsification/modification/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .modify_model import modify_model
from .modifying_bert import *
from .modifying_distilbert import *
from .modifying_llama import *
from .modifying_mistral import *
from .modifying_mobilebert import *
from .modifying_opt import *