Skip to content

Commit

Permalink
Merge branch 'main' into test_update
Browse files Browse the repository at this point in the history
  • Loading branch information
dsikka authored Apr 9, 2024
2 parents d03bf24 + d636d35 commit 98e0313
Show file tree
Hide file tree
Showing 8 changed files with 339 additions and 18 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"GPUtil>=1.4.0",
"protobuf>=3.12.2,<=3.20.3",
"click>=7.1.2,!=8.0.0", # latest version < 8.0 + blocked version with reported bug
"clearml==1.14.4",
]
_nm_deps = [f"{'sparsezoo' if is_release else 'sparsezoo-nightly'}~={version_nm_deps}"]
_deepsparse_deps = [
Expand Down
105 changes: 105 additions & 0 deletions src/sparseml/pytorch/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,21 @@
wandb = None
wandb_err = err


# clearml is an optional dependency: record the import failure instead of
# raising so that this module can still be imported without it.
try:
    from clearml import Task

    clearml_err = None
except Exception as err:
    clearml = None
    # keep the imported name bound so later references fail predictably
    # rather than with a NameError
    Task = None
    clearml_err = err

from sparseml.utils import ALL_TOKEN, create_dirs


__all__ = [
"BaseLogger",
"ClearMLLogger",
"LambdaLogger",
"PythonLogger",
"TensorBoardLogger",
Expand Down Expand Up @@ -628,6 +638,101 @@ def save(
return True


class ClearMLLogger(LambdaLogger):
    """
    Logger that reports hyperparameters and scalar values to a ClearML task.

    :param name: name given to the logger, used for identification;
        defaults to clearml
    :param enabled: True to log, False otherwise
    :param project_name: name of the ClearML project the task is created in
    :param task_name: name of the ClearML task; when empty, the current local
        time formatted as dd-mm-YYYY_HH.MM.SS is used
    """

    @staticmethod
    def available() -> bool:
        """
        :return: True if clearml is available and installed, False, otherwise
        """
        return not clearml_err

    def __init__(
        self,
        name: str = "clearml",
        enabled: bool = True,
        project_name: str = "sparseml",
        task_name: str = "",
    ):
        if clearml_err:
            # surface the original import failure instead of failing later
            # on the missing Task name
            raise clearml_err

        if not task_name:
            task_name = datetime.now().strftime("%d-%m-%Y_%H.%M.%S")

        # the task must exist before the parent constructor runs, since the
        # bound log_scalar handed to it reads self.task
        self.task = Task.init(project_name=project_name, task_name=task_name)

        super().__init__(
            lambda_func=self.log_scalar,
            name=name,
            enabled=enabled,
        )

    def log_hyperparams(
        self,
        params: Dict,
        level: Optional[int] = None,
    ) -> bool:
        """
        :param params: Each key-value pair in the dictionary is the name of the
            hyper parameter and its corresponding value.
        :param level: accepted for interface compatibility, not used here
        :return: True if logged, False otherwise.
        """
        if not self.enabled:
            return False

        self.task.connect(params)
        return True

    def log_scalar(
        self,
        tag: str,
        value: float,
        step: Optional[int] = None,
        wall_time: Optional[float] = None,
        level: Optional[int] = None,
    ) -> bool:
        """
        :param tag: identifying tag to log the value with, used as the plot title
        :param value: value to save
        :param step: global step for when the value was taken; forwarded
            unchanged as the ClearML iteration
        :param wall_time: global wall time for when the value was taken;
            accepted for interface compatibility, not forwarded to ClearML
        :param level: logging level for the value; when given it names the
            series, otherwise the tag is used
        :return: True if logged, False otherwise.
        """
        if not self.enabled:
            return False

        # str(None) is the truthy string "None", so the fallback to the tag
        # has to test for None explicitly
        series = str(level) if level is not None else tag

        # each series is superimposed on the same plot on title
        self.task.get_logger().report_scalar(
            title=tag, series=series, value=value, iteration=step
        )
        return True

    def log_scalars(
        self,
        tag: str,
        values: Dict[str, float],
        step: Optional[int] = None,
        wall_time: Optional[float] = None,
        level: Optional[int] = None,
    ) -> bool:
        """
        :param tag: identifying tag to log the values with; each value is
            reported under the tag "<tag>.<key>"
        :param values: values to save
        :param step: global step for when the values were taken
        :param wall_time: global wall time for when the values were taken;
            passed through to log_scalar
        :param level: logging level for the values; passed through to log_scalar
        :return: True if logged, False otherwise.
        """
        if not self.enabled:
            return False

        for key, scalar in values.items():
            self.log_scalar(
                tag=f"{tag}.{key}",
                value=scalar,
                step=step,
                wall_time=wall_time,
                level=level,
            )
        return True


class SparsificationGroupLogger(BaseLogger):
"""
Modifier logger that handles outputting values to other supported systems.
Expand Down
4 changes: 2 additions & 2 deletions src/sparseml/transformers/sparsification/modification/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
__all__ = ["check_transformers_version"]

_TRANSFORMERS_MIN_VERSION = "4.39.0"
_TRANSFORMERS_MAX_VERSION = "4.39.2"
_TRANSFORMERS_MAX_VERSION = "4.39.3"


def check_transformers_version(
Expand Down Expand Up @@ -56,7 +56,7 @@ def check_transformers_version(
_LOGGER.warning(
"Attempting to modify the transformers model to support "
"the SparseML-specific functionalities. However, the detected "
f"transformers version ({current_version}) does not fall within the"
f"transformers version ({current_version}) does not fall within the "
f"supported version range ({min_version} - {max_version}). "
"This may lead to unexpected behavior. Please ensure that the "
"correct transformers version is installed."
Expand Down
16 changes: 4 additions & 12 deletions src/sparseml/transformers/sparsification/sparse_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@
modify_save_pretrained,
)
from sparseml.transformers.sparsification.modification import modify_model
from sparseml.transformers.utils.helpers import resolve_recipe
from sparseml.utils import download_zoo_training_dir
from sparseml.utils.fsdp.context import main_process_first_context
from sparseml.transformers.utils.helpers import download_model_directory, resolve_recipe


__all__ = ["SparseAutoModel", "SparseAutoModelForCausalLM", "get_shared_tokenizer_src"]
Expand Down Expand Up @@ -101,15 +99,9 @@ def skip(*args, **kwargs):
else pretrained_model_name_or_path
)

if pretrained_model_name_or_path.startswith("zoo:"):
_LOGGER.debug(
"Passed zoo stub to SparseAutoModelForCausalLM object. "
"Loading model from SparseZoo training files..."
)
with main_process_first_context():
pretrained_model_name_or_path = download_zoo_training_dir(
zoo_stub=pretrained_model_name_or_path
)
pretrained_model_name_or_path = download_model_directory(
pretrained_model_name_or_path, **kwargs
)

# determine compression format, if any, from the model config
compressor = infer_compressor_from_model_config(pretrained_model_name_or_path)
Expand Down
98 changes: 97 additions & 1 deletion src/sparseml/transformers/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import PaddingStrategy

from huggingface_hub import HUGGINGFACE_CO_URL_HOME, hf_hub_download
from huggingface_hub import HUGGINGFACE_CO_URL_HOME, HfFileSystem, hf_hub_download
from sparseml.export.helpers import ONNX_MODEL_NAME
from sparseml.utils import download_zoo_training_dir
from sparseml.utils.fsdp.context import main_process_first_context
from sparsezoo import Model, setup_model


Expand All @@ -52,6 +54,8 @@
"ALL_TASK_NAMES",
"create_fake_dataloader",
"POSSIBLE_TOKENIZER_FILES",
"download_repo_from_huggingface_hub",
"download_model_directory",
]


Expand Down Expand Up @@ -92,6 +96,7 @@ class TaskNames(Enum):
"special_tokens_map.json",
"tokenizer_config.json",
}
RELEVANT_HF_SUFFIXES = ["json", "md", "bin", "safetensors", "yaml", "yml", "py"]


def remove_past_key_value_support_from_config(config: AutoConfig) -> AutoConfig:
Expand Down Expand Up @@ -553,3 +558,94 @@ def fetch_recipe_path(target: str):
recipe_path = hf_hub_download(repo_id=target, filename=DEFAULT_RECIPE_NAME)

return recipe_path


def download_repo_from_huggingface_hub(repo_id, **kwargs):
    """
    Download relevant model files from the Hugging Face Hub
    using the huggingface_hub.hf_hub_download function

    Note(s):
    - Does not download the entire repo, only the files listed at the top
        level of the repo whose extension is in RELEVANT_HF_SUFFIXES
    - Only the keyword arguments recognised by hf_hub_download are
        forwarded; any other entries in kwargs are ignored

    :pre-condition: the repo_id must be a valid Hugging Face Hub repo id
    :param repo_id: the repo id to download
    :param kwargs: additional keyword arguments to pass to hf_hub_download
    :raises ValueError: if the repo listing is empty, or if none of the
        listed files has a relevant extension
    :return: absolute path of the directory containing the last
        downloaded file
    """
    hf_filesystem = HfFileSystem()
    files = hf_filesystem.ls(repo_id)

    if not files:
        raise ValueError(f"Could not find any files in HF repo {repo_id}")

    # match on a real file extension (".py") rather than a bare trailing
    # substring ("py"), which would also accept names such as "numpy"
    relevant_suffixes = tuple(
        f".{suffix.lstrip('.')}" for suffix in RELEVANT_HF_SUFFIXES
    )

    # All file(s) from hf_filesystem have "name" key
    # Extract the file names from the files
    relevant_file_names = [
        Path(file["name"]).name
        for file in files
        if file["name"].endswith(relevant_suffixes)
    ]

    if not relevant_file_names:
        # without this guard the return below would fail on an unbound name
        raise ValueError(
            f"Could not find any relevant model files in HF repo {repo_id}; "
            f"expected files with one of the suffixes {RELEVANT_HF_SUFFIXES}"
        )

    hub_kwargs_names = (
        "subfolder",
        "repo_type",
        "revision",
        "library_name",
        "library_version",
        "cache_dir",
        "local_dir",
        "local_dir_use_symlinks",
        "user_agent",
        "force_download",
        "force_filename",
        "proxies",
        "etag_timeout",
        "resume_download",
        "token",
        "local_files_only",
        "headers",
        "legacy_cache_layout",
        "endpoint",
    )
    hub_kwargs = {name: kwargs[name] for name in hub_kwargs_names if name in kwargs}

    last_file = None
    for file_name in relevant_file_names:
        last_file = hf_hub_download(repo_id=repo_id, filename=file_name, **hub_kwargs)

    # parent directory of the last file is the model directory
    return str(Path(last_file).parent.resolve().absolute())


def download_model_directory(pretrained_model_name_or_path: str, **kwargs):
    """
    Resolve a model reference to a local model directory, downloading it
    from SparseZoo or the HuggingFace Hub when it does not exist locally

    :param pretrained_model_name_or_path: the name of or path to the model to load
        can be a SparseZoo/HuggingFace model stub
    :param kwargs: additional keyword arguments to pass to the download function;
        only forwarded for HuggingFace Hub downloads
    :return: the given path unchanged when it exists locally, otherwise
        the path to the downloaded model directory
    """
    if Path(pretrained_model_name_or_path).exists():
        # nothing to fetch, hand the caller's value straight back
        _LOGGER.debug(
            "Model directory already exists locally.",
        )
        return pretrained_model_name_or_path

    with main_process_first_context():
        if not pretrained_model_name_or_path.startswith("zoo:"):
            # anything that is not a zoo stub is treated as a Hub repo id
            _LOGGER.debug("Downloading model from HuggingFace Hub.")
            model_directory = download_repo_from_huggingface_hub(
                repo_id=pretrained_model_name_or_path, **kwargs
            )
        else:
            _LOGGER.debug(
                "Passed zoo stub to SparseAutoModelForCausalLM object. "
                "Loading model from SparseZoo training files..."
            )
            model_directory = download_zoo_training_dir(
                zoo_stub=pretrained_model_name_or_path
            )

    return model_directory
8 changes: 5 additions & 3 deletions tests/sparseml/pytorch/utils/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import pytest

from sparseml.pytorch.utils import (
ClearMLLogger,
LambdaLogger,
LoggerManager,
PythonLogger,
Expand All @@ -45,6 +46,7 @@
or True
),
*([WANDBLogger()] if WANDBLogger.available() else []),
*([ClearMLLogger()] if ClearMLLogger.available() else []),
SparsificationGroupLogger(
lambda_func=lambda tag, value, values, step, wall_time, level: logging.info(
f"{tag}, {value}, {values}, {step}, {wall_time}, {level}"
Expand Down Expand Up @@ -79,12 +81,12 @@ def test_log_scalar(self, logger):

def test_log_scalars(self, logger):
logger.log_scalars("test-scalars-tag", {"scalar1": 0.0, "scalar2": 1.0})
logger.log_scalars("test-scalars-tag", {"scalar1": 0.0, "scalar2": 1.0}, 1)
logger.log_scalars("test-scalars-tag2", {"scalar1": 0.0, "scalar2": 1.0}, 1)
logger.log_scalars(
"test-scalars-tag", {"scalar1": 0.0, "scalar2": 1.0}, 2, time.time() - 1
"test-scalars-tag3", {"scalar1": 0.0, "scalar2": 1.0}, 2, time.time() - 1
)
logger.log_scalars(
"test-scalars-tag",
"test-scalars-tag4",
{"scalar1": 0.0, "scalar2": 1.0},
2,
time.time() - 1,
Expand Down
63 changes: 63 additions & 0 deletions tests/sparseml/test_clear_ml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path

from clearml import Task
from sparseml.transformers import apply
from sparseml.utils import is_package_available


# torch is optional here: it is only consulted to pick the device the test
# runs on, so fall back to a placeholder when it is not installed.
is_torch_available = is_package_available("torch")
if is_torch_available:
    import torch

    torch_err = None
else:
    torch = object
    torch_err = ModuleNotFoundError(
        "`torch` is not installed, use `pip install torch` to run the ClearML test"
    )


def test_oneshot_and_finetune(tmp_path: Path):
    """
    Run the staged oneshot + finetune flow through ``apply`` on a small model
    with a ClearML task initialized, writing outputs to ``tmp_path``.
    """
    # fall back to CPU only when torch is importable and reports no CUDA device
    cuda_missing = is_torch_available and not torch.cuda.is_available()
    oneshot_device = "cpu" if cuda_missing else "cuda:0"

    # clearML will automatically log default capturing entries without
    # explicitly calling logger. Logs accessible in https://app.clear.ml/
    Task.init(project_name="test", task_name="test_oneshot_and_finetune")

    apply(
        model="Xenova/llama2.c-stories15M",
        dataset="wikitext",
        dataset_config_name="wikitext-2-raw-v1",
        run_stages=True,
        output_dir=tmp_path,
        recipe="tests/sparseml/transformers/finetune/test_alternate_recipe.yaml",
        max_steps=50,
        concatenate_data=True,
        splits={"train": "train[:50%]", "calibration": "train[50%:60%]"},
        oneshot_device=oneshot_device,
    )
Loading

0 comments on commit 98e0313

Please sign in to comment.