[DeepSparse Evaluation API] UX Improvements (#1568)
* initial commit

* add some more tests for hardening

* Update src/deepsparse/evaluation/cli.py

* Update src/deepsparse/transformers/pipelines/text_generation/pipeline.py

* Apply suggestions from code review

* quality

* Update test_evaluator.py

* quality
dbogunowicz committed Feb 5, 2024
1 parent 8c072a3 commit f6f8641
Showing 9 changed files with 132 additions and 150 deletions.
30 changes: 11 additions & 19 deletions src/deepsparse/evaluation/cli.py
@@ -20,7 +20,8 @@
Module for evaluating models on the various evaluation integrations
OPTIONS:
--target TARGET A path to a remote or local directory containing ONNX/torch model
--model_path MODEL_PATH
A path to an ONNX model, local directory containing ONNX model
(including all the auxiliary files) or a SparseZoo stub
-d DATASET, --dataset DATASET
The dataset to evaluate on. The user may pass multiple datasets
@@ -30,9 +31,7 @@
integration name that is registered in the evaluation registry
-e ENGINE_TYPE, --engine_type ENGINE_TYPE
Inference engine to use for the evaluation. The default
is the DeepSparse engine. If the evaluation should be run
without initializing a pipeline (e.g. for the evaluation
of a torch model), the engine type should be set to None
is the DeepSparse engine.
-s SAVE_PATH, --save_path SAVE_PATH
The path to save the evaluation results.
By default the results will be saved in the
@@ -90,10 +89,10 @@
)
)
@click.option(
"--target",
"--model_path",
type=click.Path(dir_okay=True, file_okay=True),
required=True,
help="A path to a remote or local directory containing ONNX/torch model "
help="A path to an ONNX model, local directory containing ONNX model"
"(including all the auxiliary files) or a SparseZoo stub",
)
@click.option(
@@ -118,9 +117,7 @@
type=click.Choice([DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE]),
default=DEEPSPARSE_ENGINE,
help="The engine to use for the evaluation. The default is the "
"DeepSparse engine. If the evaluation should be run without "
"initializing a pipeline (e.g. for the evaluation of a torch "
"model), the engine type should be set to None",
"DeepSparse engine. ",
)
@click.option(
"-s",
@@ -167,7 +164,7 @@
)
@click.argument("integration_args", nargs=-1, type=click.UNPROCESSED)
def main(
target,
model_path,
dataset,
integration,
engine_type,
@@ -183,14 +180,9 @@ def main(
# format kwargs to a dict
integration_args = args_to_dict(integration_args)

_LOGGER.info(f"Target to evaluate: {target}")
if engine_type:
_LOGGER.info(f"A pipeline with the engine type: {engine_type} will be created")
else:
_LOGGER.info(
"No engine type specified. The target "
"will be evaluated using the native framework"
)
_LOGGER.info(
f"Creating {engine_type} pipeline to evaluate from model path: {model_path}"
)

_LOGGER.info(
f"Datasets to evaluate on: {datasets}\n"
@@ -201,7 +193,7 @@
)

result: Result = evaluate(
target=target,
model=model_path,
datasets=datasets,
integration=integration,
engine_type=engine_type,
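For reference, the renamed flag would be exercised roughly as below. This sketch is not part of the commit: it assumes main is exposed as a standard click command, reuses the model stub from the test file further down, and the dataset name is a placeholder.

# Hedged sketch, not from the commit: exercising the renamed --model_path flag
# through click's test runner. The dataset name is a placeholder.
from click.testing import CliRunner

from deepsparse.evaluation.cli import main

runner = CliRunner()
result = runner.invoke(
    main,
    [
        "--model_path", "hf:mgoin/TinyStories-1M-deepsparse",  # previously --target
        "--dataset", "openai_humaneval",  # placeholder dataset name
        "--engine_type", "onnxruntime",
    ],
)
print(result.output)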
34 changes: 22 additions & 12 deletions src/deepsparse/evaluation/evaluator.py
@@ -12,11 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Any, List, Optional, Union
from pathlib import Path
from typing import List, Optional, Union

from deepsparse import Pipeline
from deepsparse.evaluation.registry import EvaluationRegistry
from deepsparse.evaluation.results import Result
from deepsparse.evaluation.utils import create_model_from_target
from deepsparse.evaluation.utils import create_pipeline
from deepsparse.operators.engine_operator import (
DEEPSPARSE_ENGINE,
ORT_ENGINE,
@@ -30,30 +32,38 @@


def evaluate(
target: Any,
model: Union[Pipeline, Path, str],
datasets: Union[str, List[str]],
integration: Optional[str] = None,
engine_type: Union[
DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, None
DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE
] = DEEPSPARSE_ENGINE,
batch_size: int = 1,
splits: Union[List[str], str, None] = None,
metrics: Union[List[str], str, None] = None,
**kwargs,
) -> Result:

# if target is a string, turn it into an appropriate model/pipeline
# otherwise assume it is a model/pipeline
model = (
create_model_from_target(target, engine_type)
if isinstance(target, str)
else target
if isinstance(model, Pipeline):
_LOGGER.info(
"Passed a Pipeline object into evaluate function. This will "
"override the following arguments:"
)
batch_size = model.batch_size
_LOGGER.info(f"batch_size: {batch_size}")
engine_type = model.engine_type
_LOGGER.info(f"engine_type: {engine_type}")

# if model is a path or string, turn it into an appropriate pipeline
# otherwise assume it is a pipeline
pipeline = (
create_pipeline(model, engine_type) if isinstance(model, (Path, str)) else model
)

eval_integration = EvaluationRegistry.resolve(model, datasets, integration)
eval_integration = EvaluationRegistry.resolve(pipeline, datasets, integration)

return eval_integration(
model=model,
pipeline=pipeline,
datasets=datasets,
engine_type=engine_type,
batch_size=batch_size,
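For context, the reworked evaluate() entrypoint accepts either a path/stub or an already-built Pipeline; when a Pipeline is passed, its own batch size takes precedence over the argument. A rough usage sketch (the dataset name is a placeholder, not taken from the commit):

# Illustrative only; the dataset name is a placeholder.
from deepsparse import Pipeline
from deepsparse.evaluation.evaluator import evaluate

# 1) Let evaluate() build the pipeline from a path or stub
result = evaluate(
    model="hf:mgoin/TinyStories-1M-deepsparse",
    datasets="openai_humaneval",
    engine_type="onnxruntime",
    batch_size=1,
)

# 2) Pass a pre-built Pipeline; its batch_size overrides the argument
pipeline = Pipeline.create(
    task="text-generation",
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
    engine_type="onnxruntime",
)
result = evaluate(model=pipeline, datasets="openai_humaneval")
print(result)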
9 changes: 5 additions & 4 deletions src/deepsparse/evaluation/registry.py
@@ -15,8 +15,9 @@
Implementation of a registry for evaluation functions
"""
import logging
from typing import Any, Callable, List, Optional, Union
from typing import Callable, List, Optional, Union

from deepsparse import Pipeline
from sparsezoo.utils.registry import RegistryMixin


@@ -38,7 +39,7 @@ def load_from_registry(cls, name: str) -> Callable[..., "Result"]: # noqa: F821
@classmethod
def resolve(
cls,
model: Any,
pipeline: Pipeline,
datasets: Union[str, List[str]],
integration: Optional[str] = None,
) -> Callable[..., "Result"]: # noqa: F821
@@ -59,12 +60,12 @@ def resolve(
"No integration specified, inferring the evaluation"
"function from the input arguments..."
)
integration = resolve_integration(model, datasets)
integration = resolve_integration(pipeline, datasets)

if integration is None:
raise ValueError(
"Unable to resolve an evaluation function for the given model. "
"Specify an integration name or use a model that is supported "
"Specify an integration name or use a pipeline that is supported "
)
_LOGGER.info(f"Inferred the evaluation function: {integration}")

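A minimal sketch of the Pipeline-based resolution, under the same placeholder assumptions as above (not code from the commit):

# Sketch: resolve an evaluation function for a text-generation pipeline.
# If no integration name is given, it is inferred (e.g. lm-evaluation-harness).
from deepsparse import Pipeline
from deepsparse.evaluation.registry import EvaluationRegistry

pipeline = Pipeline.create(
    task="text-generation",
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
)
eval_fn = EvaluationRegistry.resolve(pipeline=pipeline, datasets="openai_humaneval")
result = eval_fn(pipeline=pipeline, datasets="openai_humaneval", batch_size=1)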
85 changes: 31 additions & 54 deletions src/deepsparse/evaluation/utils.py
@@ -15,21 +15,11 @@
import os
from typing import Any, Dict, List, Optional, Tuple, Union


try:
from transformers import AutoModelForCausalLM, PreTrainedModel

transformers_error = None
except ImportError as import_error:
transformers_error = import_error


from deepsparse import Pipeline
from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE, ORT_ENGINE


__all__ = [
"create_model_from_target",
"create_pipeline",
"get_save_path",
"args_to_dict",
"resolve_integration",
@@ -57,36 +47,36 @@ def potentially_check_dependency_import(integration_name: str) -> bool:


def resolve_integration(
model: Union[Pipeline, "PreTrainedModel"], datasets: Union[str, List[str]]
pipeline: Pipeline, datasets: Union[str, List[str]]
) -> Union[str, None]:
"""
Given a model and dataset, infer the name of the evaluation integration
Given a pipeline and dataset, infer the name of the evaluation integration
to use. If unable to infer a name, return None.
Currently:
if the model is a generative language model,
default to 'lm-evaluation-harness' otherwise return None
:param model: The model to infer the integration for
:param pipeline: The pipeline to infer the integration for
:param datasets: The datasets to infer the integration for
:return: The name of the integration to use or None if unable to infer
"""
if if_generative_language_model(model):
if if_generative_language_model(pipeline):
return LM_EVALUATION_HARNESS
return None


def if_generative_language_model(model: Any) -> bool:
def if_generative_language_model(pipeline: Pipeline) -> bool:
"""
Checks if the model is a generative language model.
"""
_check_transformers_dependency()
if isinstance(model, Pipeline):
return model.__class__.__name__ == "TextGenerationPipeline"
elif isinstance(model, PreTrainedModel):
return "CausalLM" in model.__class__.__name__
else:
return False
pipeline_name = pipeline.__class__.__name__
if pipeline_name == "TextGenerationPipeline" or (
pipeline_name == "TextGenerationPipelineNoKVCache"
):
return True

return False


def args_to_dict(args: Tuple[Any, ...]) -> Dict[str, Any]:
@@ -134,43 +124,30 @@ def get_save_path(
return os.path.join(base_path, file_name)


def create_model_from_target(
target: str,
def create_pipeline(
model_path: str,
engine_type: Optional[str] = None,
**kwargs,
) -> Union[Pipeline, "AutoModelForCausalLM"]:
) -> Pipeline:
"""
Create a model or a pipeline from a target path.
Create a pipeline for evaluation
Note: This function is currently limited to:
- creating pipelines of type 'text-generation'
- creating dense huggingface models of type 'AutoModelForCausalLM'
This function will be expanded in the future to support more
model types and frameworks.
Note: This function is currently primarily
focused on creating pipelines of type 'text-generation'
This function will be expanded in the future to support
more tasks and models
:param target: The target path to initialize the
:param model_path: The target path to initialize the
text generation model from. This can be a local
or remote path to the model or a sparsezoo stub
:param engine_type: The engine type to initialize the model with.
:return: The initialized model
:return: The initialized pipeline
"""
_check_transformers_dependency()

if engine_type in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
return Pipeline.create(
task="text-generation",
model_path=target,
sequence_length=kwargs.pop("sequence_length", 2048),
engine_type=engine_type,
batch_size=kwargs.pop("batch_size", 1),
**kwargs,
)
else:
return AutoModelForCausalLM.from_pretrained(target, **kwargs)


def _check_transformers_dependency():
if transformers_error:
raise ImportError(
"transformers is needed to use this module"
) from transformers_error
return Pipeline.create(
task=kwargs.pop("task", "text-generation"),
model_path=model_path,
sequence_length=kwargs.pop("sequence_length", 2048),
engine_type=engine_type,
batch_size=kwargs.pop("batch_size", 1),
**kwargs,
)
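The renamed helper can also be called directly; a minimal sketch, with illustrative values that mirror the defaults shown above:

# Sketch of calling the new create_pipeline helper directly.
from deepsparse.evaluation.utils import create_pipeline

pipeline = create_pipeline(
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
    engine_type="onnxruntime",
    sequence_length=2048,  # default the helper would apply anyway
    batch_size=1,          # default the helper would apply anyway
)
print(type(pipeline).__name__)  # expected: a text-generation pipeline class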
@@ -357,6 +357,14 @@ def sequence_length(self) -> int:
"""
return self.ops["single_engine"].sequence_length

@property
def batch_size(self) -> int:
return self.ops["single_engine"].batch_size

@property
def engine_type(self) -> str:
return self.ops["single_engine"]._engine_type

def _get_continuous_batching_scheduler(
self, batch_sizes: List[int], engines: List[EngineOperator]
) -> ContinuousBatchingScheduler:
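These read-only properties are what allow evaluate() to pull settings from a user-supplied pipeline rather than requiring separate arguments; roughly (the model stub and printed values are illustrative):

# Sketch: the new properties expose the underlying engine operator's settings.
from deepsparse import Pipeline

pipeline = Pipeline.create(
    task="text-generation",
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
    batch_size=1,
)
print(pipeline.batch_size)   # forwarded from ops["single_engine"]
print(pipeline.engine_type)  # e.g. "deepsparse" by default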
@@ -127,3 +127,11 @@ def expand_inputs(self, items, batch_size):
out, orig_batch_size = split_engine_inputs(items, batch_size)
combined_batches = [{"input_ids": b[0], "attention_mask": b[1]} for b in out]
return combined_batches, orig_batch_size

@property
def batch_size(self) -> int:
return self.ops["engine_operator"].batch_size

@property
def engine_type(self) -> str:
return self.ops["engine_operator"]._engine_type
@@ -12,19 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import AutoModelForCausalLM

import pytest
from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness
from deepsparse.evaluation.utils import create_model_from_target
from deepsparse.evaluation.utils import create_pipeline


@pytest.mark.parametrize(
"pipeline, model_torch",
[
(
create_model_from_target(
create_pipeline(
"hf:mgoin/TinyStories-1M-deepsparse", engine_type="onnxruntime"
),
create_model_from_target("roneneldan/TinyStories-1M"),
AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M"),
)
],
)