diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py
index ed7ea72831..b68d32d4e5 100644
--- a/src/deepsparse/evaluation/cli.py
+++ b/src/deepsparse/evaluation/cli.py
@@ -20,7 +20,8 @@
 Module for evaluating models on the various evaluation integrations

 OPTIONS:
-    --target TARGET     A path to a remote or local directory containing ONNX/torch model
+    --model_path MODEL_PATH
+                        A path to an ONNX model, local directory containing ONNX model
                         (including all the auxiliary files) or a SparseZoo stub
     -d DATASET, --dataset DATASET
                         The dataset to evaluate on. The user may pass multiple datasets
@@ -30,9 +31,7 @@
                         integration name that is registered in the evaluation registry
     -e ENGINE_TYPE, --engine_type ENGINE_TYPE
                         Inference engine to use for the evaluation. The default
-                        is the DeepSparse engine. If the evaluation should be run
-                        without initializing a pipeline (e.g. for the evaluation
-                        of a torch model), the engine type should be set to None
+                        is the DeepSparse engine.
     -s SAVE_PATH, --save_path SAVE_PATH
                         The path to save the evaluation results. By default
                         the results will be saved in the
@@ -90,10 +89,10 @@
     )
 )
 @click.option(
-    "--target",
+    "--model_path",
     type=click.Path(dir_okay=True, file_okay=True),
     required=True,
-    help="A path to a remote or local directory containing ONNX/torch model "
+    help="A path to an ONNX model, local directory containing ONNX model "
    "(including all the auxiliary files) or a SparseZoo stub",
 )
 @click.option(
@@ -118,9 +117,7 @@
     type=click.Choice([DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE]),
     default=DEEPSPARSE_ENGINE,
     help="The engine to use for the evaluation. The default is the "
-    "DeepSparse engine. If the evaluation should be run without "
-    "initializing a pipeline (e.g. for the evaluation of a torch "
-    "model), the engine type should be set to None",
+    "DeepSparse engine.",
 )
 @click.option(
     "-s",
@@ -167,7 +164,7 @@
 )
 @click.argument("integration_args", nargs=-1, type=click.UNPROCESSED)
 def main(
-    target,
+    model_path,
     dataset,
     integration,
     engine_type,
@@ -183,14 +180,9 @@ def main(
     # format kwargs to a dict
     integration_args = args_to_dict(integration_args)

-    _LOGGER.info(f"Target to evaluate: {target}")
-    if engine_type:
-        _LOGGER.info(f"A pipeline with the engine type: {engine_type} will be created")
-    else:
-        _LOGGER.info(
-            "No engine type specified. The target "
-            "will be evaluated using the native framework"
-        )
+    _LOGGER.info(
+        f"Creating {engine_type} pipeline to evaluate from model path: {model_path}"
+    )

     _LOGGER.info(
         f"Datasets to evaluate on: {datasets}\n"
@@ -201,7 +193,7 @@ def main(
     )

     result: Result = evaluate(
-        target=target,
+        model=model_path,
         datasets=datasets,
         integration=integration,
         engine_type=engine_type,
diff --git a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py
index 7bd56adf6e..b513f07563 100644
--- a/src/deepsparse/evaluation/evaluator.py
+++ b/src/deepsparse/evaluation/evaluator.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import Any, List, Optional, Union
+from pathlib import Path
+from typing import List, Optional, Union

+from deepsparse import Pipeline
 from deepsparse.evaluation.registry import EvaluationRegistry
 from deepsparse.evaluation.results import Result
-from deepsparse.evaluation.utils import create_model_from_target
+from deepsparse.evaluation.utils import create_pipeline
 from deepsparse.operators.engine_operator import (
     DEEPSPARSE_ENGINE,
     ORT_ENGINE,
@@ -30,11 +32,11 @@


 def evaluate(
-    target: Any,
+    model: Union[Pipeline, Path, str],
     datasets: Union[str, List[str]],
     integration: Optional[str] = None,
     engine_type: Union[
-        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, None
+        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE
     ] = DEEPSPARSE_ENGINE,
     batch_size: int = 1,
     splits: Union[List[str], str, None] = None,
@@ -42,18 +44,26 @@ def evaluate(
     **kwargs,
 ) -> Result:

-    # if target is a string, turn it into an appropriate model/pipeline
-    # otherwise assume it is a model/pipeline
-    model = (
-        create_model_from_target(target, engine_type)
-        if isinstance(target, str)
-        else target
+    if isinstance(model, Pipeline):
+        _LOGGER.info(
+            "Passed a Pipeline object into the evaluate function. This will "
+            "override the following arguments:"
+        )
+        batch_size = model.batch_size
+        _LOGGER.info(f"batch_size: {batch_size}")
+        engine_type = model.engine_type
+        _LOGGER.info(f"engine_type: {engine_type}")
+
+    # if model is a path or string, turn it into an appropriate pipeline
+    # otherwise assume it is a pipeline
+    pipeline = (
+        create_pipeline(model, engine_type) if isinstance(model, (Path, str)) else model
     )

-    eval_integration = EvaluationRegistry.resolve(model, datasets, integration)
+    eval_integration = EvaluationRegistry.resolve(pipeline, datasets, integration)

     return eval_integration(
-        model=model,
+        pipeline=pipeline,
         datasets=datasets,
         engine_type=engine_type,
         batch_size=batch_size,
diff --git a/src/deepsparse/evaluation/registry.py b/src/deepsparse/evaluation/registry.py
index 5b6e45bc1c..2daabb69cc 100644
--- a/src/deepsparse/evaluation/registry.py
+++ b/src/deepsparse/evaluation/registry.py
@@ -15,8 +15,9 @@
 Implementation of a registry for evaluation functions
 """
 import logging
-from typing import Any, Callable, List, Optional, Union
+from typing import Callable, List, Optional, Union

+from deepsparse import Pipeline
 from sparsezoo.utils.registry import RegistryMixin


@@ -38,7 +39,7 @@ def load_from_registry(cls, name: str) -> Callable[..., "Result"]:  # noqa: F821
     @classmethod
     def resolve(
         cls,
-        model: Any,
+        pipeline: Pipeline,
         datasets: Union[str, List[str]],
         integration: Optional[str] = None,
     ) -> Callable[..., "Result"]:  # noqa: F821
@@ -59,12 +60,12 @@ def resolve(
                 "No integration specified, inferring the evaluation"
                 "function from the input arguments..."
             )
-            integration = resolve_integration(model, datasets)
+            integration = resolve_integration(pipeline, datasets)

             if integration is None:
                 raise ValueError(
                     "Unable to resolve an evaluation function for the given model. "
" - "Specify an integration name or use a model that is supported " + "Specify an integration name or use a pipeline that is supported " ) _LOGGER.info(f"Inferred the evaluation function: {integration}") diff --git a/src/deepsparse/evaluation/utils.py b/src/deepsparse/evaluation/utils.py index 7684e54513..87475dd5d2 100644 --- a/src/deepsparse/evaluation/utils.py +++ b/src/deepsparse/evaluation/utils.py @@ -15,21 +15,11 @@ import os from typing import Any, Dict, List, Optional, Tuple, Union - -try: - from transformers import AutoModelForCausalLM, PreTrainedModel - - transformers_error = None -except ImportError as import_error: - transformers_error = import_error - - from deepsparse import Pipeline -from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE, ORT_ENGINE __all__ = [ - "create_model_from_target", + "create_pipeline", "get_save_path", "args_to_dict", "resolve_integration", @@ -57,36 +47,36 @@ def potentially_check_dependency_import(integration_name: str) -> bool: def resolve_integration( - model: Union[Pipeline, "PreTrainedModel"], datasets: Union[str, List[str]] + pipeline: Pipeline, datasets: Union[str, List[str]] ) -> Union[str, None]: """ - Given a model and dataset, infer the name of the evaluation integration + Given a pipeline and dataset, infer the name of the evaluation integration to use. If unable to infer a name, return None. Currently: if the model is a generative language model, default to 'lm-evaluation-harness' otherwise return None - :param model: The model to infer the integration for + :param pipeline: The pipeline to infer the integration for :param datasets: The datasets to infer the integration for :return: The name of the integration to use or None if unable to infer """ - if if_generative_language_model(model): + if if_generative_language_model(pipeline): return LM_EVALUATION_HARNESS return None -def if_generative_language_model(model: Any) -> bool: +def if_generative_language_model(pipeline: Pipeline) -> bool: """ Checks if the model is a generative language model. """ - _check_transformers_dependency() - if isinstance(model, Pipeline): - return model.__class__.__name__ == "TextGenerationPipeline" - elif isinstance(model, PreTrainedModel): - return "CausalLM" in model.__class__.__name__ - else: - return False + pipeline_name = pipeline.__class__.__name__ + if pipeline_name == "TextGenerationPipeline" or ( + pipeline_name == "TextGenerationPipelineNoKVCache" + ): + return True + + return False def args_to_dict(args: Tuple[Any, ...]) -> Dict[str, Any]: @@ -134,43 +124,30 @@ def get_save_path( return os.path.join(base_path, file_name) -def create_model_from_target( - target: str, +def create_pipeline( + model_path: str, engine_type: Optional[str] = None, **kwargs, -) -> Union[Pipeline, "AutoModelForCausalLM"]: +) -> Pipeline: """ - Create a model or a pipeline from a target path. + Create a pipeline for evaluation - Note: This function is currently limited to: - - creating pipelines of type 'text-generation' - - creating dense huggingface models of type 'AutoModelForCausalLM' - This function will be expanded in the future to support more - model types and frameworks. + Note: This function is currently primarily + focused on creating pipelines of type 'text-generation' + This function will be expanded in the future to support + more tasks and models - :param target: The target path to initialize the + :param model_path: The target path to initialize the text generation model from. 
        text generation model from. This can be a local or remote path
        to the model or a sparsezoo stub
     :param engine_type: The engine type to initialize the model with.
-    :return: The initialized model
+    :return: The initialized pipeline
     """
-    _check_transformers_dependency()
-
-    if engine_type in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
-        return Pipeline.create(
-            task="text-generation",
-            model_path=target,
-            sequence_length=kwargs.pop("sequence_length", 2048),
-            engine_type=engine_type,
-            batch_size=kwargs.pop("batch_size", 1),
-            **kwargs,
-        )
-    else:
-        return AutoModelForCausalLM.from_pretrained(target, **kwargs)
-
-
-def _check_transformers_dependency():
-    if transformers_error:
-        raise ImportError(
-            "transformers is needed to use this module"
-        ) from transformers_error
+    return Pipeline.create(
+        task=kwargs.pop("task", "text-generation"),
+        model_path=model_path,
+        sequence_length=kwargs.pop("sequence_length", 2048),
+        engine_type=engine_type,
+        batch_size=kwargs.pop("batch_size", 1),
+        **kwargs,
+    )
diff --git a/src/deepsparse/transformers/pipelines/text_generation/pipeline.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py
index 2c858c901b..64c0c64a51 100644
--- a/src/deepsparse/transformers/pipelines/text_generation/pipeline.py
+++ b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py
@@ -357,6 +357,14 @@ def sequence_length(self) -> int:
         """
         return self.ops["single_engine"].sequence_length

+    @property
+    def batch_size(self) -> int:
+        return self.ops["single_engine"].batch_size
+
+    @property
+    def engine_type(self) -> str:
+        return self.ops["single_engine"]._engine_type
+
     def _get_continuous_batching_scheduler(
         self, batch_sizes: List[int], engines: List[EngineOperator]
     ) -> ContinuousBatchingScheduler:
diff --git a/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py
index 7f6cb9db5f..c6cbc3dd59 100644
--- a/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py
+++ b/src/deepsparse/transformers/pipelines/text_generation/pipeline_no_kv_cache.py
@@ -127,3 +127,11 @@ def expand_inputs(self, items, batch_size):
         out, orig_batch_size = split_engine_inputs(items, batch_size)
         combined_batches = [{"input_ids": b[0], "attention_mask": b[1]} for b in out]
         return combined_batches, orig_batch_size
+
+    @property
+    def batch_size(self) -> int:
+        return self.ops["engine_operator"].batch_size
+
+    @property
+    def engine_type(self) -> str:
+        return self.ops["engine_operator"]._engine_type
diff --git a/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py b/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py
index 9fa9b494cf..3b9016294f 100644
--- a/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py
+++ b/tests/deepsparse/evaluation/integrations/test_lm_evaluation_harness.py
@@ -12,19 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from transformers import AutoModelForCausalLM
+
 import pytest

 from deepsparse.evaluation.integrations import try_import_lm_evaluation_harness
-from deepsparse.evaluation.utils import create_model_from_target
+from deepsparse.evaluation.utils import create_pipeline


 @pytest.mark.parametrize(
     "pipeline, model_torch",
     [
         (
-            create_model_from_target(
+            create_pipeline(
                 "hf:mgoin/TinyStories-1M-deepsparse", engine_type="onnxruntime"
             ),
-            create_model_from_target("roneneldan/TinyStories-1M"),
+            AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M"),
         )
     ],
 )
diff --git a/tests/deepsparse/evaluation/test_evaluator.py b/tests/deepsparse/evaluation/test_evaluator.py
index dedd63fa36..816ad075e0 100644
--- a/tests/deepsparse/evaluation/test_evaluator.py
+++ b/tests/deepsparse/evaluation/test_evaluator.py
@@ -29,6 +29,7 @@
     Metric,
     Result,
 )
+from deepsparse.pipeline import Pipeline


 @EvaluationRegistry.register()
@@ -49,7 +50,7 @@ def dummy_integration(*args, **kwargs):


 @pytest.fixture()
-def target():
+def model_path():
     return "hf:mgoin/TinyStories-1M-deepsparse"


@@ -68,18 +69,42 @@ def unknown_integration_name():
     return "unknown_integration"


-def test_evaluate_unknown_integration(target, datasets, unknown_integration_name):
+def test_evaluate_unknown_integration(model_path, datasets, unknown_integration_name):
     with pytest.raises(KeyError):
         evaluate(
-            target=target,
+            model=model_path,
             datasets=datasets,
             integration=unknown_integration_name,
         )


-def test_evaluate(target, datasets, dummy_integration_name):
+def test_evaluate(model_path, datasets, dummy_integration_name):
     result = evaluate(
-        target=target,
+        model=model_path,
+        datasets=datasets,
+        integration=dummy_integration_name,
+    )
+    assert isinstance(result, Result)
+
+
+def test_evaluate_pipeline_with_kv_cache(model_path, datasets, dummy_integration_name):
+    result = evaluate(
+        model=Pipeline.create(model_path=model_path, task="text-generation"),
+        datasets=datasets,
+        integration=dummy_integration_name,
+    )
+    assert isinstance(result, Result)
+
+
+def test_evaluate_pipeline_without_kv_cache(
+    model_path, datasets, dummy_integration_name
+):
+    result = evaluate(
+        model=Pipeline.create(
+            model_path=model_path,
+            task="text-generation",
+            onnx_model_name="model-orig.onnx",
+        ),
         datasets=datasets,
         integration=dummy_integration_name,
     )
     assert isinstance(result, Result)


@@ -91,11 +116,11 @@
     reason="lm_evaluation_harness not installed",
 )
 def test_evaluation_llm_evaluation_harness_integration_name(
-    target,
+    model_path,
     datasets,
 ):
     assert evaluate(
-        target=target,
+        model=model_path,
         datasets=datasets,
         limit=2,
         no_cache=True,
@@ -110,15 +135,17 @@
     "with importing functions that are decorated with "
     "click option where multiple=True",
 )
-def test_cli(tmp_path, target, datasets, dummy_integration_name, type_serialization):
+def test_cli(
+    tmp_path, model_path, datasets, dummy_integration_name, type_serialization
+):
     from deepsparse.evaluation.cli import main

     runner = CliRunner()
     runner.invoke(
         main,
         [
-            "--target",
-            target,
+            "--model_path",
+            model_path,
             "--dataset",
             datasets[0],
             "--dataset",
diff --git a/tests/deepsparse/evaluation/test_utils.py b/tests/deepsparse/evaluation/test_utils.py
index f712dce0df..f8f3c731a8 100644
--- a/tests/deepsparse/evaluation/test_utils.py
+++ b/tests/deepsparse/evaluation/test_utils.py
@@ -14,32 +14,16 @@

 import os

-from transformers import (
-    AutoModelForCausalLM,
-    AutoModelForSequenceClassification,
-    GPTNeoForCausalLM,
-)
-
 import pytest

 from deepsparse import Pipeline
 from deepsparse.evaluation.utils import (
-    create_model_from_target,
+    create_pipeline,
     get_save_path,
     if_generative_language_model,
     resolve_integration,
 )


-@pytest.fixture
-def llm_type_hf_model():
-    return AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M")
-
-
-@pytest.fixture
-def not_llm_type_hf_model():
-    return AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
-
-
 @pytest.fixture
 def llm_type_pipeline():
     return Pipeline.create(
@@ -49,25 +33,13 @@
     )


-def test_resolve_known_llm_model(llm_type_hf_model):
+def test_resolve_known_llm_pipeline(llm_type_pipeline):
     assert (
-        resolve_integration(model=llm_type_hf_model, datasets="")
+        resolve_integration(pipeline=llm_type_pipeline, datasets="")
         == "lm-evaluation-harness"
     )


-def test_resolve_unknown_model(not_llm_type_hf_model):
-    assert resolve_integration(model=not_llm_type_hf_model, datasets="") is None
-
-
-def test_if_generative_language_model_true(llm_type_hf_model):
-    assert if_generative_language_model(llm_type_hf_model)
-
-
-def test_if_generative_language_model_false(not_llm_type_hf_model):
-    assert not if_generative_language_model(not_llm_type_hf_model)
-
-
 def test_if_generative_language_pipeline_true(llm_type_pipeline):
     assert if_generative_language_model(llm_type_pipeline)

@@ -89,26 +61,11 @@ def pipeline_target():
     return "hf:mgoin/TinyStories-1M-deepsparse"


-@pytest.fixture
-def torch_target():
-    return "roneneldan/TinyStories-1M"
-
-
 def test_initialize_model_from_target_pipeline_onnx(pipeline_target):
-    model = create_model_from_target(pipeline_target, "onnxruntime")
+    model = create_pipeline(pipeline_target, "onnxruntime")
     assert model.ops.get("single_engine")._engine_type == "onnxruntime"


-def test_initialize_model_from_target_pipeline_deepsparse(pipeline_target):
-    model = create_model_from_target(pipeline_target, "deepsparse")
-    assert model.ops.get("single_engine")._engine_type == "deepsparse"
-
-
 def test_initialize_model_from_target_pipeline_with_kwargs(pipeline_target):
-    model = create_model_from_target(pipeline_target, "deepsparse", sequence_length=64)
+    model = create_pipeline(pipeline_target, "deepsparse", sequence_length=64)
     assert model.ops.get("process_input").sequence_length == 64
-
-
-def test_initialize_model_from_target_torch(torch_target):
-    model = create_model_from_target(torch_target, "torch")
-    assert isinstance(model, GPTNeoForCausalLM)
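Below is a minimal usage sketch of the renamed evaluation API introduced by this diff. It is not part of the change itself: the `from deepsparse.evaluation.evaluator import evaluate` import path, the `hellaswag` dataset name, and the `limit` value are illustrative assumptions; the model stub, the `lm-evaluation-harness` integration name, and the keyword names come from the code and tests above.

# Sketch only: import path, dataset name, and limit are assumed for illustration.
from deepsparse import Pipeline
from deepsparse.evaluation.evaluator import evaluate  # assumed import path

# 1) Pass a model path / SparseZoo stub; evaluate() builds a text-generation
#    pipeline internally via create_pipeline() with the given engine_type.
result = evaluate(
    model="hf:mgoin/TinyStories-1M-deepsparse",
    datasets="hellaswag",  # illustrative dataset name
    integration="lm-evaluation-harness",
    engine_type="onnxruntime",
    limit=2,  # forwarded to the integration through **kwargs
)

# 2) Pass an already-built Pipeline; its batch_size and engine_type
#    take precedence over the corresponding evaluate() arguments.
pipeline = Pipeline.create(
    task="text-generation",
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
)
result = evaluate(
    model=pipeline,
    datasets="hellaswag",
    integration="lm-evaluation-harness",
)
print(result)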