From 27544e4328f0b97a0c7f0fba8c0e5a3073a3b96a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 12 Dec 2024 12:42:30 -0800
Subject: [PATCH] fix(utils.py): move default tokenizer to just openai

hf tokenizer makes network calls when trying to get the tokenizer -
this slows down execution time
---
 litellm/utils.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index 6bcea7174b00..f211f20e05a2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1211,7 +1211,9 @@ async def wrapper_async(*args, **kwargs):  # noqa: PLR0915
 
 
 @lru_cache(maxsize=128)
-def _select_tokenizer(model: str):
+def _select_tokenizer(
+    model: str,
+):
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
         cohere_tokenizer = Tokenizer.from_pretrained(
@@ -1232,19 +1234,10 @@ def _select_tokenizer(model: str):
         return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
     # default - tiktoken
     else:
-        tokenizer = None
-        if (
-            model in litellm.open_ai_chat_completion_models
-            or model in litellm.open_ai_text_completion_models
-            or model in litellm.open_ai_embedding_models
-        ):
-            return {"type": "openai_tokenizer", "tokenizer": encoding}
-
-        try:
-            tokenizer = Tokenizer.from_pretrained(model)
-            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-        except Exception:
-            return {"type": "openai_tokenizer", "tokenizer": encoding}
+        return {
+            "type": "openai_tokenizer",
+            "tokenizer": encoding,
+        }  # default to openai tokenizer
 
 
 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
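
For context: the deleted fallback called Tokenizer.from_pretrained(model) from the `tokenizers` library, which downloads tokenizer files from the Hugging Face Hub on a cache miss, so a plain token count for an unmapped model could block on a network round-trip. The sketch below contrasts the two default paths. It is a minimal illustration, not litellm's exact code: the module-level `encoding` is assumed to be a tiktoken encoding such as cl100k_base, and the `_select_tokenizer_before`/`_select_tokenizer_after` names are hypothetical.

    # Minimal sketch of the behavior change -- assumed names, not litellm's exact code.
    from functools import lru_cache

    import tiktoken
    from tokenizers import Tokenizer

    # Assumption: litellm keeps a module-level tiktoken encoding like this.
    encoding = tiktoken.get_encoding("cl100k_base")


    @lru_cache(maxsize=128)
    def _select_tokenizer_before(model: str) -> dict:
        # Old default path: an unmapped model triggers Tokenizer.from_pretrained,
        # which fetches tokenizer files from the Hugging Face Hub on a cache miss.
        try:
            tokenizer = Tokenizer.from_pretrained(model)
            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
        except Exception:
            return {"type": "openai_tokenizer", "tokenizer": encoding}


    @lru_cache(maxsize=128)
    def _select_tokenizer_after(model: str) -> dict:
        # New default path: fall straight through to the in-process tiktoken
        # encoding -- no network I/O at all.
        return {"type": "openai_tokenizer", "tokenizer": encoding}


    tok = _select_tokenizer_after("some-unmapped-model")["tokenizer"]
    print(len(tok.encode("hello world")))  # token count, computed locally

Note that the existing @lru_cache(maxsize=128) only amortizes the lookup within a single process: the first call per model still paid the download cost, and short-lived processes paid it every time. That is why moving the default to tiktoken removes the slowdown rather than merely caching it.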