Skip to content

Commit

Permalink
fix(utils.py): move default tokenizer to just openai
Browse files Browse the repository at this point in the history
hf tokenizer makes network calls when trying to get the tokenizer - this slows down execution of completion calls
  • Loading branch information
krrishdholakia committed Dec 12, 2024
1 parent 5d9db82 commit 27544e4
Showing 1 changed file with 7 additions and 14 deletions.
21 changes: 7 additions & 14 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,9 @@ async def wrapper_async(*args, **kwargs): # noqa: PLR0915


@lru_cache(maxsize=128)
def _select_tokenizer(model: str):
def _select_tokenizer(
model: str,
):
if model in litellm.cohere_models and "command-r" in model:
# cohere
cohere_tokenizer = Tokenizer.from_pretrained(
Expand All @@ -1232,19 +1234,10 @@ def _select_tokenizer(model: str):
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
# default - tiktoken
else:
tokenizer = None
if (
model in litellm.open_ai_chat_completion_models
or model in litellm.open_ai_text_completion_models
or model in litellm.open_ai_embedding_models
):
return {"type": "openai_tokenizer", "tokenizer": encoding}

try:
tokenizer = Tokenizer.from_pretrained(model)
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
except Exception:
return {"type": "openai_tokenizer", "tokenizer": encoding}
return {
"type": "openai_tokenizer",
"tokenizer": encoding,
} # default to openai tokenizer


def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
Expand Down

0 comments on commit 27544e4

Please sign in to comment.