Fix vLLM producing different embeddings
martinakaduc committed Nov 19, 2024
1 parent bc741b2 commit 62b8735
Showing 4 changed files with 43 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/embed_text_package/embed_text.py

@@ -88,7 +88,7 @@ def get_embeddings(self, dataloader: DataLoader, model_name: str, cols: list):
        for batch in tqdm_dataloader:
            model_inputs = self.tokenizer(
                batch[col],
-                add_special_tokens=False,
+                # add_special_tokens=False,
                return_tensors="pt",
                padding=True,
            )
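The change above re-enables special tokens in the Hugging Face tokenizer path, so the hand-tokenized batch matches what vLLM feeds the model. A minimal sketch of the effect, assuming the Meta-Llama-3-8B tokenizer used in the new test below (this sketch is not part of the commit):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
# Default behavior prepends the BOS token <|begin_of_text|> (id 128000).
print(tok("hello world")["input_ids"])                            # [128000, 15339, 1917]
# With add_special_tokens=False the BOS token is dropped, so the two
# implementations would embed different token sequences.
print(tok("hello world", add_special_tokens=False)["input_ids"])  # [15339, 1917]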
2 changes: 1 addition & 1 deletion src/embed_text_package/models/deepseek.py

@@ -39,7 +39,7 @@ def __init__(
        self.quant_config = quant_config

        self.model = DeepseekV2Model(config, cache_config, quant_config, prefix="model")
-        self._pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+        self._pooler = Pooler(pooling_type=PoolingType.LAST, normalize=False)
        self.make_empty_intermediate_tensors = (
            self.model.make_empty_intermediate_tensors
        )
2 changes: 1 addition & 1 deletion src/embed_text_package/models/llama.py

@@ -30,7 +30,7 @@ def __init__(
        super().__init__()

        self.model = LlamaModel(**kwargs)
-        self._pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+        self._pooler = Pooler(pooling_type=PoolingType.LAST, normalize=False)
        self.make_empty_intermediate_tensors = (
            self.model.make_empty_intermediate_tensors
        )
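Both the DeepSeek and Llama wrappers switch the vLLM Pooler from normalize=True to normalize=False, so the pooled last-token hidden state is returned as-is rather than L2-normalized, matching the raw embeddings of the first implementation. A rough sketch of the difference, using a hypothetical hidden state (not the commit's code):

import torch
import torch.nn.functional as F

h = torch.randn(4096)            # hypothetical last-token hidden state
unit = F.normalize(h, dim=-1)    # roughly what normalize=True would return (unit L2 norm)
raw = h                          # what normalize=False returns: the unscaled hidden state
print(unit.norm().item(), raw.norm().item())  # ~1.0 vs. the original magnitude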
40 changes: 40 additions & 0 deletions tests/test_vllm_implementation.py

@@ -0,0 +1,40 @@
import gc

import torch
from datasets import Dataset
from embed_text_package.embed_text import Embedder
from embed_text_package.embed_text_v2 import Embedder as EmbedderV2

ds = Dataset.from_dict({"text": ["hello world"]})
# {'input_ids': tensor([[128000, 15339, 1917]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1]], device='cuda:0')}

# Load first implementation
embdr = Embedder()
embdr.load(
    "meta-llama/Meta-Llama-3-8B",
)
embdr.model.to(dtype=torch.float16)

# Run first implementation
dataloader = torch.utils.data.DataLoader(ds, batch_size=1, shuffle=False)
emb = embdr.get_embeddings(dataloader, "meta-llama/Meta-Llama-3-8B", ["text"])
embs = torch.tensor(emb["text"])

# Free memory before loading the second implementation
del embdr
gc.collect()
torch.cuda.empty_cache()

# Load second implementation
embdr = EmbedderV2()
embdr.load(
    "meta-llama/Meta-Llama-3-8B",
    dtype=torch.float16,
)

# Run second implementation
dataloader = torch.utils.data.DataLoader(ds, batch_size=1, shuffle=False)
emb = embdr.get_embeddings(dataloader, "meta-llama/Meta-Llama-3-8B", ["text"])
embsv2 = torch.tensor(emb["text"])

# Embeddings from the two implementations should agree within a small tolerance
assert torch.abs(embs - embsv2).mean() < 3e-3
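The new test embeds a single sentence with both implementations and accepts a mean absolute difference below 3e-3, leaving some slack for fp16 rounding between the two code paths. Since it is written as a plain script, it can presumably be run directly with `python tests/test_vllm_implementation.py` (a CUDA GPU and access to meta-llama/Meta-Llama-3-8B are assumed).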
