Skip to content

Commit

Permalink
Merge branch 'master' into v3.1-release; Release v3.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
tomaarsen committed Sep 19, 2024
2 parents 845dd54 + 7290448 commit 73c8dc3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 10 deletions.
12 changes: 7 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
 [project]
 name = "sentence-transformers"
-version = "3.1.0"
-description = "Multilingual text embeddings"
-license = { file = "LICENSE" }
+version = "3.1.1"
+description = "State-of-the-Art Text Embeddings"
+license = { text = "Apache 2.0" }
 readme = "README.md"
 authors = [
     { name = "Nils Reimers", email = "info@nils-reimers.de" },
-    { name = "Tom Aarsen" },
+    { name = "Tom Aarsen", email = "tom.aarsen@huggingface.co" },
 ]
+maintainers = [
+    { name = "Tom Aarsen", email = "tom.aarsen@huggingface.co" }
+]
requires-python = ">=3.8"
keywords = [
Expand All @@ -33,7 +36,6 @@ dependencies = [
     "transformers>=4.38.0,<5.0.0",
     "tqdm",
     "torch>=1.11.0",
-    "numpy<2.0.0",
     "scikit-learn",
     "scipy",
     "huggingface-hub>=0.19.3",
Expand Down
2 changes: 1 addition & 1 deletion sentence_transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-__version__ = "3.1.0"
+__version__ = "3.1.1"
 __MODEL_HUB_ORGANIZATION__ = "sentence-transformers"
 
 import importlib
Expand Down
16 changes: 12 additions & 4 deletions sentence_transformers/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,8 +714,12 @@ def mine_hard_negatives(
         except Exception:
             pass
 
-        corpus_embeddings = model.encode(corpus, batch_size=batch_size, convert_to_numpy=True, show_progress_bar=True)
-        query_embeddings = model.encode(queries, batch_size=batch_size, convert_to_numpy=True, show_progress_bar=True)
+        corpus_embeddings = model.encode(
+            corpus, batch_size=batch_size, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=True
+        )
+        query_embeddings = model.encode(
+            queries, batch_size=batch_size, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=True
+        )
         index.add(corpus_embeddings)
 
         scores_list = []
Expand All @@ -731,8 +735,12 @@ def mine_hard_negatives(

     else:
         # Embed the corpus and the queries
-        corpus_embeddings = model.encode(corpus, batch_size=batch_size, convert_to_numpy=True, show_progress_bar=True)
-        query_embeddings = model.encode(queries, batch_size=batch_size, convert_to_numpy=True, show_progress_bar=True)
+        corpus_embeddings = model.encode(
+            corpus, batch_size=batch_size, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=True
+        )
+        query_embeddings = model.encode(
+            queries, batch_size=batch_size, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=True
+        )
         scores = model.similarity(query_embeddings, corpus_embeddings).to(device)

# Keep only the range_max + max_positives highest scores. We offset by 1 to potentially include the positive pair
Expand Down

0 comments on commit 73c8dc3

Please sign in to comment.