Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename the angular distance to cosine distance #94

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::borrow::Cow;
use bytemuck::{Pod, Zeroable};
use rand::Rng;

use super::{two_means_binary_quantized as two_means, Angular};
use super::{two_means_binary_quantized as two_means, Cosine};
use crate::distance::Distance;
use crate::node::Leaf;
use crate::parallel::ImmutableSubsetLeafs;
Expand All @@ -16,27 +16,27 @@ use crate::unaligned_vector::{BinaryQuantized, UnalignedVector};
/// /!\ This distance function is binary quantized, which means it loses all its precision
/// and their scalar values are converted to `-1` or `1`.
#[derive(Debug, Clone)]
pub enum BinaryQuantizedAngular {}
pub enum BinaryQuantizedCosine {}

/// The header of `BinaryQuantizedAngular` leaf nodes.
/// The header of `BinaryQuantizedCosine` leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
pub struct NodeHeaderBinaryQuantizedAngular {
pub struct NodeHeaderBinaryQuantizedCosine {
norm: f32,
}

impl Distance for BinaryQuantizedAngular {
impl Distance for BinaryQuantizedCosine {
const DEFAULT_OVERSAMPLING: usize = 3;

type Header = NodeHeaderBinaryQuantizedAngular;
type Header = NodeHeaderBinaryQuantizedCosine;
type VectorCodec = BinaryQuantized;

fn name() -> &'static str {
"binary quantized angular"
"binary quantized cosine"
}

fn new_header(vector: &UnalignedVector<Self::VectorCodec>) -> Self::Header {
NodeHeaderBinaryQuantizedAngular { norm: Self::norm_no_header(vector) }
NodeHeaderBinaryQuantizedCosine { norm: Self::norm_no_header(vector) }
}

fn built_distance(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
Expand Down Expand Up @@ -73,12 +73,12 @@ impl Distance for BinaryQuantizedAngular {
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
let [node_p, node_q] = two_means::<Self, Angular, R>(rng, children, true)?;
let [node_p, node_q] = two_means::<Self, Cosine, R>(rng, children, true)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
let unaligned_vector = UnalignedVector::from_vec(vector);
let mut normal = Leaf {
header: NodeHeaderBinaryQuantizedAngular { norm: 0.0 },
header: NodeHeaderBinaryQuantizedCosine { norm: 0.0 },
vector: unaligned_vector,
};
Self::normalize(&mut normal);
Expand Down
16 changes: 8 additions & 8 deletions src/distance/angular.rs → src/distance/cosine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,25 @@ use crate::unaligned_vector::UnalignedVector;
/// non-zero vectors defined in an inner product space. Cosine similarity
/// is the cosine of the angle between the vectors.
#[derive(Debug, Clone)]
pub enum Angular {}
pub enum Cosine {}

/// The header of Angular leaf nodes.
/// The header of Cosine leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
pub struct NodeHeaderAngular {
pub struct NodeHeaderCosine {
norm: f32,
}

impl Distance for Angular {
type Header = NodeHeaderAngular;
impl Distance for Cosine {
type Header = NodeHeaderCosine;
type VectorCodec = f32;

fn name() -> &'static str {
"angular"
"cosine"
}

fn new_header(vector: &UnalignedVector<Self::VectorCodec>) -> Self::Header {
NodeHeaderAngular { norm: Self::norm_no_header(vector) }
NodeHeaderCosine { norm: Self::norm_no_header(vector) }
}

fn built_distance(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
Expand Down Expand Up @@ -72,7 +72,7 @@ impl Distance for Angular {
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
let unaligned_vector = UnalignedVector::from_vec(vector);
let mut normal = Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: unaligned_vector };
let mut normal = Leaf { header: NodeHeaderCosine { norm: 0.0 }, vector: unaligned_vector };
Self::normalize(&mut normal);

Ok(normal.vector)
Expand Down
8 changes: 4 additions & 4 deletions src/distance/mod.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
use std::borrow::Cow;
use std::fmt;

pub use angular::{Angular, NodeHeaderAngular};
pub use binary_quantized_angular::{BinaryQuantizedAngular, NodeHeaderBinaryQuantizedAngular};
pub use binary_quantized_cosine::{BinaryQuantizedCosine, NodeHeaderBinaryQuantizedCosine};
pub use binary_quantized_euclidean::{
BinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedEuclidean,
};
pub use binary_quantized_manhattan::{
BinaryQuantizedManhattan, NodeHeaderBinaryQuantizedManhattan,
};
use bytemuck::{Pod, Zeroable};
pub use cosine::{Cosine, NodeHeaderCosine};
pub use dot_product::{DotProduct, NodeHeaderDotProduct};
pub use euclidean::{Euclidean, NodeHeaderEuclidean};
use heed::{RwPrefix, RwTxn};
Expand All @@ -22,10 +22,10 @@ use crate::parallel::ImmutableSubsetLeafs;
use crate::unaligned_vector::{UnalignedVector, UnalignedVectorCodec};
use crate::NodeCodec;

mod angular;
mod binary_quantized_angular;
mod binary_quantized_cosine;
mod binary_quantized_euclidean;
mod binary_quantized_manhattan;
mod cosine;
mod dot_product;
mod euclidean;
mod manhattan;
Expand Down
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ pub mod internals {
use rand::Rng;

pub use crate::distance::{
NodeHeaderAngular, NodeHeaderBinaryQuantizedAngular, NodeHeaderBinaryQuantizedEuclidean,
NodeHeaderBinaryQuantizedManhattan, NodeHeaderDotProduct, NodeHeaderEuclidean,
NodeHeaderManhattan,
NodeHeaderBinaryQuantizedCosine, NodeHeaderBinaryQuantizedEuclidean,
NodeHeaderBinaryQuantizedManhattan, NodeHeaderCosine, NodeHeaderDotProduct,
NodeHeaderEuclidean, NodeHeaderManhattan,
};
pub use crate::key::KeyCodec;
pub use crate::node::{Leaf, NodeCodec};
Expand Down Expand Up @@ -137,7 +137,7 @@ pub mod internals {
/// The set of distances implementing the [`Distance`] and supported by arroy.
pub mod distances {
pub use crate::distance::{
Angular, BinaryQuantizedAngular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan,
BinaryQuantizedCosine, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, Cosine,
DotProduct, Euclidean, Manhattan,
};
}
Expand Down
6 changes: 3 additions & 3 deletions src/tests/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::num::NonZeroUsize;
use roaring::RoaringBitmap;

use super::*;
use crate::distance::Angular;
use crate::distance::Cosine;
use crate::distances::{Euclidean, Manhattan};
use crate::{ItemId, Reader, Writer};

Expand Down Expand Up @@ -77,7 +77,7 @@ fn open_db_with_wrong_distance() {
#[test]
fn search_in_db_with_a_single_vector() {
// https://github.com/meilisearch/meilisearch/pull/4296
let handle = create_database::<Angular>();
let handle = create_database::<Cosine>();
let mut wtxn = handle.env.write_txn().unwrap();
let writer = Writer::new(handle.database, 0, 3);
writer.add_item(&mut wtxn, 0, &[0.00397, 0.553, 0.0]).unwrap();
Expand All @@ -86,7 +86,7 @@ fn search_in_db_with_a_single_vector() {
wtxn.commit().unwrap();

let rtxn = handle.env.read_txn().unwrap();
let reader = Reader::<Angular>::open(&rtxn, 0, handle.database).unwrap();
let reader = Reader::<Cosine>::open(&rtxn, 0, handle.database).unwrap();

let ret = reader.nns_by_item(&rtxn, 0, 1, None, None, None).unwrap();
insta::assert_snapshot!(NnsRes(ret), @r###"
Expand Down
14 changes: 7 additions & 7 deletions src/tests/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use rand::seq::SliceRandom;
use rand::Rng;

use super::{create_database, rng};
use crate::distance::{Angular, BinaryQuantizedAngular, DotProduct, Euclidean};
use crate::distance::{BinaryQuantizedCosine, Cosine, DotProduct, Euclidean};
use crate::{Database, Reader, Writer};

#[test]
Expand Down Expand Up @@ -507,7 +507,7 @@ fn delete_one_leaf_in_a_split() {

#[test]
fn delete_one_item_in_a_single_document_database() {
let handle = create_database::<Angular>();
let handle = create_database::<Cosine>();
let mut rng = rng();
let mut wtxn = handle.env.write_txn().unwrap();
let writer = Writer::new(handle.database, 0, 2);
Expand All @@ -520,9 +520,9 @@ fn delete_one_item_in_a_single_document_database() {
insta::assert_snapshot!(handle, @r###"
==================
Dumping index 0
Item 0: Leaf(Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: [0.0000, 0.0000] })
Item 0: Leaf(Leaf { header: NodeHeaderCosine { norm: 0.0 }, vector: [0.0000, 0.0000] })
Tree 0: Descendants(Descendants { descendants: [0] })
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "angular" }
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "cosine" }
"###);

let mut wtxn = handle.env.write_txn().unwrap();
Expand All @@ -536,7 +536,7 @@ fn delete_one_item_in_a_single_document_database() {
insta::assert_snapshot!(handle, @r###"
==================
Dumping index 0
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "angular" }
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "cosine" }
"###);
}

Expand Down Expand Up @@ -1006,7 +1006,7 @@ fn need_build() {

#[test]
fn prepare_changing_distance() {
let handle = create_database::<Angular>();
let handle = create_database::<Cosine>();
let mut rng = rng();
let mut wtxn = handle.env.write_txn().unwrap();
let writer = Writer::new(handle.database, 0, 2);
Expand All @@ -1029,7 +1029,7 @@ fn prepare_changing_distance() {
let mut wtxn = handle.env.write_txn().unwrap();
let writer = Writer::new(handle.database, 1, 2);

let writer = writer.prepare_changing_distance::<BinaryQuantizedAngular>(&mut wtxn).unwrap();
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(&mut wtxn).unwrap();
assert!(writer.need_build(&wtxn).unwrap(), "after changing the distance");

writer.build(&mut wtxn, &mut rng, None).unwrap();
Expand Down
Loading