From 54353a4e6b550450c47808e1ae969fa3e3b81e13 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Wed, 22 Nov 2023 19:20:08 +0100 Subject: [PATCH 01/20] refactor `FzfV2` to use a `Candidate` enum --- src/algos/fzf/candidate.rs | 111 ++++++++++ src/algos/fzf/common.rs | 5 +- src/algos/fzf/mod.rs | 3 + src/algos/fzf/query.rs | 26 ++- src/algos/fzf/scoring.rs | 2 +- src/algos/fzf/slab.rs | 134 +++++------ src/algos/fzf/v1.rs | 133 +++-------- src/algos/fzf/v2.rs | 444 +++++++++++++------------------------ src/candidate.rs | 299 +++++++++++++++++++++++++ src/lib.rs | 2 + src/opts.rs | 135 ----------- src/utils.rs | 10 + 12 files changed, 700 insertions(+), 604 deletions(-) create mode 100644 src/algos/fzf/candidate.rs create mode 100644 src/candidate.rs diff --git a/src/algos/fzf/candidate.rs b/src/algos/fzf/candidate.rs new file mode 100644 index 0000000..c031740 --- /dev/null +++ b/src/algos/fzf/candidate.rs @@ -0,0 +1,111 @@ +use super::*; +use crate::utils::*; +use crate::{Candidate, CandidateMatches}; + +/// TODO: docs +pub(super) struct CandidateV2<'a> { + /// TODO: docs + bonuses: &'a mut [Bonus], + + /// TODO: docs + base: Candidate<'a>, + + /// TODO: docs + opts: CandidateOpts, +} + +/// TODO: docs +#[derive(Clone, Copy)] +pub(super) struct CandidateOpts { + /// TODO: docs + pub char_eq: CharEq, + + /// TODO: docs + pub is_case_sensitive: bool, +} + +impl Default for CandidateOpts { + #[inline(always)] + fn default() -> Self { + Self { char_eq: char_eq(false, false), is_case_sensitive: false } + } +} + +impl CandidateOpts { + #[inline(always)] + pub fn new(is_case_sensitive: bool, is_normalized: bool) -> Self { + Self { + char_eq: char_eq(is_case_sensitive, is_normalized), + is_case_sensitive, + } + } +} + +impl<'a> CandidateV2<'a> { + #[inline(always)] + pub fn bonus_at(&mut self, char_idx: usize, scheme: &Scheme) -> Score { + let bonus = &mut self.bonuses[char_idx]; + + if bonus.is_set() { + return bonus.value(); + } + + let prev_class = if char_idx == 0 { + scheme.initial_char_class + } else { + char_class(self.char(char_idx - 1), scheme) + }; + + let this_class = char_class(self.char(char_idx), scheme); + + let bonus = &mut self.bonuses[char_idx]; + + bonus.set(compute_bonus(prev_class, this_class, scheme)); + + bonus.value() + } + + #[inline(always)] + pub fn char(&self, char_idx: usize) -> char { + self.base.char(char_idx) + } + + #[inline(always)] + pub fn char_len(&self) -> usize { + self.base.char_len() + } + + #[inline(always)] + pub fn into_base(self) -> Candidate<'a> { + self.base + } + + #[inline(always)] + pub fn matches(&self, ch: char) -> CandidateMatches<'a> { + self.base.matches(ch, self.opts.is_case_sensitive, self.opts.char_eq) + } + + #[inline(always)] + pub fn matches_from( + &self, + char_offset: usize, + ch: char, + ) -> CandidateMatches<'a> { + self.base.matches_from( + char_offset, + ch, + self.opts.is_case_sensitive, + self.opts.char_eq, + ) + } + + #[inline(always)] + pub fn new( + base: Candidate<'a>, + bonus_slab: &'a mut BonusSlab, + opts: CandidateOpts, + ) -> Self { + let bonuses = bonus_slab.alloc(base.char_len()); + Self { base, bonuses, opts } + } +} diff --git a/src/algos/fzf/common.rs b/src/algos/fzf/common.rs index 24d9029..5838893 100644 --- a/src/algos/fzf/common.rs +++ b/src/algos/fzf/common.rs @@ -45,7 +45,7 @@ pub(super) fn calculate_score( if opts.char_eq(pattern_char, candidate_ch) { score += bonus::MATCH; - let mut bonus = bonus(prev_class, ch_class, scheme); + let mut bonus = compute_bonus(prev_class, ch_class, scheme); if consecutive == 0 { first_bonus = bonus; @@ -146,7 +146,8 @@ pub(super) fn exact_match( if pattern_char_idx == 0 { bonus_start = current_start_offset + byte_offset; start_offset += byte_offset + candidate_ch.len_utf8(); - current_bonus = bonus(prev_char_class, char_class, scheme); + current_bonus = + compute_bonus(prev_char_class, char_class, scheme); } pattern_char_idx += 1; diff --git a/src/algos/fzf/mod.rs b/src/algos/fzf/mod.rs index 19dd0b8..3bfdeed 100644 --- a/src/algos/fzf/mod.rs +++ b/src/algos/fzf/mod.rs @@ -43,6 +43,7 @@ //! [fzf]: https://github.com/junegunn/fzf //! [extended-search]: https://github.com/junegunn/fzf#search-syntax +mod candidate; mod common; mod distance; mod parser; @@ -55,6 +56,7 @@ mod v1; #[cfg(feature = "fzf-v1")] mod v2; +use candidate::*; use common::*; pub use distance::FzfDistance; use distance::*; @@ -64,6 +66,7 @@ pub use scheme::FzfScheme; #[doc(hidden)] pub use scheme::Scheme; use scoring::*; +use slab::*; #[cfg(feature = "fzf-v1")] pub use v1::FzfV1; #[cfg(feature = "fzf-v1")] diff --git a/src/algos/fzf/query.rs b/src/algos/fzf/query.rs index 4f62ab3..a857adc 100644 --- a/src/algos/fzf/query.rs +++ b/src/algos/fzf/query.rs @@ -4,10 +4,10 @@ use super::*; use crate::*; /// TODO: docs -type FuzzyAlgo = fn( +type FuzzyAlgo = fn( Pattern, - &str, - O, + Candidate, + CandidateOpts, &Scheme, Option<&mut MatchedRanges>, T, @@ -300,14 +300,14 @@ impl<'a> Pattern<'a> { /// TODO: docs #[inline] - pub(super) fn score( + pub(super) fn score( self, - candidate: &str, - opts: O, + candidate: Candidate, + opts: CandidateOpts, scheme: &Scheme, mut ranges_buf: Option<&mut MatchedRanges>, extra: E, - fuzzy_algo: FuzzyAlgo, + fuzzy_algo: FuzzyAlgo, ) -> Option { if self.is_inverse { ranges_buf = None; @@ -319,19 +319,23 @@ impl<'a> Pattern<'a> { }, MatchType::Exact => { - exact_match(self, candidate, opts, scheme, ranges_buf) + todo!() + // exact_match(self, candidate, opts, scheme, ranges_buf) }, MatchType::PrefixExact => { - prefix_match(self, candidate, opts, scheme, ranges_buf) + todo!() + // prefix_match(self, candidate, opts, scheme, ranges_buf) }, MatchType::SuffixExact => { - suffix_match(self, candidate, opts, scheme, ranges_buf) + todo!() + // suffix_match(self, candidate, opts, scheme, ranges_buf) }, MatchType::EqualExact => { - equal_match(self, candidate, opts, scheme, ranges_buf) + todo!() + // equal_match(self, candidate, opts, scheme, ranges_buf) }, }; diff --git a/src/algos/fzf/scoring.rs b/src/algos/fzf/scoring.rs index c508bef..cdbee33 100644 --- a/src/algos/fzf/scoring.rs +++ b/src/algos/fzf/scoring.rs @@ -75,7 +75,7 @@ fn non_ascii_char_class(ch: char, scheme: &Scheme) -> CharClass { /// TODO: docs #[inline] -pub(super) fn bonus( +pub(super) fn compute_bonus( prev_class: CharClass, next_class: CharClass, scheme: &Scheme, diff --git a/src/algos/fzf/slab.rs b/src/algos/fzf/slab.rs index 57fac37..7e9699e 100644 --- a/src/algos/fzf/slab.rs +++ b/src/algos/fzf/slab.rs @@ -1,4 +1,4 @@ -use core::ops::{AddAssign, Index, IndexMut, SubAssign}; +use core::ops::{Index, IndexMut}; use super::Score; @@ -6,7 +6,7 @@ use super::Score; #[derive(Clone, Default)] pub(super) struct V2Slab { /// TODO: docs - pub(super) bonus_vector: BonusVectorSlab, + pub(super) bonus: BonusSlab, /// TODO: docs pub(super) consecutive_matrix: MatrixSlab, @@ -18,108 +18,116 @@ pub(super) struct V2Slab { pub(super) scoring_matrix: MatrixSlab, } +// #[repr(align(8))] /// TODO: docs -#[derive(Clone)] -pub(super) struct MatchedIndicesSlab { - vec: Vec, +#[derive(Clone, Default)] +pub(super) struct Bonus { + value: u8, + is_set: bool, } -impl Default for MatchedIndicesSlab { - #[inline] - fn default() -> Self { - Self { vec: vec![MatchedIdx::default(); 16] } +impl Bonus { + #[inline(always)] + pub fn is_set(&self) -> bool { + self.is_set } -} -impl MatchedIndicesSlab { - #[inline] - /// TODO: docs - pub fn alloc(&mut self, len: usize) -> &mut [MatchedIdx] { - if len > self.vec.len() { - self.vec.resize(len, MatchedIdx::default()); - } + #[inline(always)] + pub fn set(&mut self, value: Score) { + self.value = value as _; + self.is_set = true; + } - &mut self.vec[..len] + #[inline(always)] + pub fn value(&self) -> Score { + self.value as _ } } /// TODO: docs -#[derive(Copy, Clone, Debug, Default)] -pub(super) struct MatchedIdx { - /// TODO: docs - pub(super) byte_offset: usize, - - /// TODO: docs - pub(super) char_offset: usize, +#[derive(Clone)] +pub(super) struct BonusSlab { + vec: Vec, } -impl AddAssign for MatchedIdx { +impl Default for BonusSlab { #[inline(always)] - fn add_assign(&mut self, rhs: Self) { - self.byte_offset += rhs.byte_offset; - self.char_offset += rhs.char_offset; + fn default() -> Self { + Self { vec: vec![Bonus::default(); 128] } } } -impl SubAssign for MatchedIdx { - #[inline(always)] - fn sub_assign(&mut self, rhs: Self) { - self.byte_offset -= rhs.byte_offset; - self.char_offset -= rhs.char_offset; +impl BonusSlab { + /// TODO: docs + #[inline] + pub fn alloc<'a>(&'a mut self, len: usize) -> &'a mut [Bonus] { + if len > self.vec.len() { + self.vec.resize(len, Bonus::default()); + } + + let slice = &mut self.vec[..len]; + + for bonus in slice.iter_mut() { + bonus.is_set = false; + } + + slice } } /// TODO: docs #[derive(Clone)] -pub(super) struct BonusVectorSlab { - vec: Vec, +pub(super) struct CandidateSlab { + chars: Vec, } -impl Default for BonusVectorSlab { - #[inline] +impl Default for CandidateSlab { + #[inline(always)] fn default() -> Self { - Self { vec: vec![0; 16] } + Self { chars: vec![char::default(); 128] } } } -impl BonusVectorSlab { - /// TODO: docs - #[inline] - pub fn alloc<'a>(&'a mut self, candidate: &str) -> BonusVector<'a> { - let byte_len = candidate.len(); +impl CandidateSlab { + #[inline(always)] + pub fn alloc<'a>(&'a mut self, text: &str) -> &'a [char] { + if text.len() > self.chars.len() { + self.chars.resize(text.len(), char::default()); + } + + let mut char_len = 0; - if byte_len > self.vec.len() { - self.vec.resize(byte_len, 0); + for ch in text.chars() { + self.chars[char_len] = ch; + char_len += 1; } - BonusVector { indices: &mut self.vec[..byte_len], len: 0 } + &self.chars[..char_len] } } /// TODO: docs -pub(super) struct BonusVector<'a> { - indices: &'a mut [Score], - len: usize, +#[derive(Clone)] +pub(super) struct MatchedIndicesSlab { + vec: Vec, } -impl core::fmt::Debug for BonusVector<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.indices[..self.len].fmt(f) +impl Default for MatchedIndicesSlab { + #[inline] + fn default() -> Self { + Self { vec: vec![0; 128] } } } -impl<'a> BonusVector<'a> { - /// TODO: docs +impl MatchedIndicesSlab { #[inline] - pub fn into_slice(self) -> &'a [Score] { - &self.indices[..self.len] - } - /// TODO: docs - #[inline] - pub fn push(&mut self, score: Score) { - self.indices[self.len] = score; - self.len += 1; + pub fn alloc(&mut self, len: usize) -> &mut [usize] { + if len > self.vec.len() { + self.vec.resize(len, 0); + } + + &mut self.vec[..len] } } diff --git a/src/algos/fzf/v1.rs b/src/algos/fzf/v1.rs index 019b24a..2745fad 100644 --- a/src/algos/fzf/v1.rs +++ b/src/algos/fzf/v1.rs @@ -50,8 +50,7 @@ impl FzfV1 { fn score( &mut self, pattern: Pattern, - candidate: &str, - is_candidate_ascii: bool, + candidate: Candidate, buf: Option<&mut MatchedRanges>, ) -> Option { let is_sensitive = match self.case_sensitivity { @@ -60,25 +59,9 @@ impl FzfV1 { CaseSensitivity::Smart => pattern.has_uppercase, }; - if is_candidate_ascii { - fzf_v1( - pattern, - candidate, - AsciiCandidateOpts::new(is_sensitive), - &self.scheme, - buf, - (), - ) - } else { - fzf_v1( - pattern, - candidate, - UnicodeCandidateOpts::new(is_sensitive, self.normalization), - &self.scheme, - buf, - (), - ) - } + let opts = CandidateOpts::new(is_sensitive, self.normalization); + + fzf_v1(pattern, candidate, opts, &self.scheme, buf, ()) } /// TODO: docs @@ -121,79 +104,10 @@ impl Metric for FzfV1 { #[inline(always)] fn distance( &mut self, - query: FzfQuery<'_>, - candidate: &str, + _query: FzfQuery<'_>, + _candidate: &str, ) -> Option> { - if query.is_empty() { - return Some(Match::default()); - } - - let is_candidate_ascii = candidate.is_ascii(); - - let mut buf = if self.with_matched_ranges { - Some(MatchedRanges::default()) - } else { - None - }; - - let conditions = match query.search_mode { - SearchMode::Extended(conditions) => conditions, - - SearchMode::NotExtended(pattern) => { - return self - .score( - pattern, - candidate, - is_candidate_ascii, - buf.as_mut(), - ) - .map(FzfDistance::from_score) - .map(|distance| { - Match::new(distance, buf.unwrap_or_default()) - }) - }, - }; - - let mut total_score = 0; - - for condition in conditions { - let score = condition.iter().find_map(|pattern| { - let is_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - if is_candidate_ascii { - pattern.score( - candidate, - AsciiCandidateOpts::new(is_sensitive), - &self.scheme, - buf.as_mut(), - (), - fzf_v1, - ) - } else { - pattern.score( - candidate, - UnicodeCandidateOpts::new( - is_sensitive, - self.normalization, - ), - &self.scheme, - buf.as_mut(), - (), - fzf_v1, - ) - } - })?; - - total_score += score; - } - - let distance = FzfDistance::from_score(total_score); - - Some(Match::new(distance, buf.unwrap_or_default())) + todo!(); } #[inline] @@ -211,27 +125,30 @@ impl Metric for FzfV1 { #[inline] pub(super) fn fzf_v1( pattern: Pattern, - candidate: &str, - opts: impl Opts, - scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, + _candidate: Candidate, + _opts: CandidateOpts, + _scheme: &Scheme, + _ranges_buf: Option<&mut MatchedRanges>, _: (), ) -> Option { + // TODO: can we remove this? if pattern.is_empty() { return Some(0); } - let range_forward = forward_pass(pattern, candidate, opts)?; - - let start_backward = - backward_pass(pattern, &candidate[range_forward.clone()], opts); - - let range = range_forward.start + start_backward..range_forward.end; - - let score = - calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); - - Some(score) + todo!(); + + // let range_forward = forward_pass(pattern, candidate, opts)?; + // + // let start_backward = + // backward_pass(pattern, &candidate[range_forward.clone()], opts); + // + // let range = range_forward.start + start_backward..range_forward.end; + // + // let score = + // calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); + // + // Some(score) } /// TODO: docs diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index 7036545..91fdfa7 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -1,13 +1,15 @@ use core::ops::Range; -use super::{query::*, scoring::*, slab::*, *}; -use crate::Opts; +use super::{query::*, slab::*, *}; use crate::*; /// TODO: docs #[cfg_attr(docsrs, doc(cfg(feature = "fzf-v2")))] #[derive(Clone, Default)] pub struct FzfV2 { + /// TODO: docs + candidate_slab: CandidateSlab, + /// TODO: docs case_sensitivity: CaseSensitivity, @@ -49,42 +51,6 @@ impl FzfV2 { &self.scheme } - /// TODO: docs - #[inline(always)] - fn score( - &mut self, - pattern: Pattern, - candidate: &str, - is_candidate_ascii: bool, - buf: Option<&mut MatchedRanges>, - ) -> Option { - let is_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - if is_candidate_ascii { - fzf_v2( - pattern, - candidate, - AsciiCandidateOpts::new(is_sensitive), - &self.scheme, - buf, - &mut self.slab, - ) - } else { - fzf_v2( - pattern, - candidate, - UnicodeCandidateOpts::new(is_sensitive, self.normalization), - &self.scheme, - buf, - &mut self.slab, - ) - } - } - /// TODO: docs #[inline(always)] pub fn with_case_sensitivity( @@ -132,7 +98,12 @@ impl Metric for FzfV2 { return Some(Match::default()); } - let is_candidate_ascii = candidate.is_ascii(); + let candidate = if candidate.is_ascii() { + Candidate::Ascii(candidate.as_bytes()) + } else { + let chars = self.candidate_slab.alloc(candidate); + Candidate::Unicode(chars) + }; let mut buf = if self.with_matched_ranges { Some(MatchedRanges::default()) @@ -144,17 +115,24 @@ impl Metric for FzfV2 { SearchMode::Extended(conditions) => conditions, SearchMode::NotExtended(pattern) => { - return self - .score( - pattern, - candidate, - is_candidate_ascii, - buf.as_mut(), - ) - .map(FzfDistance::from_score) - .map(|distance| { - Match::new(distance, buf.unwrap_or_default()) - }) + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + let score = fzf_v2( + pattern, + candidate, + CandidateOpts::new(is_sensitive, self.normalization), + &self.scheme, + None, + &mut self.slab, + )?; + + let distance = FzfDistance::from_score(score); + + return Some(Match::new(distance, buf.unwrap_or_default())); }, }; @@ -168,28 +146,14 @@ impl Metric for FzfV2 { CaseSensitivity::Smart => pattern.has_uppercase, }; - if is_candidate_ascii { - pattern.score( - candidate, - AsciiCandidateOpts::new(is_sensitive), - &self.scheme, - buf.as_mut(), - &mut self.slab, - fzf_v2, - ) - } else { - pattern.score( - candidate, - UnicodeCandidateOpts::new( - is_sensitive, - self.normalization, - ), - &self.scheme, - buf.as_mut(), - &mut self.slab, - fzf_v2, - ) - } + pattern.score( + candidate, + CandidateOpts::new(is_sensitive, self.normalization), + &self.scheme, + buf.as_mut(), + &mut self.slab, + fzf_v2, + ) })?; total_score += score; @@ -215,60 +179,45 @@ impl Metric for FzfV2 { #[inline] pub(super) fn fzf_v2( pattern: Pattern, - candidate: &str, - opts: impl Opts, + candidate: Candidate, + opts: CandidateOpts, scheme: &Scheme, ranges_buf: Option<&mut MatchedRanges>, slab: &mut V2Slab, ) -> Option { + // TODO: can we remove this? if pattern.is_empty() { return Some(0); } - let (matches, last_match_offset) = + let (match_offsets, last_match_offset) = matches(&mut slab.matched_indices, pattern, candidate, opts)?; - let first_match = matches[0]; - - let initial_char_class = candidate[..first_match.byte_offset] - .chars() - .next_back() - .map(|ch| char_class(ch, scheme)) - .unwrap_or(scheme.initial_char_class); + let first_offset = match_offsets[0]; - let candidate = &candidate[first_match.byte_offset..last_match_offset]; + let mut candidate = CandidateV2::new( + candidate.slice(first_offset, last_match_offset), + &mut slab.bonus, + opts, + ); // After slicing the candidate we need to move all the offsets back - // by the offsets of the first match so that they still refer to the - // characters. - matches.iter_mut().for_each(|mach| *mach -= first_match); - - let bonus_vector = compute_bonuses( - &mut slab.bonus_vector, - candidate, - initial_char_class, - scheme, - ); + // by the offset of the first match so that they still refer to the + // same characters. + match_offsets.iter_mut().for_each(|offset| *offset -= first_offset); let (scores, consecutive, score, score_cell) = score( &mut slab.scoring_matrix, &mut slab.consecutive_matrix, pattern, - candidate, - matches, - bonus_vector, - opts, + &mut candidate, + match_offsets, + scheme, ); if let Some(buf) = ranges_buf { - matched_ranges( - scores, - consecutive, - score_cell, - candidate, - first_match.byte_offset, - buf, - ); + let candidate = candidate.into_base(); + matched_ranges(scores, consecutive, score_cell, candidate, 0, buf); }; Some(score) @@ -279,35 +228,28 @@ pub(super) fn fzf_v2( fn matches<'idx>( indices_slab: &'idx mut MatchedIndicesSlab, pattern: Pattern, - mut candidate: &str, - opts: impl Opts, -) -> Option<(&'idx mut [MatchedIdx], usize)> { - let matched_idxs = indices_slab.alloc(pattern.char_len()); + candidate: Candidate, + opts: CandidateOpts, +) -> Option<(&'idx mut [usize], usize)> { + let match_offsets = indices_slab.alloc(pattern.char_len()); let mut pattern_char_idx = 0; - let mut last_matched_idx = MatchedIdx::default(); + let mut last_match_offset = 0; loop { let pattern_char = pattern.char(pattern_char_idx); - let (byte_offset, matched_char_byte_len) = - opts.find_first(pattern_char, candidate)?; - - let char_offset = opts.to_char_offset(candidate, byte_offset); - - last_matched_idx += MatchedIdx { byte_offset, char_offset }; + last_match_offset = candidate.find_first_from( + last_match_offset, + pattern_char, + opts.is_case_sensitive, + opts.char_eq, + )?; - matched_idxs[pattern_char_idx] = last_matched_idx; + match_offsets[pattern_char_idx] = last_match_offset; - // SAFETY: the start of the range is within the byte length of the - // candidate and it's a valid char boundary. - candidate = unsafe { - candidate.get_unchecked(byte_offset + matched_char_byte_len..) - }; - - last_matched_idx += - MatchedIdx { byte_offset: matched_char_byte_len, char_offset: 1 }; + last_match_offset += 1; if pattern_char_idx + 1 < pattern.char_len() { pattern_char_idx += 1; @@ -316,37 +258,16 @@ fn matches<'idx>( } } - let last_char_offset_inclusive = last_matched_idx.byte_offset - + if let Some((byte_offset, matched_char_byte_len)) = - opts.find_last(pattern.char(pattern_char_idx), candidate) - { - byte_offset + matched_char_byte_len - } else { - 0 - }; + let last_char_offset_inclusive = candidate + .find_last( + pattern.char(pattern_char_idx), + opts.is_case_sensitive, + opts.char_eq, + ) + .unwrap() + + 1; - Some((matched_idxs, last_char_offset_inclusive)) -} - -/// TODO: docs -#[inline] -fn compute_bonuses<'bonus>( - bonus_slab: &'bonus mut BonusVectorSlab, - candidate: &str, - initial_char_class: CharClass, - scheme: &Scheme, -) -> &'bonus [Score] { - let mut prev_class = initial_char_class; - - let mut bonuses = bonus_slab.alloc(candidate); - - for ch in candidate.chars() { - let char_class = char_class(ch, scheme); - bonuses.push(bonus(prev_class, char_class, scheme)); - prev_class = char_class; - } - - bonuses.into_slice() + Some((match_offsets, last_char_offset_inclusive)) } /// TODO: docs @@ -355,15 +276,12 @@ fn score<'scoring, 'consecutive>( scoring_slab: &'scoring mut MatrixSlab, consecutive_slab: &'consecutive mut MatrixSlab, pattern: Pattern, - candidate: &str, - matches: &[MatchedIdx], - bonus_vector: &[Score], - opts: impl Opts, + candidate: &mut CandidateV2, + matches: &[usize], + scheme: &Scheme, ) -> (Matrix<'scoring, Score>, Matrix<'consecutive, usize>, Score, MatrixCell) { - // The length of the bonus slice is the same as the character length of the - // candidate. - let matrix_width = bonus_vector.len(); + let matrix_width = candidate.char_len(); let matrix_height = pattern.char_len(); @@ -375,20 +293,18 @@ fn score<'scoring, 'consecutive>( let (max_score, max_score_cell) = score_first_row( scoring_matrix.row_mut(0), consecutive_matrix.row_mut(0), - bonus_vector, pattern.char(0), candidate, - opts, + scheme, ); let (max_score, max_score_cell) = score_remaining_rows( &mut scoring_matrix, &mut consecutive_matrix, pattern, - matches, candidate, - bonus_vector, - opts, + scheme, + matches, max_score, max_score_cell, ); @@ -401,10 +317,9 @@ fn score<'scoring, 'consecutive>( fn score_first_row( scores_first_row: &mut Row, consecutives_first_row: &mut Row, - bonus_vector: &[Score], first_pattern_char: char, - mut candidate: &str, - opts: impl Opts, + candidate: &mut CandidateV2, + scheme: &Scheme, ) -> (Score, MatrixCell) { let mut max_score: Score = 0; @@ -412,57 +327,45 @@ fn score_first_row( let mut max_score_col: usize = 0; - // TODO: docs - let mut col = 0; - - // TODO: explain what this does. - let mut penalty = penalty::GAP_START; - - while !candidate.is_empty() { - let Some((byte_offset, matched_char_byte_len)) = - opts.find_first(first_pattern_char, candidate) - else { - for col in col..scores_first_row.len() { - let score = prev_score.saturating_sub(penalty); - scores_first_row[col] = score; - prev_score = score; - penalty = penalty::GAP_EXTENSION; - } + let mut column = 0; - break; - }; + let mut penalty; - let char_offset = opts.to_char_offset(candidate, byte_offset); + for char_offset in candidate.matches(first_pattern_char) { + penalty = penalty::GAP_START; - // TODO: explain what this does. - { - for col in col..col + char_offset { - let score = prev_score.saturating_sub(penalty); - scores_first_row[col] = score; - prev_score = score; - penalty = penalty::GAP_EXTENSION; - } + for col in column..column + char_offset { + let score = prev_score.saturating_sub(penalty); + scores_first_row[col] = score; + prev_score = score; + penalty = penalty::GAP_EXTENSION; } - col += char_offset; + column = char_offset; - consecutives_first_row[col] = 1; + consecutives_first_row[column] = 1; let score = bonus::MATCH - + (bonus_vector[col] * bonus::FIRST_QUERY_CHAR_MULTIPLIER); + + (candidate.bonus_at(column, scheme) + * bonus::FIRST_QUERY_CHAR_MULTIPLIER); + + scores_first_row[column] = score; if score > max_score { max_score = score; - max_score_col = col; + max_score_col = column; } - scores_first_row[col] = score; - prev_score = score; + } - col += 1; + penalty = penalty::GAP_START; - candidate = &candidate[byte_offset + matched_char_byte_len..]; + for col in column..scores_first_row.len() { + let score = prev_score.saturating_sub(penalty); + scores_first_row[col] = score; + prev_score = score; + penalty = penalty::GAP_EXTENSION; } (max_score, MatrixCell(max_score_col)) @@ -474,10 +377,9 @@ fn score_remaining_rows( scores: &mut Matrix<'_, Score>, consecutives: &mut Matrix<'_, usize>, pattern: Pattern, - matches: &[MatchedIdx], - candidate: &str, - bonus_vector: &[Score], - opts: impl Opts, + candidate: &mut CandidateV2, + scheme: &Scheme, + matches: &[usize], mut max_score: Score, mut max_score_cell: MatrixCell, ) -> (Score, MatrixCell) { @@ -492,90 +394,70 @@ fn score_remaining_rows( let (prev_consecutives_row, consecutives_row) = consecutives.two_rows_mut(row_idx - 1, row_idx); - let matched_idx = matches[row_idx]; - - let mut column = matched_idx.char_offset; - - let mut candidate = &candidate[matched_idx.byte_offset..]; - - // TODO: explain what this does. - let mut penalty = penalty::GAP_START; - - while !candidate.is_empty() { - let Some((byte_offset, matched_char_byte_len)) = - opts.find_first(pattern_char, candidate) - else { - for col in column..matrix_width { - let score_left = scores_row[col - 1]; - let score = score_left.saturating_sub(penalty); - scores_row[col] = score; - penalty = penalty::GAP_EXTENSION; - } + let first_match_offset = matches[row_idx]; - break; - }; + let mut column = first_match_offset; - let char_offset = opts.to_char_offset(candidate, byte_offset); + let mut penalty; - // TODO: explain what this does. + for char_offset in + candidate.matches_from(first_match_offset, pattern_char) + { penalty = penalty::GAP_START; - { - for col in column..column + char_offset { - let score_left = scores_row[col - 1]; - let score = score_left.saturating_sub(penalty); - scores_row[col] = score; - penalty = penalty::GAP_EXTENSION; - } + for col in column..column + char_offset { + let score_left = scores_row[col - 1]; + let score = score_left.saturating_sub(penalty); + scores_row[col] = score; + penalty = penalty::GAP_EXTENSION; } column += char_offset; - // TODO: explain what this does. - { - let score_left = - scores_row[column - 1].saturating_sub(penalty); + let score_left = scores_row[column - 1].saturating_sub(penalty); - let mut score_up_left = - prev_scores_row[column - 1] + bonus::MATCH; + let mut score_up_left = prev_scores_row[column - 1] + bonus::MATCH; - let mut bonus = bonus_vector[column]; + let mut bonus = candidate.bonus_at(column, scheme); - let mut consecutive = prev_consecutives_row[column - 1] + 1; + let mut consecutive = prev_consecutives_row[column - 1] + 1; - if consecutive > 1 { - let fb = bonus_vector[column + 1 - consecutive]; + if consecutive > 1 { + let fb = candidate.bonus_at(column + 1 - consecutive, scheme); - if bonus >= bonus::BOUNDARY && bonus > fb { - consecutive = 1; - } else { - bonus = bonus::CONSECUTIVE.max(fb).max(bonus); - } + if bonus >= bonus::BOUNDARY && bonus > fb { + consecutive = 1; + } else { + bonus = bonus::CONSECUTIVE.max(fb).max(bonus); } + } - score_up_left += if score_up_left + bonus < score_left { - consecutive = 0; - bonus_vector[column] - } else { - bonus - }; + score_up_left += if score_up_left + bonus < score_left { + consecutive = 0; + candidate.bonus_at(column, scheme) + } else { + bonus + }; - let score = score_left.max(score_up_left); + let score = score_left.max(score_up_left); - if score > max_score { - max_score = score; - max_score_cell = - MatrixCell(row_idx * matrix_width + column); - } + if score > max_score { + max_score = score; + max_score_cell = MatrixCell(row_idx * matrix_width + column); + } - consecutives_row[column] = consecutive; + consecutives_row[column] = consecutive; - scores_row[column] = score; - } + scores_row[column] = score; + } - column += 1; + penalty = penalty::GAP_START; - candidate = &candidate[byte_offset + matched_char_byte_len..]; + for col in column..matrix_width { + let score_left = scores_row[col - 1]; + let score = score_left.saturating_sub(penalty); + scores_row[col] = score; + penalty = penalty::GAP_EXTENSION; } } @@ -588,16 +470,14 @@ fn matched_ranges( scores: Matrix, consecutives: Matrix, max_score_cell: MatrixCell, - candidate: &str, - start_offset: usize, + candidate: Candidate, + start_byte_offset: usize, ranges: &mut MatchedRanges, ) { let mut prefer_match = true; let mut cell = max_score_cell; - let mut char_indices = candidate.char_indices().rev().enumerate(); - loop { let score = scores[cell]; @@ -635,17 +515,13 @@ fn matched_ranges( { let col = scores.col_of(cell); - let (mut offset, ch) = char_indices - .by_ref() - .find_map(|(back_idx, ch)| { - let idx = scores.width() - back_idx - 1; - (idx == col).then_some(ch) - }) - .unwrap(); + let mut byte_offset = candidate.to_byte_offset(col); + + let ch = candidate.char(col); - offset += start_offset; + byte_offset += start_byte_offset; - ranges.insert(offset..offset + ch.len_utf8()); + ranges.insert(byte_offset..byte_offset + ch.len_utf8()); if let Some(up_left) = cell_up_left { cell = up_left; diff --git a/src/candidate.rs b/src/candidate.rs new file mode 100644 index 0000000..9d1eb6c --- /dev/null +++ b/src/candidate.rs @@ -0,0 +1,299 @@ +use crate::utils::*; + +/// TODO: docs +#[derive(Copy, Clone)] +pub(crate) enum Candidate<'a> { + Ascii(&'a [u8]), + Unicode(&'a [char]), +} + +impl<'a> Candidate<'a> { + /// TODO: docs + #[inline(always)] + pub fn char(&self, char_idx: usize) -> char { + match self { + Candidate::Ascii(candidate) => candidate[char_idx] as _, + Candidate::Unicode(candidate) => candidate[char_idx], + } + } + + /// TODO: docs + #[inline(always)] + pub fn char_len(&self) -> usize { + match self { + Candidate::Ascii(slice) => slice.len(), + Candidate::Unicode(slice) => slice.len(), + } + } + + /// TODO: docs + #[inline(always)] + pub fn find_first_from( + &self, + char_offset: usize, + ch: char, + is_case_sensitive: bool, + char_eq: CharEq, + ) -> Option { + match self { + Candidate::Ascii(slice) => { + if !ch.is_ascii() { + return None; + } + + let slice = &slice[char_offset..]; + + find_first_ascii(ch as _, slice, is_case_sensitive) + .map(|offset| offset + char_offset) + }, + + Candidate::Unicode(slice) => { + let slice = &slice[char_offset..]; + + find_first_unicode(ch, slice, char_eq) + .map(|idx| idx + char_offset) + }, + } + } + + /// TODO: docs + #[inline(always)] + pub fn find_last( + &self, + ch: char, + is_case_sensitive: bool, + char_eq: CharEq, + ) -> Option { + match self { + Candidate::Ascii(slice) => { + if ch.is_ascii() { + find_last_ascii(ch as _, slice, is_case_sensitive) + } else { + None + } + }, + + Candidate::Unicode(slice) => find_last_unicode(ch, slice, char_eq), + } + } + + /// TODO: docs + #[inline(always)] + pub fn matches( + &self, + ch: char, + is_case_sensitive: bool, + char_eq: CharEq, + ) -> CandidateMatches<'a> { + match self { + Candidate::Ascii(slice) => { + CandidateMatches::from_ascii(ch, slice, is_case_sensitive, 0) + }, + + Candidate::Unicode(slice) => { + CandidateMatches::from_unicode(ch, slice, char_eq, 0) + }, + } + } + + /// TODO: docs + #[inline(always)] + pub fn matches_from( + &self, + char_offset: usize, + ch: char, + is_case_sensitive: bool, + char_eq: CharEq, + ) -> CandidateMatches<'a> { + match self { + Candidate::Ascii(slice) => { + let slice = &slice[char_offset..]; + CandidateMatches::from_ascii( + ch, + slice, + is_case_sensitive, + char_offset, + ) + }, + + Candidate::Unicode(slice) => { + let slice = &slice[char_offset..]; + CandidateMatches::from_unicode(ch, slice, char_eq, char_offset) + }, + } + } + + /// TODO: docs + #[inline(always)] + pub fn slice(self, char_start: usize, char_end: usize) -> Self { + match self { + Candidate::Ascii(slice) => { + Candidate::Ascii(&slice[char_start..char_end]) + }, + + Candidate::Unicode(slice) => { + Candidate::Unicode(&slice[char_start..char_end]) + }, + } + } + + /// TODO: docs + #[inline(always)] + pub fn to_byte_offset(self, char_offset: usize) -> usize { + match self { + Candidate::Ascii(_) => char_offset, + Candidate::Unicode(slice) => { + slice[..char_offset].iter().map(|&ch| ch.len_utf8()).sum() + }, + } + } +} + +#[inline(always)] +fn find_first_ascii( + needle: u8, + haystack: &[u8], + is_case_sensitive: bool, +) -> Option { + if is_case_sensitive || !needle.is_ascii_alphabetic() { + memchr::memchr(needle, haystack) + } else { + memchr::memchr2(needle, ascii_letter_flip_case(needle), haystack) + } +} + +#[inline(always)] +fn find_last_ascii( + needle: u8, + haystack: &[u8], + is_case_sensitive: bool, +) -> Option { + if is_case_sensitive || !needle.is_ascii_alphabetic() { + memchr::memrchr(needle, haystack) + } else { + memchr::memrchr2(needle, ascii_letter_flip_case(needle), haystack) + } +} + +#[inline(always)] +fn find_first_unicode( + needle: char, + haystack: &[char], + char_eq: CharEq, +) -> Option { + haystack + .iter() + .enumerate() + .find_map(|(idx, &ch)| char_eq(needle, ch).then_some(idx)) +} + +#[inline(always)] +fn find_last_unicode( + needle: char, + haystack: &[char], + char_eq: CharEq, +) -> Option { + haystack + .iter() + .enumerate() + .rev() + .find_map(|(idx, &ch)| char_eq(needle, ch).then_some(idx)) +} + +struct UnicodeMatches<'a> { + needle: char, + haystack: &'a [char], + char_eq: CharEq, + offset: usize, +} + +impl<'a> UnicodeMatches<'a> { + fn new(ch: char, haystack: &'a [char], char_eq: CharEq) -> Self { + Self { needle: ch, haystack, char_eq, offset: 0 } + } +} + +impl Iterator for UnicodeMatches<'_> { + type Item = usize; + + #[inline(always)] + fn next(&mut self) -> Option { + let idx = + self.haystack.iter().enumerate().find_map(|(idx, &ch)| { + (self.char_eq)(self.needle, ch).then_some(idx) + })?; + + self.haystack = &self.haystack[idx + 1..]; + + let offset = self.offset + idx; + + self.offset = offset + 1; + + Some(offset) + } +} + +/// TODO: docs +pub(crate) struct CandidateMatches<'a> { + iter: CandidateMatchesIter<'a>, + start_offset: usize, +} + +impl<'a> CandidateMatches<'a> { + #[inline(always)] + fn from_ascii( + needle: char, + haystack: &'a [u8], + is_case_sensitive: bool, + start_offset: usize, + ) -> Self { + if !needle.is_ascii() { + return Self::from_unicode(needle, &[], char_eq(false, false), 0); + } + + let needle = needle as u8; + + let iter = if is_case_sensitive || !needle.is_ascii_alphabetic() { + CandidateMatchesIter::Memchr(memchr::Memchr::new(needle, haystack)) + } else { + CandidateMatchesIter::Memchr2(memchr::Memchr2::new( + needle, + ascii_letter_flip_case(needle), + haystack, + )) + }; + + Self { iter, start_offset } + } + + #[inline(always)] + fn from_unicode( + needle: char, + haystack: &'a [char], + char_eq: CharEq, + start_offset: usize, + ) -> Self { + let iter = UnicodeMatches::new(needle, haystack, char_eq); + Self { iter: CandidateMatchesIter::Unicode(iter), start_offset } + } +} + +enum CandidateMatchesIter<'a> { + Memchr(memchr::Memchr<'a>), + Memchr2(memchr::Memchr2<'a>), + Unicode(UnicodeMatches<'a>), +} + +impl Iterator for CandidateMatches<'_> { + type Item = usize; + + #[inline(always)] + fn next(&mut self) -> Option { + match &mut self.iter { + CandidateMatchesIter::Memchr(memchr) => memchr.next(), + CandidateMatchesIter::Memchr2(memchr2) => memchr2.next(), + CandidateMatchesIter::Unicode(unicode) => unicode.next(), + } + .map(|offset| self.start_offset + offset) + } +} diff --git a/src/lib.rs b/src/lib.rs index d8f0a84..8b6d91a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,7 @@ extern crate alloc; mod algos; +mod candidate; mod case_sensitivity; mod r#match; mod matched_ranges; @@ -68,6 +69,7 @@ mod tiny_vec; mod utils; pub use algos::*; +use candidate::{Candidate, CandidateMatches}; pub use case_sensitivity::CaseSensitivity; use matched_ranges::MatchedRanges; pub use metric::Metric; diff --git a/src/opts.rs b/src/opts.rs index ea4ae5a..8f034d0 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -1,5 +1,3 @@ -use crate::utils::*; - /// TODO: docs pub(crate) trait Opts: Copy { /// TODO: docs @@ -22,136 +20,3 @@ pub(crate) trait Opts: Copy { /// TODO: docs fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize; } - -#[derive(Clone, Copy)] -pub(crate) struct AsciiCandidateOpts { - is_case_sensitive: bool, -} - -impl AsciiCandidateOpts { - #[inline(always)] - pub fn new(is_case_sensitive: bool) -> Self { - Self { is_case_sensitive } - } -} - -impl Opts for AsciiCandidateOpts { - #[inline(always)] - fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { - if self.is_case_sensitive { - query_ch == candidate_ch - } else { - query_ch.eq_ignore_ascii_case(&candidate_ch) - } - } - - #[inline(always)] - fn to_char_offset(&self, _: &str, byte_offset: usize) -> usize { - byte_offset - } - - #[inline(always)] - fn find_first( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)> { - if !query_ch.is_ascii() { - return None; - }; - - let query_byte = query_ch as u8; - - let offset = - if self.is_case_sensitive || !query_byte.is_ascii_alphabetic() { - memchr::memchr(query_byte, candidate.as_bytes()) - } else { - memchr::memchr2( - query_byte, - ascii_letter_flip_case(query_byte), - candidate.as_bytes(), - ) - }?; - - Some((offset, 1)) - } - - #[inline(always)] - fn find_last( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)> { - if !query_ch.is_ascii() { - return None; - }; - - let query_byte = query_ch as u8; - - let offset = - if self.is_case_sensitive || !query_byte.is_ascii_alphabetic() { - memchr::memrchr(query_byte, candidate.as_bytes()) - } else { - memchr::memrchr2( - query_byte, - ascii_letter_flip_case(query_byte), - candidate.as_bytes(), - ) - }?; - - Some((offset, 1)) - } -} - -#[derive(Clone, Copy)] -pub(crate) struct UnicodeCandidateOpts(CharEq); - -impl UnicodeCandidateOpts { - #[inline(always)] - pub fn new(is_case_sensitive: bool, normalize_candidate: bool) -> Self { - let fun = match (is_case_sensitive, normalize_candidate) { - (false, false) => case_insensitive_eq, - (true, false) => case_sensitive_eq, - (false, true) => case_insensitive_normalized_eq, - (true, true) => case_sensitive_normalized_eq, - }; - - Self(fun) - } -} - -impl Opts for UnicodeCandidateOpts { - #[inline(always)] - fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool { - self.0(query_ch, candidate_ch) - } - - #[inline(always)] - fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize { - char_len(&candidate[..byte_offset]) - } - - #[inline(always)] - fn find_first( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)> { - candidate.char_indices().find_map(|(offset, candidate_ch)| { - self.char_eq(query_ch, candidate_ch) - .then_some((offset, candidate_ch.len_utf8())) - }) - } - - #[inline(always)] - fn find_last( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)> { - candidate.char_indices().rev().find_map(|(offset, candidate_ch)| { - self.char_eq(query_ch, candidate_ch) - .then_some((offset, candidate_ch.len_utf8())) - }) - } -} diff --git a/src/utils.rs b/src/utils.rs index f461921..cce27f3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -33,6 +33,16 @@ pub fn case_sensitive_normalized_eq(lhs: char, rhs: char) -> bool { lhs == normalize_candidate_char(lhs, rhs) } +#[inline(always)] +pub fn char_eq(is_case_sensitive: bool, normalize_candidate: bool) -> CharEq { + match (is_case_sensitive, normalize_candidate) { + (false, false) => case_insensitive_eq, + (true, false) => case_sensitive_eq, + (false, true) => case_insensitive_normalized_eq, + (true, true) => case_sensitive_normalized_eq, + } +} + /// TODO: docs #[inline(always)] pub fn char_len(s: &str) -> usize { From 858c4f1876e04f09dc85c8cdfa596a0ad88f55d5 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Wed, 22 Nov 2023 22:52:12 +0100 Subject: [PATCH 02/20] fzf-v2: fix off by 1 errors --- src/algos/fzf/v2.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index 91fdfa7..0b09ae5 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -334,7 +334,7 @@ fn score_first_row( for char_offset in candidate.matches(first_pattern_char) { penalty = penalty::GAP_START; - for col in column..column + char_offset { + for col in column + 1..char_offset { let score = prev_score.saturating_sub(penalty); scores_first_row[col] = score; prev_score = score; @@ -361,7 +361,7 @@ fn score_first_row( penalty = penalty::GAP_START; - for col in column..scores_first_row.len() { + for col in column + 1..scores_first_row.len() { let score = prev_score.saturating_sub(penalty); scores_first_row[col] = score; prev_score = score; @@ -405,14 +405,14 @@ fn score_remaining_rows( { penalty = penalty::GAP_START; - for col in column..column + char_offset { + for col in column + 1..char_offset { let score_left = scores_row[col - 1]; let score = score_left.saturating_sub(penalty); scores_row[col] = score; penalty = penalty::GAP_EXTENSION; } - column += char_offset; + column = char_offset; let score_left = scores_row[column - 1].saturating_sub(penalty); @@ -453,7 +453,7 @@ fn score_remaining_rows( penalty = penalty::GAP_START; - for col in column..matrix_width { + for col in column + 1..matrix_width { let score_left = scores_row[col - 1]; let score = score_left.saturating_sub(penalty); scores_row[col] = score; From 0bc5f980886275cd00777a629c97ffac43ef0700 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Wed, 22 Nov 2023 23:01:41 +0100 Subject: [PATCH 03/20] fzf-v2: add an `initial_char_class` field to `CandidateV2` --- src/algos/fzf/candidate.rs | 8 ++++++-- src/algos/fzf/v2.rs | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/algos/fzf/candidate.rs b/src/algos/fzf/candidate.rs index c031740..46e0315 100644 --- a/src/algos/fzf/candidate.rs +++ b/src/algos/fzf/candidate.rs @@ -10,6 +10,9 @@ pub(super) struct CandidateV2<'a> { /// TODO: docs base: Candidate<'a>, + /// TODO: docs + initial_char_class: CharClass, + /// TODO: docs opts: CandidateOpts, } @@ -51,7 +54,7 @@ impl<'a> CandidateV2<'a> { } let prev_class = if char_idx == 0 { - scheme.initial_char_class + self.initial_char_class } else { char_class(self.char(char_idx - 1), scheme) }; @@ -103,9 +106,10 @@ impl<'a> CandidateV2<'a> { pub fn new( base: Candidate<'a>, bonus_slab: &'a mut BonusSlab, + initial_char_class: CharClass, opts: CandidateOpts, ) -> Self { let bonuses = bonus_slab.alloc(base.char_len()); - Self { base, bonuses, opts } + Self { base, bonuses, initial_char_class, opts } } } diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index 0b09ae5..3e508bc 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -195,9 +195,16 @@ pub(super) fn fzf_v2( let first_offset = match_offsets[0]; + let initial_char_class = if first_offset == 0 { + scheme.initial_char_class + } else { + char_class(candidate.char(first_offset - 1), scheme) + }; + let mut candidate = CandidateV2::new( candidate.slice(first_offset, last_match_offset), &mut slab.bonus, + initial_char_class, opts, ); From fbf0c6b5d746f01c545aa749d8b5a8785837061e Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Wed, 22 Nov 2023 23:12:49 +0100 Subject: [PATCH 04/20] fzf-v2: don't reset the penalty at `GAP_START` after last match in a row --- src/algos/fzf/v2.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index 3e508bc..af83a04 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -336,7 +336,7 @@ fn score_first_row( let mut column = 0; - let mut penalty; + let mut penalty = penalty::GAP_START; for char_offset in candidate.matches(first_pattern_char) { penalty = penalty::GAP_START; @@ -366,8 +366,6 @@ fn score_first_row( prev_score = score; } - penalty = penalty::GAP_START; - for col in column + 1..scores_first_row.len() { let score = prev_score.saturating_sub(penalty); scores_first_row[col] = score; @@ -405,7 +403,7 @@ fn score_remaining_rows( let mut column = first_match_offset; - let mut penalty; + let mut penalty = penalty::GAP_START; for char_offset in candidate.matches_from(first_match_offset, pattern_char) @@ -458,8 +456,6 @@ fn score_remaining_rows( scores_row[column] = score; } - penalty = penalty::GAP_START; - for col in column + 1..matrix_width { let score_left = scores_row[col - 1]; let score = score_left.saturating_sub(penalty); From b049092a0e1b7ec888fb98a219e03af38ea7d002 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 12:27:02 +0100 Subject: [PATCH 05/20] fzf: add `Fzf` trait --- src/algos/fzf/{common.rs => fzf.rs} | 294 ++++++++++++++++++---------- src/algos/fzf/{v1.rs => fzf_v1.rs} | 114 ++++++----- src/algos/fzf/{v2.rs => fzf_v2.rs} | 203 ++++++++----------- src/algos/fzf/mod.rs | 20 +- src/algos/fzf/query.rs | 61 ------ src/matched_ranges.rs | 2 +- src/metric.rs | 8 +- 7 files changed, 339 insertions(+), 363 deletions(-) rename src/algos/fzf/{common.rs => fzf.rs} (65%) rename src/algos/fzf/{v1.rs => fzf_v1.rs} (76%) rename src/algos/fzf/{v2.rs => fzf_v2.rs} (73%) diff --git a/src/algos/fzf/common.rs b/src/algos/fzf/fzf.rs similarity index 65% rename from src/algos/fzf/common.rs rename to src/algos/fzf/fzf.rs index 5838893..0c83a72 100644 --- a/src/algos/fzf/common.rs +++ b/src/algos/fzf/fzf.rs @@ -3,6 +3,98 @@ use core::ops::Range; use super::{query::*, *}; use crate::*; +/// TODO: docs +pub(super) trait Fzf { + /// TODO: docs + fn alloc_chars<'a>(&mut self, candidate: &str) -> &'a [char]; + + /// TODO: docs + fn scheme(&self) -> &Scheme; + + /// TODO: docs + fn fuzzy( + &mut self, + pattern: Pattern, + candidate: Candidate, + ranges: &mut MatchedRanges, + ) -> Option; + + /// TODO: docs + fn score( + &mut self, + pattern: Pattern, + candidate: Candidate, + ranges: &mut MatchedRanges, + ) -> Option { + let score = match pattern.match_type { + MatchType::Fuzzy => { + if pattern.is_inverse { + self.fuzzy::(pattern, candidate, ranges) + } else { + self.fuzzy::(pattern, candidate, ranges) + } + }, + + MatchType::Exact => { + todo!(); + }, + + MatchType::PrefixExact => { + todo!(); + }, + + MatchType::SuffixExact => { + todo!(); + }, + + MatchType::EqualExact => { + todo!(); + }, + }; + + match (score.is_some(), pattern.is_inverse) { + (true, false) => score, + (false, true) => Some(0), + _ => None, + } + } + + /// TODO: docs + #[inline(always)] + fn distance( + &mut self, + query: FzfQuery, + candidate: &str, + ranges: &mut MatchedRanges, + ) -> Option { + if query.is_empty() { + return Some(FzfDistance::from_score(0)); + } + + let candidate = if candidate.is_ascii() { + Candidate::Ascii(candidate.as_bytes()) + } else { + Candidate::Unicode(self.alloc_chars(candidate)) + }; + + match query.search_mode { + SearchMode::NotExtended(pattern) => self + .fuzzy::(pattern, candidate, ranges) + .map(FzfDistance::from_score), + + SearchMode::Extended(conditions) => { + let mut total_score: Score = 0; + for condition in conditions { + total_score += condition.iter().find_map(|pattern| { + self.score::(pattern, candidate, ranges) + })?; + } + Some(FzfDistance::from_score(total_score)) + }, + } + } +} + /// TODO: docs #[inline] pub(super) fn calculate_score( @@ -406,104 +498,104 @@ fn ignored_candidate_trailing_spaces( } } -#[cfg(test)] -mod tests { - #![allow(clippy::single_range_in_vec_init)] - - use super::*; - - #[test] - fn equal_match_1() { - let pattern = - Pattern::parse("^AbC$".chars().collect::>().leak()); - - let mut ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - "ABC", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_none()); - - { - ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - "AbC", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_some()); - - assert_eq!(ranges_buf.as_slice(), [0..3]); - } - - { - ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - "AbC ", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_some()); - - assert_eq!(ranges_buf.as_slice(), [0..3]); - } - - { - ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - " AbC ", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_some()); - - assert_eq!(ranges_buf.as_slice(), [1..4]); - } - - { - ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - " AbC", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_some()); - - assert_eq!(ranges_buf.as_slice(), [2..5]); - } - } - - #[test] - fn exact_match_1() { - let pattern = Pattern::parse("abc".chars().collect::>().leak()); - - let mut ranges_buf = MatchedRanges::default(); - - assert!(exact_match( - pattern, - "aabbcc abc", - AsciiCandidateOpts::new(true), - &Scheme::default(), - Some(&mut ranges_buf) - ) - .is_some()); - - assert_eq!(ranges_buf.as_slice(), [7..10]); - } -} +// #[cfg(test)] +// mod tests { +// #![allow(clippy::single_range_in_vec_init)] +// +// use super::*; +// +// #[test] +// fn equal_match_1() { +// let pattern = +// Pattern::parse("^AbC$".chars().collect::>().leak()); +// +// let mut ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// "ABC", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_none()); +// +// { +// ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// "AbC", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_some()); +// +// assert_eq!(ranges_buf.as_slice(), [0..3]); +// } +// +// { +// ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// "AbC ", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_some()); +// +// assert_eq!(ranges_buf.as_slice(), [0..3]); +// } +// +// { +// ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// " AbC ", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_some()); +// +// assert_eq!(ranges_buf.as_slice(), [1..4]); +// } +// +// { +// ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// " AbC", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_some()); +// +// assert_eq!(ranges_buf.as_slice(), [2..5]); +// } +// } +// +// #[test] +// fn exact_match_1() { +// let pattern = Pattern::parse("abc".chars().collect::>().leak()); +// +// let mut ranges_buf = MatchedRanges::default(); +// +// assert!(exact_match( +// pattern, +// "aabbcc abc", +// todo!(), +// &Scheme::default(), +// Some(&mut ranges_buf) +// ) +// .is_some()); +// +// assert_eq!(ranges_buf.as_slice(), [7..10]); +// } +// } diff --git a/src/algos/fzf/v1.rs b/src/algos/fzf/fzf_v1.rs similarity index 76% rename from src/algos/fzf/v1.rs rename to src/algos/fzf/fzf_v1.rs index 2745fad..7956a03 100644 --- a/src/algos/fzf/v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -7,6 +7,9 @@ use crate::*; #[cfg_attr(docsrs, doc(cfg(feature = "fzf-v1")))] #[derive(Clone, Default)] pub struct FzfV1 { + /// TODO: docs + candidate_slab: CandidateSlab, + /// TODO: docs case_sensitivity: CaseSensitivity, @@ -39,31 +42,6 @@ impl FzfV1 { Self::default() } - /// TODO: docs - #[cfg(feature = "tests")] - pub fn scheme(&self) -> &Scheme { - &self.scheme - } - - /// TODO: docs - #[inline(always)] - fn score( - &mut self, - pattern: Pattern, - candidate: Candidate, - buf: Option<&mut MatchedRanges>, - ) -> Option { - let is_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - let opts = CandidateOpts::new(is_sensitive, self.normalization); - - fzf_v1(pattern, candidate, opts, &self.scheme, buf, ()) - } - /// TODO: docs #[inline(always)] pub fn with_case_sensitivity( @@ -104,51 +82,69 @@ impl Metric for FzfV1 { #[inline(always)] fn distance( &mut self, - _query: FzfQuery<'_>, - _candidate: &str, - ) -> Option> { - todo!(); + query: FzfQuery<'_>, + candidate: &str, + ) -> Option { + let ranges = &mut MatchedRanges::default(); + ::distance::(self, query, candidate, ranges) } #[inline] fn distance_and_ranges( &mut self, - _query: FzfQuery<'_>, - _candidate: &str, - _ranges_buf: &mut Vec>, + query: FzfQuery<'_>, + candidate: &str, + ranges: &mut MatchedRanges, ) -> Option { - todo!() + ::distance::(self, query, candidate, ranges) } } -/// TODO: docs -#[inline] -pub(super) fn fzf_v1( - pattern: Pattern, - _candidate: Candidate, - _opts: CandidateOpts, - _scheme: &Scheme, - _ranges_buf: Option<&mut MatchedRanges>, - _: (), -) -> Option { - // TODO: can we remove this? - if pattern.is_empty() { - return Some(0); +impl Fzf for FzfV1 { + #[inline(always)] + fn alloc_chars<'a>(&mut self, s: &str) -> &'a [char] { + unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) } + } + + #[inline(always)] + fn scheme(&self) -> &Scheme { + &self.scheme } - todo!(); - - // let range_forward = forward_pass(pattern, candidate, opts)?; - // - // let start_backward = - // backward_pass(pattern, &candidate[range_forward.clone()], opts); - // - // let range = range_forward.start + start_backward..range_forward.end; - // - // let score = - // calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); - // - // Some(score) + #[inline(always)] + fn fuzzy( + &mut self, + pattern: Pattern, + _candidate: Candidate, + _ranges: &mut MatchedRanges, + ) -> Option { + // TODO: can we remove this? + if pattern.is_empty() { + return Some(0); + } + + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + let _opts = CandidateOpts::new(is_sensitive, self.normalization); + + todo!(); + + // let range_forward = forward_pass(pattern, candidate, opts)?; + // + // let start_backward = + // backward_pass(pattern, &candidate[range_forward.clone()], opts); + // + // let range = range_forward.start + start_backward..range_forward.end; + // + // let score = + // calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); + // + // Some(score) + } } /// TODO: docs diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/fzf_v2.rs similarity index 73% rename from src/algos/fzf/v2.rs rename to src/algos/fzf/fzf_v2.rs index af83a04..76551fe 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/fzf_v2.rs @@ -1,5 +1,3 @@ -use core::ops::Range; - use super::{query::*, slab::*, *}; use crate::*; @@ -45,12 +43,6 @@ impl FzfV2 { Self::default() } - /// TODO: docs - #[cfg(feature = "tests")] - pub fn scheme(&self) -> &Scheme { - &self.scheme - } - /// TODO: docs #[inline(always)] pub fn with_case_sensitivity( @@ -93,141 +85,100 @@ impl Metric for FzfV2 { &mut self, query: FzfQuery<'_>, candidate: &str, - ) -> Option> { - if query.is_empty() { - return Some(Match::default()); - } - - let candidate = if candidate.is_ascii() { - Candidate::Ascii(candidate.as_bytes()) - } else { - let chars = self.candidate_slab.alloc(candidate); - Candidate::Unicode(chars) - }; - - let mut buf = if self.with_matched_ranges { - Some(MatchedRanges::default()) - } else { - None - }; - - let conditions = match query.search_mode { - SearchMode::Extended(conditions) => conditions, - - SearchMode::NotExtended(pattern) => { - let is_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - let score = fzf_v2( - pattern, - candidate, - CandidateOpts::new(is_sensitive, self.normalization), - &self.scheme, - None, - &mut self.slab, - )?; - - let distance = FzfDistance::from_score(score); - - return Some(Match::new(distance, buf.unwrap_or_default())); - }, - }; - - let mut total_score = 0; - - for condition in conditions { - let score = condition.iter().find_map(|pattern| { - let is_sensitive = match self.case_sensitivity { - CaseSensitivity::Sensitive => true, - CaseSensitivity::Insensitive => false, - CaseSensitivity::Smart => pattern.has_uppercase, - }; - - pattern.score( - candidate, - CandidateOpts::new(is_sensitive, self.normalization), - &self.scheme, - buf.as_mut(), - &mut self.slab, - fzf_v2, - ) - })?; - - total_score += score; - } - - let distance = FzfDistance::from_score(total_score); - - Some(Match::new(distance, buf.unwrap_or_default())) + ) -> Option { + let ranges = &mut MatchedRanges::default(); + ::distance::(self, query, candidate, ranges) } - #[inline] + #[inline(always)] fn distance_and_ranges( &mut self, - _query: FzfQuery<'_>, - _candidate: &str, - _ranges_buf: &mut Vec>, + query: FzfQuery<'_>, + candidate: &str, + ranges: &mut MatchedRanges, ) -> Option { - todo!(); + ::distance::(self, query, candidate, ranges) } } -/// TODO: docs -#[inline] -pub(super) fn fzf_v2( - pattern: Pattern, - candidate: Candidate, - opts: CandidateOpts, - scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, - slab: &mut V2Slab, -) -> Option { - // TODO: can we remove this? - if pattern.is_empty() { - return Some(0); +impl Fzf for FzfV2 { + #[inline(always)] + fn alloc_chars<'a>(&mut self, s: &str) -> &'a [char] { + unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) } + } + + #[inline(always)] + fn scheme(&self) -> &Scheme { + &self.scheme } - let (match_offsets, last_match_offset) = - matches(&mut slab.matched_indices, pattern, candidate, opts)?; + #[inline(always)] + fn fuzzy( + &mut self, + pattern: Pattern, + candidate: Candidate, + ranges: &mut MatchedRanges, + ) -> Option { + // TODO: can we remove this? + if pattern.is_empty() { + return Some(0); + } + + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; - let first_offset = match_offsets[0]; + let opts = CandidateOpts::new(is_sensitive, self.normalization); - let initial_char_class = if first_offset == 0 { - scheme.initial_char_class - } else { - char_class(candidate.char(first_offset - 1), scheme) - }; + let (match_offsets, last_match_offset) = + matches(&mut self.slab.matched_indices, pattern, candidate, opts)?; - let mut candidate = CandidateV2::new( - candidate.slice(first_offset, last_match_offset), - &mut slab.bonus, - initial_char_class, - opts, - ); + let first_offset = match_offsets[0]; - // After slicing the candidate we need to move all the offsets back - // by the offset of the first match so that they still refer to the - // same characters. - match_offsets.iter_mut().for_each(|offset| *offset -= first_offset); + let start_byte_offset = + if RANGES { candidate.to_byte_offset(first_offset) } else { 0 }; - let (scores, consecutive, score, score_cell) = score( - &mut slab.scoring_matrix, - &mut slab.consecutive_matrix, - pattern, - &mut candidate, - match_offsets, - scheme, - ); + let initial_char_class = if first_offset == 0 { + self.scheme.initial_char_class + } else { + char_class(candidate.char(first_offset - 1), &self.scheme) + }; - if let Some(buf) = ranges_buf { - let candidate = candidate.into_base(); - matched_ranges(scores, consecutive, score_cell, candidate, 0, buf); - }; + let mut candidate = CandidateV2::new( + candidate.slice(first_offset, last_match_offset), + &mut self.slab.bonus, + initial_char_class, + opts, + ); + + // After slicing the candidate we move all the offsets back by the + // first offset. + match_offsets.iter_mut().for_each(|offset| *offset -= first_offset); + + let (scores, consecutive, score, score_cell) = score( + &mut self.slab.scoring_matrix, + &mut self.slab.consecutive_matrix, + pattern, + &mut candidate, + match_offsets, + &self.scheme, + ); + + if RANGES { + matched_ranges( + scores, + consecutive, + score_cell, + candidate.into_base(), + start_byte_offset, + ranges, + ); + }; - Some(score) + Some(score) + } } /// TODO: docs diff --git a/src/algos/fzf/mod.rs b/src/algos/fzf/mod.rs index 3bfdeed..f72109d 100644 --- a/src/algos/fzf/mod.rs +++ b/src/algos/fzf/mod.rs @@ -44,22 +44,26 @@ //! [extended-search]: https://github.com/junegunn/fzf#search-syntax mod candidate; -mod common; mod distance; +mod fzf; +#[cfg(feature = "fzf-v1")] +mod fzf_v1; +#[cfg(feature = "fzf-v1")] +mod fzf_v2; mod parser; mod query; mod scheme; mod scoring; mod slab; -#[cfg(feature = "fzf-v1")] -mod v1; -#[cfg(feature = "fzf-v1")] -mod v2; use candidate::*; -use common::*; pub use distance::FzfDistance; use distance::*; +use fzf::*; +#[cfg(feature = "fzf-v1")] +pub use fzf_v1::FzfV1; +#[cfg(feature = "fzf-v1")] +pub use fzf_v2::FzfV2; pub use parser::*; pub use query::FzfQuery; pub use scheme::FzfScheme; @@ -67,10 +71,6 @@ pub use scheme::FzfScheme; pub use scheme::Scheme; use scoring::*; use slab::*; -#[cfg(feature = "fzf-v1")] -pub use v1::FzfV1; -#[cfg(feature = "fzf-v1")] -pub use v2::FzfV2; #[doc(hidden)] pub mod bonus { diff --git a/src/algos/fzf/query.rs b/src/algos/fzf/query.rs index a857adc..bc5c169 100644 --- a/src/algos/fzf/query.rs +++ b/src/algos/fzf/query.rs @@ -1,18 +1,5 @@ use core::fmt::Write; -use super::*; -use crate::*; - -/// TODO: docs -type FuzzyAlgo = fn( - Pattern, - Candidate, - CandidateOpts, - &Scheme, - Option<&mut MatchedRanges>, - T, -) -> Option; - /// A parsed fzf query. /// /// This struct is created by the [`parse`](FzfParser::parse) method on @@ -298,54 +285,6 @@ impl<'a> Pattern<'a> { } } - /// TODO: docs - #[inline] - pub(super) fn score( - self, - candidate: Candidate, - opts: CandidateOpts, - scheme: &Scheme, - mut ranges_buf: Option<&mut MatchedRanges>, - extra: E, - fuzzy_algo: FuzzyAlgo, - ) -> Option { - if self.is_inverse { - ranges_buf = None; - } - - let result = match self.match_type { - MatchType::Fuzzy => { - fuzzy_algo(self, candidate, opts, scheme, ranges_buf, extra) - }, - - MatchType::Exact => { - todo!() - // exact_match(self, candidate, opts, scheme, ranges_buf) - }, - - MatchType::PrefixExact => { - todo!() - // prefix_match(self, candidate, opts, scheme, ranges_buf) - }, - - MatchType::SuffixExact => { - todo!() - // suffix_match(self, candidate, opts, scheme, ranges_buf) - }, - - MatchType::EqualExact => { - todo!() - // equal_match(self, candidate, opts, scheme, ranges_buf) - }, - }; - - match (result.is_some(), self.is_inverse) { - (true, false) => result, - (false, true) => Some(0), - _ => None, - } - } - /// TODO: docs #[inline(always)] pub(super) fn trailing_spaces(&self) -> usize { diff --git a/src/matched_ranges.rs b/src/matched_ranges.rs index 49d2a96..646ab83 100644 --- a/src/matched_ranges.rs +++ b/src/matched_ranges.rs @@ -4,7 +4,7 @@ use crate::tiny_vec::TinyVec; /// TODO: docs #[derive(Default)] -pub(crate) struct MatchedRanges { +pub struct MatchedRanges { ranges: TinyVec<8, Range>, } diff --git a/src/metric.rs b/src/metric.rs index 31c2d2d..1f57c5d 100644 --- a/src/metric.rs +++ b/src/metric.rs @@ -1,6 +1,4 @@ -use core::ops::Range; - -use crate::Match; +use crate::MatchedRanges; /// A trait representing a distance metric on strings. /// @@ -48,7 +46,7 @@ pub trait Metric { &mut self, query: Self::Query<'_>, candidate: &str, - ) -> Option>; + ) -> Option; /// This method always returns the same value as [`Self::distance`], but in /// the case of a match it also fills the provided buffer with the **byte** @@ -58,6 +56,6 @@ pub trait Metric { &mut self, _query: Self::Query<'_>, _candidate: &str, - _ranges_buf: &mut Vec>, + _ranges_buf: &mut MatchedRanges, ) -> Option; } From 21bb609b079fc178bc6283d7e10a6c7d8e3f71eb Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 13:31:17 +0100 Subject: [PATCH 06/20] fzf-v1: re-implement it using `Candidate` --- src/algos/fzf/fzf_v1.rs | 84 +++++++++++++++++++---------------------- src/candidate.rs | 26 +++++++++++++ 2 files changed, 64 insertions(+), 46 deletions(-) diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index 7956a03..bb6e97b 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -115,7 +115,7 @@ impl Fzf for FzfV1 { fn fuzzy( &mut self, pattern: Pattern, - _candidate: Candidate, + candidate: Candidate, _ranges: &mut MatchedRanges, ) -> Option { // TODO: can we remove this? @@ -129,17 +129,17 @@ impl Fzf for FzfV1 { CaseSensitivity::Smart => pattern.has_uppercase, }; - let _opts = CandidateOpts::new(is_sensitive, self.normalization); + let opts = CandidateOpts::new(is_sensitive, self.normalization); + + let range_forward = forward_pass(pattern, candidate, opts)?; + + let start_backward = + backward_pass(pattern, candidate, range_forward.end, opts); + + let range = range_forward.start + start_backward..range_forward.end; todo!(); - // let range_forward = forward_pass(pattern, candidate, opts)?; - // - // let start_backward = - // backward_pass(pattern, &candidate[range_forward.clone()], opts); - // - // let range = range_forward.start + start_backward..range_forward.end; - // // let score = // calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); // @@ -151,17 +151,21 @@ impl Fzf for FzfV1 { #[inline] fn forward_pass( pattern: Pattern, - mut candidate: &str, - opts: impl Opts, + candidate: Candidate, + opts: CandidateOpts, ) -> Option> { let mut pattern_chars = pattern.chars(); let mut pattern_char = pattern_chars.next()?; - let (start_offset, matched_char_byte_len) = - opts.find_first(pattern_char, candidate)?; + let start_offset = candidate.find_first_from( + 0, + pattern_char, + opts.is_case_sensitive, + opts.char_eq, + )?; - let mut end_offset = start_offset + matched_char_byte_len; + let mut end_offset = start_offset + 1; if let Some(next) = pattern_chars.next() { pattern_char = next; @@ -169,25 +173,19 @@ fn forward_pass( return Some(start_offset..end_offset); } - // SAFETY: todo. - candidate = unsafe { candidate.get_unchecked(end_offset..) }; - loop { - let (byte_offset, matched_char_byte_len) = - opts.find_first(pattern_char, candidate)?; - - end_offset += byte_offset + matched_char_byte_len; + end_offset = candidate.find_first_from( + end_offset, + pattern_char, + opts.is_case_sensitive, + opts.char_eq, + )? + 1; if let Some(next) = pattern_chars.next() { pattern_char = next; } else { return Some(start_offset..end_offset); } - - // SAFETY: todo. - candidate = unsafe { - candidate.get_unchecked(byte_offset + matched_char_byte_len..) - }; } } @@ -195,36 +193,30 @@ fn forward_pass( #[inline] fn backward_pass( pattern: Pattern, - mut candidate: &str, - opts: impl Opts, + candidate: Candidate, + end_offset: usize, + opts: CandidateOpts, ) -> usize { - // The candidate must start with the first character of the query. - debug_assert!(opts.char_eq( - pattern.chars().next().unwrap(), - candidate.chars().next().unwrap(), - )); - - // The candidate must end with the last character of the query. - debug_assert!(opts.char_eq( - pattern.chars().next_back().unwrap(), - candidate.chars().next_back().unwrap(), - )); - let mut pattern_chars = pattern.chars().rev(); let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); + let mut start_offset = end_offset; + loop { - let (byte_offset, _) = - opts.find_last(pattern_char, candidate).unwrap(); + start_offset = candidate + .find_last_from( + start_offset, + pattern_char, + opts.is_case_sensitive, + opts.char_eq, + ) + .unwrap(); if let Some(next) = pattern_chars.next() { pattern_char = next; } else { - return byte_offset; + return start_offset; } - - // SAFETY: todo. - candidate = unsafe { candidate.get_unchecked(..byte_offset) }; } } diff --git a/src/candidate.rs b/src/candidate.rs index 9d1eb6c..6f5c9fe 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -77,6 +77,32 @@ impl<'a> Candidate<'a> { } } + /// TODO: docs + #[inline(always)] + pub fn find_last_from( + &self, + end_offset: usize, + ch: char, + is_case_sensitive: bool, + char_eq: CharEq, + ) -> Option { + match self { + Candidate::Ascii(slice) => { + if ch.is_ascii() { + let slice = &slice[..end_offset]; + find_last_ascii(ch as _, slice, is_case_sensitive) + } else { + None + } + }, + + Candidate::Unicode(slice) => { + let slice = &slice[..end_offset]; + find_last_unicode(ch, slice, char_eq) + }, + } + } + /// TODO: docs #[inline(always)] pub fn matches( From 1c35a4b7a4dd7e5857075d2d110ed91baedc9557 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 17:14:44 +0100 Subject: [PATCH 07/20] fzf: re-implement `exact_match` using `Candidate` --- src/algos/fzf/fzf.rs | 78 +++++++++++++++++++++++++-------------- src/algos/fzf/fzf_v1.rs | 11 ++++++ src/algos/fzf/fzf_v2.rs | 11 ++++++ src/candidate.rs | 81 ++++++++++++++++++++++++++++------------- 4 files changed, 128 insertions(+), 53 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index 0c83a72..081e325 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -1,6 +1,7 @@ use core::ops::Range; use super::{query::*, *}; +use crate::utils::CharEq; use crate::*; /// TODO: docs @@ -8,6 +9,9 @@ pub(super) trait Fzf { /// TODO: docs fn alloc_chars<'a>(&mut self, candidate: &str) -> &'a [char]; + /// TODO: docs + fn char_eq(&self, pattern: Pattern) -> CharEq; + /// TODO: docs fn scheme(&self) -> &Scheme; @@ -36,7 +40,25 @@ pub(super) trait Fzf { }, MatchType::Exact => { - todo!(); + let char_eq = self.char_eq(pattern); + + if pattern.is_inverse { + exact_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } else { + exact_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } }, MatchType::PrefixExact => { @@ -193,12 +215,12 @@ pub(super) fn calculate_score( /// TODO: docs #[inline] -pub(super) fn exact_match( +pub(super) fn exact_match( pattern: Pattern, - candidate: &str, - opts: impl Opts, + candidate: Candidate, + char_eq: CharEq, scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, + ranges: &mut MatchedRanges, ) -> Option { if pattern.is_empty() { return Some(0); @@ -216,30 +238,29 @@ pub(super) fn exact_match( // TODO: docs let mut matched = false; - let mut prev_char_class = scheme.initial_char_class; + let mut prev_class = scheme.initial_char_class; let mut start_offset = 0; 'outer: loop { let current_start_offset = start_offset; - let candidate = &candidate[start_offset..]; let mut bonus_start = 0; let mut current_bonus: Score = 0; let mut pattern_char_idx = 0; - let mut char_indices = candidate.char_indices(); + let mut chars = candidate.chars_from(start_offset).enumerate(); - for (byte_offset, candidate_ch) in char_indices.by_ref() { + for (char_offset, candidate_ch) in chars.by_ref() { let pattern_ch = pattern.char(pattern_char_idx); let char_class = char_class(candidate_ch, scheme); - if opts.char_eq(pattern_ch, candidate_ch) { + if (char_eq)(pattern_ch, candidate_ch) { if pattern_char_idx == 0 { - bonus_start = current_start_offset + byte_offset; - start_offset += byte_offset + candidate_ch.len_utf8(); + bonus_start = current_start_offset + char_offset; + start_offset += char_offset + 1; current_bonus = - compute_bonus(prev_char_class, char_class, scheme); + compute_bonus(prev_class, char_class, scheme); } pattern_char_idx += 1; @@ -252,9 +273,8 @@ pub(super) fn exact_match( best_bonus_start = bonus_start; - best_bonus_end = current_start_offset - + byte_offset - + candidate_ch.len_utf8(); + best_bonus_end = + current_start_offset + char_offset + 1; } if current_bonus >= bonus::BOUNDARY { @@ -267,10 +287,10 @@ pub(super) fn exact_match( break; } - prev_char_class = char_class; + prev_class = char_class; } - if char_indices.next().is_none() { + if chars.next().is_none() { break; } } @@ -281,20 +301,22 @@ pub(super) fn exact_match( let matched_range = best_bonus_start..best_bonus_end; - let score = calculate_score( - pattern, - candidate, - matched_range.clone(), - opts, - scheme, - None, - ); + // let score = calculate_score( + // pattern, + // candidate, + // matched_range.clone(), + // opts, + // scheme, + // None, + // ); - if let Some(ranges) = ranges_buf { + if RANGES { ranges.insert(matched_range); } - Some(score) + todo!(); + + // Some(score) } /// TODO: docs diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index bb6e97b..85c0816 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -106,6 +106,17 @@ impl Fzf for FzfV1 { unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) } } + #[inline(always)] + fn char_eq(&self, pattern: Pattern) -> utils::CharEq { + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + utils::char_eq(is_sensitive, self.normalization) + } + #[inline(always)] fn scheme(&self) -> &Scheme { &self.scheme diff --git a/src/algos/fzf/fzf_v2.rs b/src/algos/fzf/fzf_v2.rs index 76551fe..9231d9a 100644 --- a/src/algos/fzf/fzf_v2.rs +++ b/src/algos/fzf/fzf_v2.rs @@ -107,6 +107,17 @@ impl Fzf for FzfV2 { unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) } } + #[inline(always)] + fn char_eq(&self, pattern: Pattern) -> utils::CharEq { + let is_sensitive = match self.case_sensitivity { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + CaseSensitivity::Smart => pattern.has_uppercase, + }; + + utils::char_eq(is_sensitive, self.normalization) + } + #[inline(always)] fn scheme(&self) -> &Scheme { &self.scheme diff --git a/src/candidate.rs b/src/candidate.rs index 6f5c9fe..82003d5 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -17,6 +17,19 @@ impl<'a> Candidate<'a> { } } + /// TODO: docs + #[inline(always)] + pub fn chars_from(&self, char_offset: usize) -> Chars<'_> { + match self { + Candidate::Ascii(slice) => { + Chars::Ascii(slice[char_offset..].iter()) + }, + Candidate::Unicode(slice) => { + Chars::Unicode(slice[char_offset..].iter()) + }, + } + } + /// TODO: docs #[inline(always)] pub fn char_len(&self) -> usize { @@ -226,36 +239,21 @@ fn find_last_unicode( .find_map(|(idx, &ch)| char_eq(needle, ch).then_some(idx)) } -struct UnicodeMatches<'a> { - needle: char, - haystack: &'a [char], - char_eq: CharEq, - offset: usize, -} - -impl<'a> UnicodeMatches<'a> { - fn new(ch: char, haystack: &'a [char], char_eq: CharEq) -> Self { - Self { needle: ch, haystack, char_eq, offset: 0 } - } +/// TODO: docs +pub(crate) enum Chars<'a> { + Ascii(core::slice::Iter<'a, u8>), + Unicode(core::slice::Iter<'a, char>), } -impl Iterator for UnicodeMatches<'_> { - type Item = usize; +impl Iterator for Chars<'_> { + type Item = char; #[inline(always)] fn next(&mut self) -> Option { - let idx = - self.haystack.iter().enumerate().find_map(|(idx, &ch)| { - (self.char_eq)(self.needle, ch).then_some(idx) - })?; - - self.haystack = &self.haystack[idx + 1..]; - - let offset = self.offset + idx; - - self.offset = offset + 1; - - Some(offset) + match self { + Chars::Ascii(iter) => iter.next().copied().map(char::from), + Chars::Unicode(iter) => iter.next().copied(), + } } } @@ -323,3 +321,36 @@ impl Iterator for CandidateMatches<'_> { .map(|offset| self.start_offset + offset) } } + +struct UnicodeMatches<'a> { + needle: char, + haystack: &'a [char], + char_eq: CharEq, + offset: usize, +} + +impl<'a> UnicodeMatches<'a> { + fn new(ch: char, haystack: &'a [char], char_eq: CharEq) -> Self { + Self { needle: ch, haystack, char_eq, offset: 0 } + } +} + +impl Iterator for UnicodeMatches<'_> { + type Item = usize; + + #[inline(always)] + fn next(&mut self) -> Option { + let idx = + self.haystack.iter().enumerate().find_map(|(idx, &ch)| { + (self.char_eq)(self.needle, ch).then_some(idx) + })?; + + self.haystack = &self.haystack[idx + 1..]; + + let offset = self.offset + idx; + + self.offset = offset + 1; + + Some(offset) + } +} From 6343b481dbcc788471bacf700f9539144bdb8973 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:04:57 +0100 Subject: [PATCH 08/20] fzf: re-implement `prefix_match` using `Candidate` --- src/algos/fzf/fzf.rs | 260 +++++++++++++++++++++++-------------------- src/candidate.rs | 14 +++ 2 files changed, 154 insertions(+), 120 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index 081e325..4c8b7db 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -62,7 +62,25 @@ pub(super) trait Fzf { }, MatchType::PrefixExact => { - todo!(); + let char_eq = self.char_eq(pattern); + + if pattern.is_inverse { + prefix_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } else { + prefix_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } }, MatchType::SuffixExact => { @@ -117,102 +135,6 @@ pub(super) trait Fzf { } } -/// TODO: docs -#[inline] -pub(super) fn calculate_score( - pattern: Pattern, - candidate: &str, - candidate_range: Range, - opts: impl Opts, - scheme: &Scheme, - mut ranges_buf: Option<&mut MatchedRanges>, -) -> Score { - // TODO: docs - let mut is_in_gap = false; - - // TODO: docs - let mut is_first_pattern_char = true; - - // TODO: docs - let mut first_bonus: Score = 0; - - // TODO: docs - let mut consecutive = 0u32; - - let range_start = candidate_range.start; - - let mut prev_class = candidate[..candidate_range.start] - .chars() - .next_back() - .map(|ch| char_class(ch, scheme)) - .unwrap_or(scheme.initial_char_class); - - let mut pattern_chars = pattern.chars(); - - let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); - - let mut score: Score = 0; - - for (offset, candidate_ch) in candidate[candidate_range].char_indices() { - let ch_class = char_class(candidate_ch, scheme); - - if opts.char_eq(pattern_char, candidate_ch) { - score += bonus::MATCH; - - let mut bonus = compute_bonus(prev_class, ch_class, scheme); - - if consecutive == 0 { - first_bonus = bonus; - } else { - if bonus >= bonus::BOUNDARY && bonus > first_bonus { - first_bonus = bonus - } - bonus = bonus.max(first_bonus).max(bonus::CONSECUTIVE); - } - - score += if is_first_pattern_char { - bonus * bonus::FIRST_QUERY_CHAR_MULTIPLIER - } else { - bonus - }; - - if let Some(ranges) = &mut ranges_buf { - let start = range_start + offset; - let end = start + candidate_ch.len_utf8(); - ranges.insert(start..end); - } - - is_in_gap = false; - - is_first_pattern_char = false; - - consecutive += 1; - - if let Some(next_char) = pattern_chars.next() { - pattern_char = next_char; - } else { - break; - }; - } else { - score -= if is_in_gap { - penalty::GAP_EXTENSION - } else { - penalty::GAP_START - }; - - is_in_gap = true; - - consecutive = 0; - - first_bonus = 0; - } - - prev_class = ch_class; - } - - score -} - /// TODO: docs #[inline] pub(super) fn exact_match( @@ -321,12 +243,12 @@ pub(super) fn exact_match( /// TODO: docs #[inline] -pub(super) fn prefix_match( +pub(super) fn prefix_match( pattern: Pattern, - candidate: &str, - opts: impl Opts, + candidate: Candidate, + char_eq: CharEq, scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, + ranges: &mut MatchedRanges, ) -> Option { if pattern.is_empty() { return Some(0); @@ -339,10 +261,11 @@ pub(super) fn prefix_match( let mut match_byte_len = 0; - for (candidate_ch, pattern_ch) in - candidate[ignored_leading_spaces..].chars().zip(pattern_chars.by_ref()) + for (candidate_ch, pattern_ch) in candidate + .chars_from(ignored_leading_spaces) + .zip(pattern_chars.by_ref()) { - if !opts.char_eq(pattern_ch, candidate_ch) { + if !char_eq(pattern_ch, candidate_ch) { return None; } match_byte_len += candidate_ch.len_utf8(); @@ -352,23 +275,24 @@ pub(super) fn prefix_match( return None; } - let matched_range = - ignored_leading_spaces..ignored_leading_spaces + match_byte_len; - - let score = calculate_score( - pattern, - candidate, - matched_range.clone(), - opts, - scheme, - None, - ); + // let score = calculate_score( + // pattern, + // candidate, + // matched_range.clone(), + // opts, + // scheme, + // None, + // ); - if let Some(ranges) = ranges_buf { - ranges.insert(matched_range); + if RANGES { + let start = ignored_leading_spaces; + let end = start + match_byte_len; + ranges.insert(start..end); } - Some(score) + todo!() + + // Some(score) } /// TODO: docs @@ -494,9 +418,9 @@ pub(super) fn equal_match( #[inline(always)] fn ignored_candidate_leading_spaces( pattern: Pattern, - candidate: &str, + candidate: Candidate, ) -> Option { - let candidate_leading_spaces = utils::leading_spaces(candidate); + let candidate_leading_spaces = candidate.leading_spaces(); if pattern.leading_spaces() > candidate_leading_spaces { None @@ -520,6 +444,102 @@ fn ignored_candidate_trailing_spaces( } } +/// TODO: docs +#[inline] +pub(super) fn calculate_score( + pattern: Pattern, + candidate: &str, + candidate_range: Range, + opts: impl Opts, + scheme: &Scheme, + mut ranges_buf: Option<&mut MatchedRanges>, +) -> Score { + // TODO: docs + let mut is_in_gap = false; + + // TODO: docs + let mut is_first_pattern_char = true; + + // TODO: docs + let mut first_bonus: Score = 0; + + // TODO: docs + let mut consecutive = 0u32; + + let range_start = candidate_range.start; + + let mut prev_class = candidate[..candidate_range.start] + .chars() + .next_back() + .map(|ch| char_class(ch, scheme)) + .unwrap_or(scheme.initial_char_class); + + let mut pattern_chars = pattern.chars(); + + let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); + + let mut score: Score = 0; + + for (offset, candidate_ch) in candidate[candidate_range].char_indices() { + let ch_class = char_class(candidate_ch, scheme); + + if opts.char_eq(pattern_char, candidate_ch) { + score += bonus::MATCH; + + let mut bonus = compute_bonus(prev_class, ch_class, scheme); + + if consecutive == 0 { + first_bonus = bonus; + } else { + if bonus >= bonus::BOUNDARY && bonus > first_bonus { + first_bonus = bonus + } + bonus = bonus.max(first_bonus).max(bonus::CONSECUTIVE); + } + + score += if is_first_pattern_char { + bonus * bonus::FIRST_QUERY_CHAR_MULTIPLIER + } else { + bonus + }; + + if let Some(ranges) = &mut ranges_buf { + let start = range_start + offset; + let end = start + candidate_ch.len_utf8(); + ranges.insert(start..end); + } + + is_in_gap = false; + + is_first_pattern_char = false; + + consecutive += 1; + + if let Some(next_char) = pattern_chars.next() { + pattern_char = next_char; + } else { + break; + }; + } else { + score -= if is_in_gap { + penalty::GAP_EXTENSION + } else { + penalty::GAP_START + }; + + is_in_gap = true; + + consecutive = 0; + + first_bonus = 0; + } + + prev_class = ch_class; + } + + score +} + // #[cfg(test)] // mod tests { // #![allow(clippy::single_range_in_vec_init)] diff --git a/src/candidate.rs b/src/candidate.rs index 82003d5..e7440df 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -116,6 +116,20 @@ impl<'a> Candidate<'a> { } } + /// TODO: docs + #[inline(always)] + pub fn leading_spaces(&self) -> usize { + match self { + Candidate::Ascii(slice) => { + slice.iter().take_while(|&&ch| ch == b' ').count() + }, + + Candidate::Unicode(slice) => { + slice.iter().take_while(|&&ch| ch == ' ').count() + }, + } + } + /// TODO: docs #[inline(always)] pub fn matches( From 980548df3b2219784cd8aa77203fe6be0c38d1a3 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:21:38 +0100 Subject: [PATCH 09/20] fzf: re-implement `suffix_match` using `Candidate` --- src/algos/fzf/fzf.rs | 57 +++++++++++++++++++------------------ src/candidate.rs | 67 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 87 insertions(+), 37 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index 4c8b7db..2860ead 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -268,7 +268,9 @@ pub(super) fn prefix_match( if !char_eq(pattern_ch, candidate_ch) { return None; } - match_byte_len += candidate_ch.len_utf8(); + if RANGES { + match_byte_len += candidate_ch.len_utf8(); + } } if pattern_chars.next().is_some() { @@ -297,12 +299,12 @@ pub(super) fn prefix_match( /// TODO: docs #[inline] -pub(super) fn suffix_match( +pub(super) fn suffix_match( pattern: Pattern, - candidate: &str, - opts: impl Opts, + candidate: Candidate, + char_eq: CharEq, scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, + ranges: &mut MatchedRanges, ) -> Option { if pattern.is_empty() { return Some(0); @@ -310,43 +312,42 @@ pub(super) fn suffix_match( let mut pattern_chars = pattern.chars().rev(); - let up_to_ignored_spaces = candidate.len() + let chars_up_to_ignored_spaces = candidate.char_len() - ignored_candidate_trailing_spaces(pattern, candidate)?; - let mut match_byte_len = 0; - - for (candidate_ch, pattern_ch) in candidate[..up_to_ignored_spaces] - .chars() + for (candidate_ch, pattern_ch) in candidate + .slice(0, chars_up_to_ignored_spaces) + .chars_from(0) .rev() .zip(pattern_chars.by_ref()) { - if !opts.char_eq(pattern_ch, candidate_ch) { + if !char_eq(pattern_ch, candidate_ch) { return None; } - match_byte_len += candidate_ch.len_utf8(); } if pattern_chars.next().is_some() { return None; } - let matched_range = - up_to_ignored_spaces - match_byte_len..up_to_ignored_spaces; - - let score = calculate_score( - pattern, - candidate, - matched_range.clone(), - opts, - scheme, - None, - ); + // let score = calculate_score( + // pattern, + // candidate, + // matched_range.clone(), + // opts, + // scheme, + // None, + // ); - if let Some(ranges) = ranges_buf { - ranges.insert(matched_range); + if RANGES { + let end = chars_up_to_ignored_spaces; + let start = end - pattern.char_len(); + ranges.insert(candidate.to_byte_range(start..end)); } - Some(score) + todo!() + + // Some(score) } /// TODO: docs @@ -433,9 +434,9 @@ fn ignored_candidate_leading_spaces( #[inline(always)] fn ignored_candidate_trailing_spaces( pattern: Pattern, - candidate: &str, + candidate: Candidate, ) -> Option { - let candidate_trailing_spaces = utils::trailing_spaces(candidate); + let candidate_trailing_spaces = candidate.trailing_spaces(); if pattern.trailing_spaces() > candidate_trailing_spaces { None diff --git a/src/candidate.rs b/src/candidate.rs index e7440df..31efff9 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -1,3 +1,5 @@ +use core::ops::Range; + use crate::utils::*; /// TODO: docs @@ -10,7 +12,7 @@ pub(crate) enum Candidate<'a> { impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] - pub fn char(&self, char_idx: usize) -> char { + pub fn char(self, char_idx: usize) -> char { match self { Candidate::Ascii(candidate) => candidate[char_idx] as _, Candidate::Unicode(candidate) => candidate[char_idx], @@ -19,7 +21,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] - pub fn chars_from(&self, char_offset: usize) -> Chars<'_> { + pub fn chars_from(self, char_offset: usize) -> Chars<'a> { match self { Candidate::Ascii(slice) => { Chars::Ascii(slice[char_offset..].iter()) @@ -32,7 +34,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] - pub fn char_len(&self) -> usize { + pub fn char_len(self) -> usize { match self { Candidate::Ascii(slice) => slice.len(), Candidate::Unicode(slice) => slice.len(), @@ -42,7 +44,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] pub fn find_first_from( - &self, + self, char_offset: usize, ch: char, is_case_sensitive: bool, @@ -72,7 +74,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] pub fn find_last( - &self, + self, ch: char, is_case_sensitive: bool, char_eq: CharEq, @@ -93,7 +95,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] pub fn find_last_from( - &self, + self, end_offset: usize, ch: char, is_case_sensitive: bool, @@ -118,7 +120,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] - pub fn leading_spaces(&self) -> usize { + pub fn leading_spaces(self) -> usize { match self { Candidate::Ascii(slice) => { slice.iter().take_while(|&&ch| ch == b' ').count() @@ -133,7 +135,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] pub fn matches( - &self, + self, ch: char, is_case_sensitive: bool, char_eq: CharEq, @@ -152,7 +154,7 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] pub fn matches_from( - &self, + self, char_offset: usize, ch: char, is_case_sensitive: bool, @@ -200,6 +202,43 @@ impl<'a> Candidate<'a> { }, } } + + /// TODO: docs + #[inline(always)] + pub fn to_byte_range(self, char_range: Range) -> Range { + match self { + Candidate::Ascii(_) => char_range, + + Candidate::Unicode(slice) => { + let mut chars = slice[..char_range.end].iter(); + + let start = chars + .by_ref() + .map(|&ch| ch.len_utf8()) + .take(char_range.start) + .sum::(); + + let end = + start + chars.map(|&ch| ch.len_utf8()).sum::(); + + start..end + }, + } + } + + /// TODO: docs + #[inline(always)] + pub fn trailing_spaces(self) -> usize { + match self { + Candidate::Ascii(slice) => { + slice.iter().rev().take_while(|&&ch| ch == b' ').count() + }, + + Candidate::Unicode(slice) => { + slice.iter().rev().take_while(|&&ch| ch == ' ').count() + }, + } + } } #[inline(always)] @@ -271,6 +310,16 @@ impl Iterator for Chars<'_> { } } +impl DoubleEndedIterator for Chars<'_> { + #[inline(always)] + fn next_back(&mut self) -> Option { + match self { + Chars::Ascii(iter) => iter.next_back().copied().map(char::from), + Chars::Unicode(iter) => iter.next_back().copied(), + } + } +} + /// TODO: docs pub(crate) struct CandidateMatches<'a> { iter: CandidateMatchesIter<'a>, From 32ac7b483fa26161a9c1c4a3518b3f5cf9ad7454 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:32:45 +0100 Subject: [PATCH 10/20] fzf: re-implement `equal_match` using `Candidate` --- src/algos/fzf/fzf.rs | 91 +++++++++++++++++++++++++++++------------ src/algos/fzf/fzf_v2.rs | 2 +- src/candidate.rs | 14 ++++--- 3 files changed, 74 insertions(+), 33 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index 2860ead..e1bdf1c 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -84,11 +84,47 @@ pub(super) trait Fzf { }, MatchType::SuffixExact => { - todo!(); + let char_eq = self.char_eq(pattern); + + if pattern.is_inverse { + suffix_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } else { + suffix_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } }, MatchType::EqualExact => { - todo!(); + let char_eq = self.char_eq(pattern); + + if pattern.is_inverse { + equal_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } else { + equal_match::( + pattern, + candidate, + char_eq, + self.scheme(), + ranges, + ) + } }, }; @@ -316,8 +352,8 @@ pub(super) fn suffix_match( - ignored_candidate_trailing_spaces(pattern, candidate)?; for (candidate_ch, pattern_ch) in candidate - .slice(0, chars_up_to_ignored_spaces) - .chars_from(0) + .slice(0..chars_up_to_ignored_spaces) + .chars() .rev() .zip(pattern_chars.by_ref()) { @@ -352,12 +388,12 @@ pub(super) fn suffix_match( /// TODO: docs #[inline] -pub(super) fn equal_match( +pub(super) fn equal_match( pattern: Pattern, - candidate: &str, - opts: impl Opts, + candidate: Candidate, + char_eq: CharEq, scheme: &Scheme, - ranges_buf: Option<&mut MatchedRanges>, + ranges: &mut MatchedRanges, ) -> Option { if pattern.is_empty() { return Some(0); @@ -367,30 +403,29 @@ pub(super) fn equal_match( ignored_candidate_leading_spaces(pattern, candidate)?; // The candidate contains only spaces. - if ignored_leading_spaces == candidate.len() { + if ignored_leading_spaces == candidate.char_len() { return None; } let ignored_trailing_spaces = ignored_candidate_trailing_spaces(pattern, candidate)?; - let matched_range = - ignored_leading_spaces..candidate.len() - ignored_trailing_spaces; - - let relevant_candidate = &candidate[matched_range.clone()]; + let matched_char_range = + ignored_leading_spaces..candidate.char_len() - ignored_trailing_spaces; - if relevant_candidate.len() < pattern.char_len() { + if matched_char_range.len() < pattern.char_len() { return None; } let mut pattern_chars = pattern.chars(); - let mut candidate_chars = relevant_candidate.chars(); + let mut candidate_chars = + candidate.slice(matched_char_range.clone()).chars(); for (pattern_ch, candidate_ch) in pattern_chars.by_ref().zip(candidate_chars.by_ref()) { - if !opts.char_eq(pattern_ch, candidate_ch) { + if !char_eq(pattern_ch, candidate_ch) { return None; } } @@ -399,20 +434,22 @@ pub(super) fn equal_match( return None; } - let score = calculate_score( - pattern, - candidate, - matched_range.clone(), - opts, - scheme, - None, - ); + // let score = calculate_score( + // pattern, + // candidate, + // matched_char_range.clone(), + // opts, + // scheme, + // None, + // ); - if let Some(ranges) = ranges_buf { - ranges.insert(matched_range); + if RANGES { + ranges.insert(candidate.to_byte_range(matched_char_range)); } - Some(score) + todo!(); + + // Some(score) } /// TODO: docs diff --git a/src/algos/fzf/fzf_v2.rs b/src/algos/fzf/fzf_v2.rs index 9231d9a..1fc2731 100644 --- a/src/algos/fzf/fzf_v2.rs +++ b/src/algos/fzf/fzf_v2.rs @@ -158,7 +158,7 @@ impl Fzf for FzfV2 { }; let mut candidate = CandidateV2::new( - candidate.slice(first_offset, last_match_offset), + candidate.slice(first_offset..last_match_offset), &mut self.slab.bonus, initial_char_class, opts, diff --git a/src/candidate.rs b/src/candidate.rs index 31efff9..21eb09c 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -19,6 +19,12 @@ impl<'a> Candidate<'a> { } } + /// TODO: docs + #[inline(always)] + pub fn chars(self) -> Chars<'a> { + self.chars_from(0) + } + /// TODO: docs #[inline(always)] pub fn chars_from(self, char_offset: usize) -> Chars<'a> { @@ -180,14 +186,12 @@ impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] - pub fn slice(self, char_start: usize, char_end: usize) -> Self { + pub fn slice(self, char_range: Range) -> Self { match self { - Candidate::Ascii(slice) => { - Candidate::Ascii(&slice[char_start..char_end]) - }, + Candidate::Ascii(slice) => Candidate::Ascii(&slice[char_range]), Candidate::Unicode(slice) => { - Candidate::Unicode(&slice[char_start..char_end]) + Candidate::Unicode(&slice[char_range]) }, } } From 3544b520b9a3439060dafbf93357de70b7723367 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:49:46 +0100 Subject: [PATCH 11/20] fzf: re-implement `calculate_score` using `Candidate` --- src/algos/fzf/fzf.rs | 160 +++++++++++++++++++++------------------- src/algos/fzf/fzf_v1.rs | 16 ++-- src/utils.rs | 14 +--- 3 files changed, 96 insertions(+), 94 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index e1bdf1c..dc7afe4 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -173,7 +173,7 @@ pub(super) trait Fzf { /// TODO: docs #[inline] -pub(super) fn exact_match( +fn exact_match( pattern: Pattern, candidate: Candidate, char_eq: CharEq, @@ -188,10 +188,10 @@ pub(super) fn exact_match( let mut best_bonus: i64 = -1; // TODO: docs - let mut best_bonus_start = 0; + let mut best_bonus_char_start = 0; // TODO: docs - let mut best_bonus_end = 0; + let mut best_bonus_char_end = 0; // TODO: docs let mut matched = false; @@ -229,9 +229,9 @@ pub(super) fn exact_match( if current_bonus as i64 > best_bonus { best_bonus = current_bonus as _; - best_bonus_start = bonus_start; + best_bonus_char_start = bonus_start; - best_bonus_end = + best_bonus_char_end = current_start_offset + char_offset + 1; } @@ -257,29 +257,27 @@ pub(super) fn exact_match( return None; } - let matched_range = best_bonus_start..best_bonus_end; + let matched_range = best_bonus_char_start..best_bonus_char_end; - // let score = calculate_score( - // pattern, - // candidate, - // matched_range.clone(), - // opts, - // scheme, - // None, - // ); + let score = compute_score::( + pattern, + candidate, + matched_range.clone(), + char_eq, + scheme, + ranges, + ); if RANGES { - ranges.insert(matched_range); + ranges.insert(candidate.to_byte_range(matched_range)); } - todo!(); - - // Some(score) + Some(score) } /// TODO: docs #[inline] -pub(super) fn prefix_match( +fn prefix_match( pattern: Pattern, candidate: Candidate, char_eq: CharEq, @@ -313,29 +311,31 @@ pub(super) fn prefix_match( return None; } - // let score = calculate_score( - // pattern, - // candidate, - // matched_range.clone(), - // opts, - // scheme, - // None, - // ); - - if RANGES { + let matched_range = { let start = ignored_leading_spaces; let end = start + match_byte_len; - ranges.insert(start..end); - } + start..end + }; + + let score = compute_score::( + pattern, + candidate, + matched_range.clone(), + char_eq, + scheme, + ranges, + ); - todo!() + if RANGES { + ranges.insert(matched_range); + } - // Some(score) + Some(score) } /// TODO: docs #[inline] -pub(super) fn suffix_match( +fn suffix_match( pattern: Pattern, candidate: Candidate, char_eq: CharEq, @@ -366,29 +366,31 @@ pub(super) fn suffix_match( return None; } - // let score = calculate_score( - // pattern, - // candidate, - // matched_range.clone(), - // opts, - // scheme, - // None, - // ); - - if RANGES { + let matched_range = { let end = chars_up_to_ignored_spaces; let start = end - pattern.char_len(); - ranges.insert(candidate.to_byte_range(start..end)); - } + start..end + }; + + let score = compute_score::( + pattern, + candidate, + matched_range.clone(), + char_eq, + scheme, + ranges, + ); - todo!() + if RANGES { + ranges.insert(candidate.to_byte_range(matched_range)); + } - // Some(score) + Some(score) } /// TODO: docs #[inline] -pub(super) fn equal_match( +fn equal_match( pattern: Pattern, candidate: Candidate, char_eq: CharEq, @@ -434,22 +436,20 @@ pub(super) fn equal_match( return None; } - // let score = calculate_score( - // pattern, - // candidate, - // matched_char_range.clone(), - // opts, - // scheme, - // None, - // ); + let score = compute_score::( + pattern, + candidate, + matched_char_range.clone(), + char_eq, + scheme, + ranges, + ); if RANGES { ranges.insert(candidate.to_byte_range(matched_char_range)); } - todo!(); - - // Some(score) + Some(score) } /// TODO: docs @@ -484,13 +484,13 @@ fn ignored_candidate_trailing_spaces( /// TODO: docs #[inline] -pub(super) fn calculate_score( +pub(super) fn compute_score( pattern: Pattern, - candidate: &str, - candidate_range: Range, - opts: impl Opts, + candidate: Candidate, + candidate_char_range: Range, + char_eq: CharEq, scheme: &Scheme, - mut ranges_buf: Option<&mut MatchedRanges>, + ranges: &mut MatchedRanges, ) -> Score { // TODO: docs let mut is_in_gap = false; @@ -504,13 +504,19 @@ pub(super) fn calculate_score( // TODO: docs let mut consecutive = 0u32; - let range_start = candidate_range.start; + let byte_range_start = if RANGES { + candidate.to_byte_offset(candidate_char_range.start) + } else { + 0 + }; + + let mut byte_offset = 0; - let mut prev_class = candidate[..candidate_range.start] - .chars() - .next_back() - .map(|ch| char_class(ch, scheme)) - .unwrap_or(scheme.initial_char_class); + let mut prev_class = if candidate_char_range.start == 0 { + scheme.initial_char_class + } else { + char_class(candidate.char(candidate_char_range.start - 1), scheme) + }; let mut pattern_chars = pattern.chars(); @@ -518,10 +524,10 @@ pub(super) fn calculate_score( let mut score: Score = 0; - for (offset, candidate_ch) in candidate[candidate_range].char_indices() { + for candidate_ch in candidate.slice(candidate_char_range).chars() { let ch_class = char_class(candidate_ch, scheme); - if opts.char_eq(pattern_char, candidate_ch) { + if char_eq(pattern_char, candidate_ch) { score += bonus::MATCH; let mut bonus = compute_bonus(prev_class, ch_class, scheme); @@ -541,8 +547,8 @@ pub(super) fn calculate_score( bonus }; - if let Some(ranges) = &mut ranges_buf { - let start = range_start + offset; + if RANGES { + let start = byte_range_start + byte_offset; let end = start + candidate_ch.len_utf8(); ranges.insert(start..end); } @@ -573,6 +579,10 @@ pub(super) fn calculate_score( } prev_class = ch_class; + + if RANGES { + byte_offset += candidate_ch.len_utf8(); + } } score diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index 85c0816..abaa2cc 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -127,7 +127,7 @@ impl Fzf for FzfV1 { &mut self, pattern: Pattern, candidate: Candidate, - _ranges: &mut MatchedRanges, + ranges: &mut MatchedRanges, ) -> Option { // TODO: can we remove this? if pattern.is_empty() { @@ -149,12 +149,16 @@ impl Fzf for FzfV1 { let range = range_forward.start + start_backward..range_forward.end; - todo!(); + let score = compute_score::( + pattern, + candidate, + range, + opts.char_eq, + &self.scheme, + ranges, + ); - // let score = - // calculate_score(pattern, candidate, range, opts, scheme, ranges_buf); - // - // Some(score) + Some(score) } } diff --git a/src/utils.rs b/src/utils.rs index cce27f3..61f16b6 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -45,13 +45,7 @@ pub fn char_eq(is_case_sensitive: bool, normalize_candidate: bool) -> CharEq { /// TODO: docs #[inline(always)] -pub fn char_len(s: &str) -> usize { - s.chars().count() -} - -/// TODO: docs -#[inline(always)] -pub fn leading_spaces(s: &str) -> usize { +fn leading_spaces(s: &str) -> usize { s.bytes().take_while(|&b| b == b' ').count() } @@ -70,9 +64,3 @@ fn normalize_candidate_char(query_char: char, candidate_char: char) -> char { pub fn strip_leading_spaces(s: &str) -> &str { &s[leading_spaces(s)..] } - -/// TODO: docs -#[inline(always)] -pub fn trailing_spaces(s: &str) -> usize { - s.bytes().rev().take_while(|&b| b == b' ').count() -} From e83dd3d279cb7528aea8d1ec88afb9feb32d4b47 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:50:57 +0100 Subject: [PATCH 12/20] fix warnings --- src/lib.rs | 4 ---- src/match.rs | 33 --------------------------------- src/matched_ranges.rs | 2 +- src/opts.rs | 22 ---------------------- 4 files changed, 1 insertion(+), 60 deletions(-) delete mode 100644 src/match.rs delete mode 100644 src/opts.rs diff --git a/src/lib.rs b/src/lib.rs index 8b6d91a..32af582 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,11 +60,9 @@ extern crate alloc; mod algos; mod candidate; mod case_sensitivity; -mod r#match; mod matched_ranges; mod metric; mod normalize; -mod opts; mod tiny_vec; mod utils; @@ -73,5 +71,3 @@ use candidate::{Candidate, CandidateMatches}; pub use case_sensitivity::CaseSensitivity; use matched_ranges::MatchedRanges; pub use metric::Metric; -use opts::*; -pub use r#match::Match; diff --git a/src/match.rs b/src/match.rs deleted file mode 100644 index 7dbfcdd..0000000 --- a/src/match.rs +++ /dev/null @@ -1,33 +0,0 @@ -use core::ops::Range; - -use crate::MatchedRanges; - -/// TODO: docs -#[derive(Default)] -pub struct Match { - /// TODO: docs - distance: D, - - /// TODO: docs - matched_ranges: MatchedRanges, -} - -impl Match { - /// TODO: docs - #[inline(always)] - pub fn distance(&self) -> D { - self.distance - } - - /// TODO: docs - #[inline(always)] - pub fn matched_ranges(&self) -> &[Range] { - self.matched_ranges.as_slice() - } - - /// TODO: docs - #[inline(always)] - pub(crate) fn new(distance: D, matched_ranges: MatchedRanges) -> Self { - Self { distance, matched_ranges } - } -} diff --git a/src/matched_ranges.rs b/src/matched_ranges.rs index 646ab83..e6dce61 100644 --- a/src/matched_ranges.rs +++ b/src/matched_ranges.rs @@ -17,7 +17,7 @@ impl core::fmt::Debug for MatchedRanges { impl MatchedRanges { /// TODO: docs #[inline(always)] - pub(crate) fn as_slice(&self) -> &[Range] { + pub fn as_slice(&self) -> &[Range] { self.ranges.as_slice() } diff --git a/src/opts.rs b/src/opts.rs deleted file mode 100644 index 8f034d0..0000000 --- a/src/opts.rs +++ /dev/null @@ -1,22 +0,0 @@ -/// TODO: docs -pub(crate) trait Opts: Copy { - /// TODO: docs - fn char_eq(&self, query_ch: char, candidate_ch: char) -> bool; - - /// TODO: docs - fn find_first( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)>; - - /// TODO: docs - fn find_last( - &self, - query_ch: char, - candidate: &str, - ) -> Option<(usize, usize)>; - - /// TODO: docs - fn to_char_offset(&self, candidate: &str, byte_offset: usize) -> usize; -} From b551d1fabbb454b29a376fa2f8686d6ef13ccec2 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Thu, 23 Nov 2023 23:52:30 +0100 Subject: [PATCH 13/20] fix clippy --- src/algos/fzf/slab.rs | 2 +- src/lib.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/algos/fzf/slab.rs b/src/algos/fzf/slab.rs index 7e9699e..55bee4d 100644 --- a/src/algos/fzf/slab.rs +++ b/src/algos/fzf/slab.rs @@ -60,7 +60,7 @@ impl Default for BonusSlab { impl BonusSlab { /// TODO: docs #[inline] - pub fn alloc<'a>(&'a mut self, len: usize) -> &'a mut [Bonus] { + pub fn alloc(&mut self, len: usize) -> &mut [Bonus] { if len > self.vec.len() { self.vec.resize(len, Bonus::default()); } diff --git a/src/lib.rs b/src/lib.rs index 32af582..6364d89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,6 +49,7 @@ //! ``` #![cfg_attr(docsrs, feature(doc_cfg))] +#![allow(clippy::module_inception)] #![allow(clippy::needless_range_loop)] #![allow(clippy::too_many_arguments)] #![deny(missing_docs)] From 011c96a22e4e6eeb4f50a2f81319835f06d13ea2 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 00:12:29 +0100 Subject: [PATCH 14/20] fix tests --- src/algos/fzf/fzf.rs | 207 ++++++++++++++++++++-------------------- src/algos/fzf/fzf_v1.rs | 6 ++ src/algos/fzf/fzf_v2.rs | 6 ++ src/lib.rs | 2 +- tests/fzf_common.rs | 152 +++++++++++++++-------------- tests/fzf_v1.rs | 28 ++++-- tests/fzf_v2.rs | 59 ++++++++---- 7 files changed, 259 insertions(+), 201 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index dc7afe4..273679f 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -588,104 +588,109 @@ pub(super) fn compute_score( score } -// #[cfg(test)] -// mod tests { -// #![allow(clippy::single_range_in_vec_init)] -// -// use super::*; -// -// #[test] -// fn equal_match_1() { -// let pattern = -// Pattern::parse("^AbC$".chars().collect::>().leak()); -// -// let mut ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// "ABC", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_none()); -// -// { -// ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// "AbC", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_some()); -// -// assert_eq!(ranges_buf.as_slice(), [0..3]); -// } -// -// { -// ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// "AbC ", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_some()); -// -// assert_eq!(ranges_buf.as_slice(), [0..3]); -// } -// -// { -// ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// " AbC ", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_some()); -// -// assert_eq!(ranges_buf.as_slice(), [1..4]); -// } -// -// { -// ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// " AbC", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_some()); -// -// assert_eq!(ranges_buf.as_slice(), [2..5]); -// } -// } -// -// #[test] -// fn exact_match_1() { -// let pattern = Pattern::parse("abc".chars().collect::>().leak()); -// -// let mut ranges_buf = MatchedRanges::default(); -// -// assert!(exact_match( -// pattern, -// "aabbcc abc", -// todo!(), -// &Scheme::default(), -// Some(&mut ranges_buf) -// ) -// .is_some()); -// -// assert_eq!(ranges_buf.as_slice(), [7..10]); -// } -// } +#[cfg(test)] +mod tests { + #![allow(clippy::single_range_in_vec_init)] + + use super::*; + + fn candidate(s: &str) -> Candidate { + assert!(s.is_ascii()); + Candidate::Ascii(s.as_bytes()) + } + + #[test] + fn equal_match_1() { + let pattern = + Pattern::parse("^AbC$".chars().collect::>().leak()); + + let mut ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate("ABC"), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_none()); + + { + ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate("AbC"), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_some()); + + assert_eq!(ranges_buf.as_slice(), [0..3]); + } + + { + ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate("AbC "), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_some()); + + assert_eq!(ranges_buf.as_slice(), [0..3]); + } + + { + ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate(" AbC "), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_some()); + + assert_eq!(ranges_buf.as_slice(), [1..4]); + } + + { + ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate(" AbC"), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_some()); + + assert_eq!(ranges_buf.as_slice(), [2..5]); + } + } + + #[test] + fn exact_match_1() { + let pattern = Pattern::parse("abc".chars().collect::>().leak()); + + let mut ranges_buf = MatchedRanges::default(); + + assert!(exact_match::( + pattern, + candidate("aabbcc abc"), + utils::char_eq(false, false), + &Scheme::default(), + &mut ranges_buf + ) + .is_some()); + + assert_eq!(ranges_buf.as_slice(), [7..10]); + } +} diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index abaa2cc..923edc6 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -42,6 +42,12 @@ impl FzfV1 { Self::default() } + /// TODO: docs + #[cfg(feature = "tests")] + pub fn scheme(&self) -> &Scheme { + &self.scheme + } + /// TODO: docs #[inline(always)] pub fn with_case_sensitivity( diff --git a/src/algos/fzf/fzf_v2.rs b/src/algos/fzf/fzf_v2.rs index 1fc2731..dfb50eb 100644 --- a/src/algos/fzf/fzf_v2.rs +++ b/src/algos/fzf/fzf_v2.rs @@ -43,6 +43,12 @@ impl FzfV2 { Self::default() } + /// TODO: docs + #[cfg(feature = "tests")] + pub fn scheme(&self) -> &Scheme { + &self.scheme + } + /// TODO: docs #[inline(always)] pub fn with_case_sensitivity( diff --git a/src/lib.rs b/src/lib.rs index 6364d89..0daf6c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,5 +70,5 @@ mod utils; pub use algos::*; use candidate::{Candidate, CandidateMatches}; pub use case_sensitivity::CaseSensitivity; -use matched_ranges::MatchedRanges; +pub use matched_ranges::MatchedRanges; pub use metric::Metric; diff --git a/tests/fzf_common.rs b/tests/fzf_common.rs index 653e835..a4b2ccf 100644 --- a/tests/fzf_common.rs +++ b/tests/fzf_common.rs @@ -9,9 +9,9 @@ pub fn upstream_empty() { let m = m.unwrap(); - assert_eq!(m.distance().into_score(), 0); + assert_eq!(m.distance.into_score(), 0); - assert!(m.matched_ranges().is_empty()); + assert!(m.matched_ranges.as_slice().is_empty()); } pub fn upstream_fuzzy_1() { @@ -20,13 +20,13 @@ pub fn upstream_fuzzy_1() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::CAMEL_123 - penalty::GAP_START - 3 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [2..4, 8..9]); + assert_eq!(m.matched_ranges.as_slice(), [2..4, 8..9]); } pub fn upstream_fuzzy_2() { @@ -35,7 +35,7 @@ pub fn upstream_fuzzy_2() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white @@ -43,7 +43,7 @@ pub fn upstream_fuzzy_2() { - 4 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 4..5, 8..9]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 4..5, 8..9]); } pub fn upstream_fuzzy_3() { @@ -52,11 +52,11 @@ pub fn upstream_fuzzy_3() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + 2 * bonus::CONSECUTIVE + bonus::CAMEL_123 ); - assert_eq!(m.matched_ranges().sorted(), [9..13]); + assert_eq!(m.matched_ranges.as_slice(), [9..13]); } pub fn upstream_fuzzy_4() { @@ -65,13 +65,13 @@ pub fn upstream_fuzzy_4() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) * fzf.scheme().bonus_boundary_delimiter ); - assert_eq!(m.matched_ranges().sorted(), [6..10]); + assert_eq!(m.matched_ranges.as_slice(), [6..10]); } pub fn upstream_fuzzy_5() { @@ -80,14 +80,14 @@ pub fn upstream_fuzzy_5() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::BOUNDARY + fzf.scheme().bonus_boundary_delimiter - penalty::GAP_START ); - assert_eq!(m.matched_ranges().sorted(), [8..11, 12..13]); + assert_eq!(m.matched_ranges.as_slice(), [8..11, 12..13]); } pub fn upstream_fuzzy_6() { @@ -96,13 +96,13 @@ pub fn upstream_fuzzy_6() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 5 * bonus::MATCH + 3 * bonus::CONSECUTIVE - penalty::GAP_START - penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [3..6, 8..10]); + assert_eq!(m.matched_ranges.as_slice(), [3..6, 8..10]); } pub fn upstream_fuzzy_7() { @@ -111,7 +111,7 @@ pub fn upstream_fuzzy_7() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 5 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 + bonus::CONSECUTIVE @@ -119,7 +119,7 @@ pub fn upstream_fuzzy_7() { - penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [3..6, 8..10]); + assert_eq!(m.matched_ranges.as_slice(), [3..6, 8..10]); } pub fn upstream_fuzzy_8() { @@ -128,7 +128,7 @@ pub fn upstream_fuzzy_8() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::FIRST_QUERY_CHAR_MULTIPLIER * fzf.scheme().bonus_boundary_white @@ -137,7 +137,7 @@ pub fn upstream_fuzzy_8() { - 4 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 4..5, 8..9]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 4..5, 8..9]); } pub fn upstream_fuzzy_9() { @@ -146,7 +146,7 @@ pub fn upstream_fuzzy_9() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::FIRST_QUERY_CHAR_MULTIPLIER * fzf.scheme().bonus_boundary_white @@ -155,7 +155,7 @@ pub fn upstream_fuzzy_9() { - 2 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 3..4, 6..7]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 3..4, 6..7]); } pub fn upstream_fuzzy_10() { @@ -164,7 +164,7 @@ pub fn upstream_fuzzy_10() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 1) * fzf.scheme().bonus_boundary_white @@ -172,7 +172,7 @@ pub fn upstream_fuzzy_10() { - 3 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 4..5, 7..8]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 4..5, 7..8]); } pub fn upstream_fuzzy_11() { @@ -181,13 +181,13 @@ pub fn upstream_fuzzy_11() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [0..4]); + assert_eq!(m.matched_ranges.as_slice(), [0..4]); } pub fn upstream_fuzzy_12() { @@ -196,14 +196,14 @@ pub fn upstream_fuzzy_12() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 5 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 + bonus::NON_WORD + bonus::BOUNDARY ); - assert_eq!(m.matched_ranges().sorted(), [1..6]); + assert_eq!(m.matched_ranges.as_slice(), [1..6]); } pub fn upstream_fuzzy_13() { @@ -212,13 +212,13 @@ pub fn upstream_fuzzy_13() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::CAMEL_123 - penalty::GAP_START - 3 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [2..4, 8..9]); + assert_eq!(m.matched_ranges.as_slice(), [2..4, 8..9]); } pub fn upstream_fuzzy_14() { @@ -227,7 +227,7 @@ pub fn upstream_fuzzy_14() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::FIRST_QUERY_CHAR_MULTIPLIER * fzf.scheme().bonus_boundary_white @@ -236,7 +236,7 @@ pub fn upstream_fuzzy_14() { - 4 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 4..5, 8..9]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 4..5, 8..9]); } pub fn upstream_fuzzy_15() { @@ -245,7 +245,7 @@ pub fn upstream_fuzzy_15() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::FIRST_QUERY_CHAR_MULTIPLIER * fzf.scheme().bonus_boundary_white @@ -254,7 +254,7 @@ pub fn upstream_fuzzy_15() { - 2 * penalty::GAP_EXTENSION ); - assert_eq!(m.matched_ranges().sorted(), [0..1, 3..4, 6..7]); + assert_eq!(m.matched_ranges.as_slice(), [0..1, 3..4, 6..7]); } pub fn upstream_fuzzy_16() { @@ -263,14 +263,14 @@ pub fn upstream_fuzzy_16() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white + bonus::CAMEL_123.max(fzf.scheme().bonus_boundary_white) ); - assert_eq!(m.matched_ranges().sorted(), [0..4]); + assert_eq!(m.matched_ranges.as_slice(), [0..4]); } pub fn upstream_fuzzy_17() { @@ -279,11 +279,11 @@ pub fn upstream_fuzzy_17() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + 3 * bonus::BOUNDARY ); - assert_eq!(m.matched_ranges().sorted(), [2..6]); + assert_eq!(m.matched_ranges.as_slice(), [2..6]); } pub fn upstream_fuzzy_18() { @@ -317,11 +317,11 @@ pub fn upstream_exact_3() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + bonus::CAMEL_123 + bonus::CONSECUTIVE ); - assert_eq!(m.matched_ranges().sorted(), [2..5]); + assert_eq!(m.matched_ranges.as_slice(), [2..5]); } pub fn upstream_exact_4() { @@ -330,11 +330,11 @@ pub fn upstream_exact_4() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + bonus::CAMEL_123 + 2 * bonus::CONSECUTIVE ); - assert_eq!(m.matched_ranges().sorted(), [9..13]); + assert_eq!(m.matched_ranges.as_slice(), [9..13]); } pub fn upstream_exact_5() { @@ -343,13 +343,13 @@ pub fn upstream_exact_5() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) * fzf.scheme().bonus_boundary_delimiter ); - assert_eq!(m.matched_ranges().sorted(), [6..10]); + assert_eq!(m.matched_ranges.as_slice(), [6..10]); } pub fn upstream_exact_6() { @@ -358,13 +358,13 @@ pub fn upstream_exact_6() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 5 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) * bonus::BOUNDARY + fzf.scheme().bonus_boundary_delimiter ); - assert_eq!(m.matched_ranges().sorted(), [8..13]); + assert_eq!(m.matched_ranges.as_slice(), [8..13]); } pub fn upstream_exact_7() { @@ -372,12 +372,9 @@ pub fn upstream_exact_7() { let m = m.unwrap(); - assert_eq!( - m.distance().into_score(), - 2 * bonus::MATCH + bonus::CONSECUTIVE - ); + assert_eq!(m.distance.into_score(), 2 * bonus::MATCH + bonus::CONSECUTIVE); - assert_eq!(m.matched_ranges().sorted(), [1..3]); + assert_eq!(m.matched_ranges.as_slice(), [1..3]); } pub fn upstream_prefix_1() { @@ -396,13 +393,13 @@ pub fn upstream_prefix_3() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [0..3]); + assert_eq!(m.matched_ranges.as_slice(), [0..3]); } pub fn upstream_prefix_4() { @@ -411,13 +408,13 @@ pub fn upstream_prefix_4() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [0..3]); + assert_eq!(m.matched_ranges.as_slice(), [0..3]); } pub fn upstream_prefix_5() { @@ -426,13 +423,13 @@ pub fn upstream_prefix_5() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [0..3]); + assert_eq!(m.matched_ranges.as_slice(), [0..3]); } pub fn upstream_prefix_6() { @@ -441,13 +438,13 @@ pub fn upstream_prefix_6() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [1..4]); + assert_eq!(m.matched_ranges.as_slice(), [1..4]); } pub fn upstream_prefix_7() { @@ -456,13 +453,13 @@ pub fn upstream_prefix_7() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [0..3]); + assert_eq!(m.matched_ranges.as_slice(), [0..3]); } pub fn upstream_prefix_8() { @@ -486,11 +483,11 @@ pub fn upstream_suffix_3() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + 2 * bonus::CONSECUTIVE ); - assert_eq!(m.matched_ranges().sorted(), [6..9]); + assert_eq!(m.matched_ranges.as_slice(), [6..9]); } pub fn upstream_suffix_4() { @@ -499,12 +496,12 @@ pub fn upstream_suffix_4() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 ); - assert_eq!(m.matched_ranges().sorted(), [6..9]); + assert_eq!(m.matched_ranges.as_slice(), [6..9]); } pub fn upstream_suffix_5() { @@ -513,11 +510,11 @@ pub fn upstream_suffix_5() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 3 * bonus::MATCH + 2 * bonus::CONSECUTIVE ); - assert_eq!(m.matched_ranges().sorted(), [6..9]); + assert_eq!(m.matched_ranges.as_slice(), [6..9]); } pub fn upstream_suffix_6() { @@ -526,13 +523,13 @@ pub fn upstream_suffix_6() { let m = m.unwrap(); assert_eq!( - m.distance().into_score(), + m.distance.into_score(), 4 * bonus::MATCH + 2 * bonus::CONSECUTIVE + fzf.scheme().bonus_boundary_white ); - assert_eq!(m.matched_ranges().sorted(), [6..10]); + assert_eq!(m.matched_ranges.as_slice(), [6..10]); } pub use utils::*; @@ -540,8 +537,13 @@ pub use utils::*; mod utils { use core::ops::Range; + pub struct FzfMatch { + pub distance: FzfDistance, + pub matched_ranges: norm::MatchedRanges, + } + use norm::fzf::{FzfDistance, FzfParser, FzfQuery, FzfV1, FzfV2}; - use norm::{CaseSensitivity, Match, Metric}; + use norm::{CaseSensitivity, Metric}; /// TODO: docs pub trait SortedRanges { @@ -624,15 +626,23 @@ mod utils { case_sensitivity: CaseSensitivity, query: &str, candidate: &str, - ) -> (F, Option>) { + ) -> (F, Option) { let mut fzf = F::default(); fzf.with_case_sensitivity(case_sensitivity).with_matched_ranges(true); let mut parser = FzfParser::new(); - let m = fzf.distance(parser.parse(query), candidate); + let mut ranges = norm::MatchedRanges::default(); + + let Some(distance) = fzf.distance_and_ranges( + parser.parse(query), + candidate, + &mut ranges, + ) else { + return (fzf, None); + }; - (fzf, m) + (fzf, Some(FzfMatch { distance, matched_ranges: ranges })) } } diff --git a/tests/fzf_v1.rs b/tests/fzf_v1.rs index ebfde0b..0ed5311 100644 --- a/tests/fzf_v1.rs +++ b/tests/fzf_v1.rs @@ -222,13 +222,15 @@ fn fzf_v1_score_1() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let _ = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) - .distance(parser.parse("ZZ"), "ӥZZZ") + .distance_and_ranges(parser.parse("ZZ"), "ӥZZZ", &mut ranges) .unwrap(); - assert_eq!(mach.matched_ranges(), [2..4]); + assert_eq!(ranges.as_slice(), [2..4]); } #[test] @@ -269,17 +271,19 @@ fn fzf_v1_score_4() { let mut parser = FzfParser::new(); + let mut ranges = norm::MatchedRanges::default(); + let query = parser.parse("z\n"); let candidate = "ZZ\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u{65e}\nZ\u{65e}"; - let mach = fzf + let _ = fzf .with_case_sensitivity(CaseSensitivity::Insensitive) .with_matched_ranges(true) - .distance(query, candidate) + .distance_and_ranges(query, candidate, &mut ranges) .unwrap(); - assert_eq!(mach.matched_ranges(), [1..2, 21..22]); + assert_eq!(ranges.as_slice(), [1..2, 21..22]); } #[test] @@ -288,12 +292,18 @@ fn fzf_v1_score_5() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let _ = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) .with_normalization(true) - .distance(parser.parse("e !"), " !I\\hh+\u{364}") + .distance_and_ranges( + parser.parse("e !"), + " !I\\hh+\u{364}", + &mut ranges, + ) .unwrap(); - assert_eq!(mach.matched_ranges(), [1..2, 7..9]); + assert_eq!(ranges.as_slice(), [1..2, 7..9]); } diff --git a/tests/fzf_v2.rs b/tests/fzf_v2.rs index 7074eb0..28f2a22 100644 --- a/tests/fzf_v2.rs +++ b/tests/fzf_v2.rs @@ -2,7 +2,6 @@ mod fzf_common; -use common::SortedRanges; use fzf_common as common; use norm::fzf::{bonus, FzfParser, FzfV2}; use norm::{CaseSensitivity, Metric}; @@ -223,21 +222,23 @@ fn fzf_v2_score_1() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let distance = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) - .distance(parser.parse("jelly"), "jellyfish") + .distance_and_ranges(parser.parse("jelly"), "jellyfish", &mut ranges) .unwrap(); assert_eq!( - mach.distance().into_score(), + distance.into_score(), bonus::MATCH * 5 + fzf.scheme().bonus_boundary_white * bonus::FIRST_QUERY_CHAR_MULTIPLIER + fzf.scheme().bonus_boundary_white * 4 ); - assert_eq!(mach.matched_ranges().sorted(), [0..5]); + assert_eq!(ranges.as_slice(), [0..5]); } #[test] @@ -246,12 +247,12 @@ fn fzf_v2_score_2() { let mut parser = FzfParser::new(); - let mach = fzf + let distance = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) .distance(parser.parse("!$"), "$$2"); - assert!(mach.is_none()); + assert!(distance.is_none()); } #[test] @@ -260,13 +261,19 @@ fn fzf_v2_score_3() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let _ = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) - .distance(parser.parse("\0\0"), "\0#B\0\u{364}\0\0") + .distance_and_ranges( + parser.parse("\0\0"), + "\0#B\0\u{364}\0\0", + &mut ranges, + ) .unwrap(); - assert_eq!(mach.matched_ranges().sorted(), [6..8]); + assert_eq!(ranges.as_slice(), [6..8]); } #[test] @@ -275,14 +282,20 @@ fn fzf_v2_score_4() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let _ = fzf .with_case_sensitivity(CaseSensitivity::Sensitive) .with_matched_ranges(true) .with_normalization(true) - .distance(parser.parse("e !"), " !I\\hh+\u{364}") + .distance_and_ranges( + parser.parse("e !"), + " !I\\hh+\u{364}", + &mut ranges, + ) .unwrap(); - assert_eq!(mach.matched_ranges(), [1..2, 7..9]); + assert_eq!(ranges.as_slice(), [1..2, 7..9]); } #[test] @@ -291,14 +304,16 @@ fn fzf_v2_score_5() { let mut parser = FzfParser::new(); - let mach = fzf + let mut ranges = norm::MatchedRanges::default(); + + let _ = fzf .with_case_sensitivity(CaseSensitivity::Insensitive) .with_matched_ranges(true) .with_normalization(true) - .distance(parser.parse("E"), "\u{364}E") + .distance_and_ranges(parser.parse("E"), "\u{364}E", &mut ranges) .unwrap(); - assert_eq!(mach.matched_ranges(), [0..2]); + assert_eq!(ranges.as_slice(), [0..2]); } #[test] @@ -307,13 +322,19 @@ fn fzf_v2_score_6() { let mut parser = FzfParser::new(); + let mut ranges = norm::MatchedRanges::default(); + let query = parser.parse("!2\t\0\0\0WWHHWHWWWWWWWZ !I"); - let mach = fzf + let distance = fzf .with_case_sensitivity(CaseSensitivity::Insensitive) .with_matched_ranges(true) .with_normalization(true) - .distance(query, "\u{6}\0\0 N\u{364}\u{e}\u{365}+"); + .distance_and_ranges( + query, + "\u{6}\0\0 N\u{364}\u{e}\u{365}+", + &mut ranges, + ); - assert!(mach.is_none()); + assert!(distance.is_none()); } From ab3a56b22a9e46e1274c11a54e8ba095374c5372 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 00:14:39 +0100 Subject: [PATCH 15/20] fix docs --- src/algos/fzf/mod.rs | 4 ++-- src/algos/fzf/query.rs | 4 ++-- src/lib.rs | 4 +--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/algos/fzf/mod.rs b/src/algos/fzf/mod.rs index f72109d..d8ed952 100644 --- a/src/algos/fzf/mod.rs +++ b/src/algos/fzf/mod.rs @@ -48,7 +48,7 @@ mod distance; mod fzf; #[cfg(feature = "fzf-v1")] mod fzf_v1; -#[cfg(feature = "fzf-v1")] +#[cfg(feature = "fzf-v2")] mod fzf_v2; mod parser; mod query; @@ -62,7 +62,7 @@ use distance::*; use fzf::*; #[cfg(feature = "fzf-v1")] pub use fzf_v1::FzfV1; -#[cfg(feature = "fzf-v1")] +#[cfg(feature = "fzf-v2")] pub use fzf_v2::FzfV2; pub use parser::*; pub use query::FzfQuery; diff --git a/src/algos/fzf/query.rs b/src/algos/fzf/query.rs index bc5c169..f0aaae0 100644 --- a/src/algos/fzf/query.rs +++ b/src/algos/fzf/query.rs @@ -2,8 +2,8 @@ use core::fmt::Write; /// A parsed fzf query. /// -/// This struct is created by the [`parse`](FzfParser::parse) method on -/// [`FzfParser`]. See its documentation for more. +/// This struct is created by the [`parse`](super::FzfParser::parse) method on +/// [`FzfParser`](super::FzfParser). See its documentation for more. #[derive(Clone, Copy)] pub struct FzfQuery<'a> { pub(super) search_mode: SearchMode<'a>, diff --git a/src/lib.rs b/src/lib.rs index 0daf6c7..24de176 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,9 +36,7 @@ //! let mut results = cities //! .iter() //! .copied() -//! .filter_map(|city| { -//! fzf.distance(query, city).map(|mach| (city, mach.distance())) -//! }) +//! .filter_map(|city| fzf.distance(query, city).map(|dist| (city, dist))) //! .collect::>(); //! //! results.sort_by_key(|(_city, dist)| *dist); From 8a65b6cc44872b1cfe46bd6058b9b6039e19403a Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 00:21:07 +0100 Subject: [PATCH 16/20] fzf: fix typo in tests --- src/algos/fzf/fzf.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/algos/fzf/fzf.rs b/src/algos/fzf/fzf.rs index 273679f..c0ef4e6 100644 --- a/src/algos/fzf/fzf.rs +++ b/src/algos/fzf/fzf.rs @@ -609,7 +609,7 @@ mod tests { assert!(exact_match::( pattern, candidate("ABC"), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) @@ -621,7 +621,7 @@ mod tests { assert!(exact_match::( pattern, candidate("AbC"), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) @@ -636,7 +636,7 @@ mod tests { assert!(exact_match::( pattern, candidate("AbC "), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) @@ -651,7 +651,7 @@ mod tests { assert!(exact_match::( pattern, candidate(" AbC "), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) @@ -666,7 +666,7 @@ mod tests { assert!(exact_match::( pattern, candidate(" AbC"), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) @@ -685,7 +685,7 @@ mod tests { assert!(exact_match::( pattern, candidate("aabbcc abc"), - utils::char_eq(false, false), + utils::char_eq(true, false), &Scheme::default(), &mut ranges_buf ) From b7b1a266dc6876b8b301ec5db305151c52224b35 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 11:23:02 +0100 Subject: [PATCH 17/20] fzf-v1: the new output of `start_backward` doesn't have to be modified --- src/algos/fzf/fzf_v1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index 923edc6..cec1fa8 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -153,7 +153,7 @@ impl Fzf for FzfV1 { let start_backward = backward_pass(pattern, candidate, range_forward.end, opts); - let range = range_forward.start + start_backward..range_forward.end; + let range = start_backward..range_forward.end; let score = compute_score::( pattern, From 8fe3bde9afee7b0109f93fc142dd32d77f7a88d1 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 11:23:13 +0100 Subject: [PATCH 18/20] impl `Debug` for `Candidate` --- src/candidate.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/candidate.rs b/src/candidate.rs index 21eb09c..a02c6e4 100644 --- a/src/candidate.rs +++ b/src/candidate.rs @@ -9,6 +9,21 @@ pub(crate) enum Candidate<'a> { Unicode(&'a [char]), } +impl core::fmt::Debug for Candidate<'_> { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Candidate::Ascii(slice) => { + core::str::from_utf8(slice).unwrap().fmt(f) + }, + + Candidate::Unicode(slice) => { + slice.iter().collect::().fmt(f) + }, + } + } +} + impl<'a> Candidate<'a> { /// TODO: docs #[inline(always)] From 73654261e1b83236713b237e92805aaa3c304c02 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 11:26:04 +0100 Subject: [PATCH 19/20] fzf-v1: only return the end offset from `forward_pass` --- src/algos/fzf/fzf_v1.rs | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index cec1fa8..32f350d 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -1,5 +1,3 @@ -use core::ops::Range; - use super::{query::*, *}; use crate::*; @@ -148,17 +146,15 @@ impl Fzf for FzfV1 { let opts = CandidateOpts::new(is_sensitive, self.normalization); - let range_forward = forward_pass(pattern, candidate, opts)?; + let end_forward = forward_pass(pattern, candidate, opts)?; let start_backward = - backward_pass(pattern, candidate, range_forward.end, opts); - - let range = start_backward..range_forward.end; + backward_pass(pattern, candidate, end_forward, opts); let score = compute_score::( pattern, candidate, - range, + start_backward..end_forward, opts.char_eq, &self.scheme, ranges, @@ -174,25 +170,12 @@ fn forward_pass( pattern: Pattern, candidate: Candidate, opts: CandidateOpts, -) -> Option> { +) -> Option { let mut pattern_chars = pattern.chars(); let mut pattern_char = pattern_chars.next()?; - let start_offset = candidate.find_first_from( - 0, - pattern_char, - opts.is_case_sensitive, - opts.char_eq, - )?; - - let mut end_offset = start_offset + 1; - - if let Some(next) = pattern_chars.next() { - pattern_char = next; - } else { - return Some(start_offset..end_offset); - } + let mut end_offset = 0; loop { end_offset = candidate.find_first_from( @@ -205,7 +188,7 @@ fn forward_pass( if let Some(next) = pattern_chars.next() { pattern_char = next; } else { - return Some(start_offset..end_offset); + return Some(end_offset); } } } From 92d6f2f2892e4cf1d673d7f3fa0adcc76c6a69b7 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 24 Nov 2023 11:41:13 +0100 Subject: [PATCH 20/20] fzf: update fuzz target to new API --- fuzz/fuzz_targets/fzf.rs | 22 ++++++++++------------ src/algos/fzf/fzf_v1.rs | 1 + src/algos/fzf/fzf_v2.rs | 1 + src/matched_ranges.rs | 6 ++++++ src/tiny_vec.rs | 31 +++++++++++++++++++++++++------ 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/fuzz/fuzz_targets/fzf.rs b/fuzz/fuzz_targets/fzf.rs index cc4c2aa..677c9b7 100644 --- a/fuzz/fuzz_targets/fzf.rs +++ b/fuzz/fuzz_targets/fzf.rs @@ -41,29 +41,27 @@ fuzz_target!(|data: (Query, Candidate)| { let mut fzf_v2 = FzfV2::new(); + let mut ranges = norm::MatchedRanges::default(); + with_opts(|case_sensitivity, normalization, scheme| { - let mach = fzf_v1 + let _ = fzf_v1 .with_case_sensitivity(case_sensitivity) .with_normalization(normalization) .with_scoring_scheme(scheme) - .distance(query, candidate); + .distance_and_ranges(query, candidate, &mut ranges); - if let Some(mach) = mach { - for range in mach.matched_ranges() { - let _s = &candidate[range.clone()]; - } + for range in ranges.as_slice() { + let _ = &candidate[range.clone()]; } - let mach = fzf_v2 + let _ = fzf_v2 .with_case_sensitivity(case_sensitivity) .with_normalization(normalization) .with_scoring_scheme(scheme) - .distance(query, candidate); + .distance_and_ranges(query, candidate, &mut ranges); - if let Some(mach) = mach { - for range in mach.matched_ranges() { - let _s = &candidate[range.clone()]; - } + for range in ranges.as_slice() { + let _ = &candidate[range.clone()]; } }); }); diff --git a/src/algos/fzf/fzf_v1.rs b/src/algos/fzf/fzf_v1.rs index 32f350d..5372a5d 100644 --- a/src/algos/fzf/fzf_v1.rs +++ b/src/algos/fzf/fzf_v1.rs @@ -100,6 +100,7 @@ impl Metric for FzfV1 { candidate: &str, ranges: &mut MatchedRanges, ) -> Option { + ranges.clear(); ::distance::(self, query, candidate, ranges) } } diff --git a/src/algos/fzf/fzf_v2.rs b/src/algos/fzf/fzf_v2.rs index dfb50eb..e4f7219 100644 --- a/src/algos/fzf/fzf_v2.rs +++ b/src/algos/fzf/fzf_v2.rs @@ -103,6 +103,7 @@ impl Metric for FzfV2 { candidate: &str, ranges: &mut MatchedRanges, ) -> Option { + ranges.clear(); ::distance::(self, query, candidate, ranges) } } diff --git a/src/matched_ranges.rs b/src/matched_ranges.rs index e6dce61..5f5522c 100644 --- a/src/matched_ranges.rs +++ b/src/matched_ranges.rs @@ -21,6 +21,12 @@ impl MatchedRanges { self.ranges.as_slice() } + /// TODO: docs + #[inline(always)] + pub fn clear(&mut self) { + self.ranges.clear() + } + /// TODO: docs #[inline(always)] pub(crate) fn insert(&mut self, new_range: Range) { diff --git a/src/tiny_vec.rs b/src/tiny_vec.rs index a18d86b..bffb126 100644 --- a/src/tiny_vec.rs +++ b/src/tiny_vec.rs @@ -46,7 +46,7 @@ impl> IndexMut { #[inline(always)] fn index_mut(&mut self, index: I) -> &mut Self::Output { - IndexMut::index_mut(self.as_slice_mut(), index) + IndexMut::index_mut(self.as_mut_slice(), index) } } @@ -60,9 +60,9 @@ impl TinyVec { } #[inline(always)] - pub fn as_slice_mut(&mut self) -> &mut [T] { + pub fn as_mut_slice(&mut self) -> &mut [T] { match &mut self.inner { - TinyVecType::Inline(inner) => inner.as_slice_mut(), + TinyVecType::Inline(inner) => inner.as_mut_slice(), TinyVecType::Heap(inner) => inner.as_mut_slice(), } } @@ -75,9 +75,17 @@ impl TinyVec { self.as_slice().binary_search_by(f) } + #[inline(always)] + pub fn clear(&mut self) { + match &mut self.inner { + TinyVecType::Inline(inner) => inner.clear(), + TinyVecType::Heap(inner) => inner.clear(), + } + } + #[inline(always)] pub fn get_mut(&mut self, idx: usize) -> Option<&mut T> { - self.as_slice_mut().get_mut(idx) + self.as_mut_slice().get_mut(idx) } #[inline(always)] @@ -128,7 +136,7 @@ impl TinyVec { pub fn split_at_mut(&mut self, mid: usize) -> (&mut [T], &mut [T]) { match &mut self.inner { TinyVecType::Inline(inner) => { - inner.as_slice_mut().split_at_mut(mid) + inner.as_mut_slice().split_at_mut(mid) }, TinyVecType::Heap(inner) => inner.split_at_mut(mid), } @@ -182,11 +190,22 @@ impl InlineVec { } #[inline(always)] - fn as_slice_mut(&mut self) -> &mut [T] { + fn as_mut_slice(&mut self) -> &mut [T] { // SAFETY: same as `as_slice`. unsafe { transmute(&mut self.data[..self.len]) } } + #[inline(always)] + fn clear(&mut self) { + let elems: *mut [T] = self.as_mut_slice(); + + // SAFETY: copied from `Vec::clear`. + unsafe { + self.len = 0; + ptr::drop_in_place(elems); + } + } + #[inline(always)] fn insert(&mut self, offset: usize, child: T) { assert!(offset <= self.len());