Merge pull request #26 from chris-ha458/trivially_copy_pass_by_ref
Trivially copy pass by ref
nickspring authored Oct 2, 2023
2 parents 1e2e6b5 + bf027ea commit abfd5a9
Showing 5 changed files with 101 additions and 100 deletions.
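The changes follow clippy's `trivially_copy_pass_by_ref` lint (the branch name of PR #26): `char` is a small `Copy` type, so taking it by value is at least as cheap as taking `&char` and removes the `&`/`*` noise at call sites. A minimal, self-contained sketch of the before/after pattern — the function names here are illustrative only, not taken from the crate:

// Before: clippy::trivially_copy_pass_by_ref fires because a reference to a
// `char` is no cheaper to pass than the 4-byte `char` itself.
fn is_vowel_by_ref(c: &char) -> bool {
    matches!(*c, 'a' | 'e' | 'i' | 'o' | 'u')
}

// After: take the `char` by value; callers drop the borrow and the deref.
fn is_vowel(c: char) -> bool {
    matches!(c, 'a' | 'e' | 'i' | 'o' | 'u')
}

fn main() {
    let text = "diff";
    // By-value parameters compose directly with iterator adapters over `chars()`.
    let vowels = text.chars().filter(|&c| is_vowel(c)).count();
    assert_eq!(vowels, 1);
    assert!(!is_vowel_by_ref(&'x'));
}

The same mechanical rewrite is what the hunks below apply to calls such as `unicode_range`, `is_accentuated`, `is_unprintable`, and to the `MessDetectorPlugin::feed` signature.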
18 changes: 10 additions & 8 deletions src/cd.rs
@@ -34,7 +34,7 @@ pub(crate) fn encoding_unicode_range(iana_name: &str) -> Result<Vec<&str>, Strin
.decode(&[i], DecoderTrap::Ignore)
.ok()
.and_then(|chunk| chunk.chars().next())
- .and_then(|first_char| unicode_range(&first_char))
+ .and_then(unicode_range)
.filter(|&range| !is_unicode_range_secondary(range))
{
*result.entry(range).or_insert(0) += 1;
@@ -58,7 +58,7 @@ pub(crate) fn unicode_range_languages(primary_range: &str) -> Vec<&'static Langu
.filter_map(|(language, characters, _, _)| {
characters
.chars()
- .find(|char| unicode_range(char).unwrap_or_default() == primary_range)
+ .find(|char| unicode_range(*char).unwrap_or_default() == primary_range)
.map(|_| language)
})
.collect::<Vec<&Language>>()
@@ -90,12 +90,14 @@ pub(crate) fn mb_encoding_languages(iana_name: &str) -> Vec<&'static Language> {
// Return associated languages associated to given characters
#[allow(clippy::ptr_arg)]
pub(crate) fn alphabet_languages(
- characters: &Vec<&char>,
+ characters: &[char],
ignore_non_latin: bool,
) -> Vec<&'static Language> {
let mut languages: Vec<(&Language, f32)> = vec![];
- let source_characters_set: HashSet<char> = characters.iter().copied().copied().collect(); //take a look why copied/cloned is needed twice
- let source_has_accents = source_characters_set.iter().any(is_accentuated);
+ let source_characters_set: HashSet<_> = characters.iter().copied().collect();
+ let source_has_accents = source_characters_set
+ .iter()
+ .any(|&char| is_accentuated(char));

for (language, language_characters, target_have_accents, target_pure_latin) in LANGUAGES.iter()
{
@@ -128,7 +130,7 @@ pub(crate) fn alpha_unicode_split(decoded_sequence: &str) -> Vec<String> {
let mut layers: HashMap<&str, String> = HashMap::new();

for ch in decoded_sequence.chars().filter(|c| c.is_alphabetic()) {
- if let Some(character_range) = unicode_range(&ch) {
+ if let Some(character_range) = unicode_range(ch) {
let layer_key: &str = layers
.keys()
.find(|key| !is_suspiciously_successive_range(Some(key), Some(character_range)))
@@ -213,7 +215,7 @@ pub(crate) fn coherence_ratio(
continue;
}
let most_common = layer.chars().collect::<Counter<_>>().most_common_ordered();
- let popular_character_ordered: Vec<&char> = most_common.iter().map(|(ch, _)| ch).collect();
+ let popular_character_ordered: Vec<char> = most_common.iter().map(|(ch, _)| *ch).collect();

let languages = if include_languages.is_empty() {
alphabet_languages(&popular_character_ordered, ignore_non_latin)
@@ -222,7 +224,7 @@
};

let popular_character_ordered_as_string: String =
- popular_character_ordered.iter().copied().collect();
+ popular_character_ordered.iter().collect();

// Convert the String into a &str
for language in languages {
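Besides the by-value `char` changes, the `alphabet_languages` hunk above also switches the parameter from `&Vec<&char>` to a `&[char]` slice — the shape clippy's `ptr_arg` lint (the `#[allow]` kept on the function) recommends, with the extra layer of references dropped. A small illustrative sketch of why the slice form is more flexible for callers; the function here is hypothetical, not the crate's API:

// A `&[char]` parameter accepts a borrowed Vec, an array, or a sub-slice,
// with no intermediate Vec<&char> to build.
fn count_ascii(characters: &[char]) -> usize {
    characters.iter().filter(|c| c.is_ascii()).count()
}

fn main() {
    let from_vec: Vec<char> = "héllo".chars().collect();
    let from_array = ['a', 'β', 'c'];

    assert_eq!(count_ascii(&from_vec), 4);      // &Vec<char> coerces to &[char]
    assert_eq!(count_ascii(&from_array), 2);    // arrays coerce too
    assert_eq!(count_ascii(&from_vec[..2]), 1); // so do sub-slices
}

This flexibility is also what lets the test in src/tests/cd.rs below pass `&characters` directly instead of building a second Vec of references.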
60 changes: 30 additions & 30 deletions src/md.rs
@@ -27,7 +27,7 @@ trait MessDetectorPlugin {

// The main routine to be executed upon character.
// Insert the logic in witch the text would be considered chaotic.
- fn feed(&mut self, character: &char);
+ fn feed(&mut self, character: char);

// Compute the chaos ratio based on what your feed() has seen.
// Must NOT be lower than 0.; No restriction gt 0.
@@ -47,20 +47,20 @@ struct TooManySymbolOrPunctuationPlugin {

impl MessDetectorPlugin for TooManySymbolOrPunctuationPlugin {
fn eligible(&self, character: char) -> bool {
- !is_unprintable(&character)
+ !is_unprintable(character)
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
self.character_count += 1;
- if (self.last_printable_char.is_none() || *character != self.last_printable_char.unwrap())
- && !COMMON_SAFE_ASCII_CHARACTERS.contains(*character)
+ if (self.last_printable_char.is_none() || character != self.last_printable_char.unwrap())
+ && !COMMON_SAFE_ASCII_CHARACTERS.contains(character)
{
if is_punctuation(character) {
self.punctuation_count += 1;
} else if !character.is_numeric() && is_symbol(character) && !is_emoticon(character) {
self.symbol_count += 2;
}
}
- self.last_printable_char = Some(*character);
+ self.last_printable_char = Some(character);
}
fn ratio(&self) -> f32 {
if self.character_count == 0 {
@@ -90,7 +90,7 @@ impl MessDetectorPlugin for TooManyAccentuatedPlugin {
fn eligible(&self, character: char) -> bool {
character.is_alphabetic()
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
self.character_count += 1;
if is_accentuated(character) {
self.accentuated_count += 1
Expand Down Expand Up @@ -118,7 +118,7 @@ impl MessDetectorPlugin for UnprintablePlugin {
fn eligible(&self, character: char) -> bool {
true
}
fn feed(&mut self, character: &char) {
fn feed(&mut self, character: char) {
if is_unprintable(character) {
self.unprintable_count += 1;
}
@@ -144,24 +144,24 @@ struct SuspiciousDuplicateAccentPlugin {

impl MessDetectorPlugin for SuspiciousDuplicateAccentPlugin {
fn eligible(&self, character: char) -> bool {
- character.is_alphabetic() && is_latin(&character)
+ character.is_alphabetic() && is_latin(character)
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
self.character_count += 1;
if self.last_latin_character.is_some()
&& is_accentuated(character)
- && is_accentuated(&self.last_latin_character.unwrap())
+ && is_accentuated(self.last_latin_character.unwrap())
{
if character.is_uppercase() && self.last_latin_character.unwrap().is_uppercase() {
self.successive_count += 1;
}

// Worse if its the same char duplicated with different accent.
- if remove_accent(character) == remove_accent(&self.last_latin_character.unwrap()) {
+ if remove_accent(character) == remove_accent(self.last_latin_character.unwrap()) {
self.successive_count += 1;
}
}
- self.last_latin_character = Some(*character);
+ self.last_latin_character = Some(character);
}
fn ratio(&self) -> f32 {
if self.character_count == 0 {
@@ -183,33 +183,33 @@ struct SuspiciousRangePlugin {

impl MessDetectorPlugin for SuspiciousRangePlugin {
fn eligible(&self, character: char) -> bool {
- !is_unprintable(&character)
+ !is_unprintable(character)
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
self.character_count += 1;

if character.is_whitespace()
|| is_punctuation(character)
- || COMMON_SAFE_ASCII_CHARACTERS.contains(*character)
+ || COMMON_SAFE_ASCII_CHARACTERS.contains(character)
{
self.last_printable_char = None;
return;
}

if self.last_printable_char.is_none() {
- self.last_printable_char = Some(*character);
+ self.last_printable_char = Some(character);
return;
}

- let tmp_a = &self.last_printable_char.unwrap();
+ let tmp_a = self.last_printable_char.unwrap();
let unicode_range_a = unicode_range(tmp_a);
let unicode_range_b = unicode_range(character);

if is_suspiciously_successive_range(unicode_range_a, unicode_range_b) {
self.suspicious_successive_range_count += 1;
}

- self.last_printable_char = Some(*character);
+ self.last_printable_char = Some(character);
}
fn ratio(&self) -> f32 {
(self.character_count > 0)
@@ -240,12 +240,12 @@ struct SuperWeirdWordPlugin {
}

impl MessDetectorPlugin for SuperWeirdWordPlugin {
- fn eligible(&self, _: char) -> bool {
+ fn eligible(&self, character: char) -> bool {
true
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
if character.is_ascii_alphabetic() {
- self.buffer.push(*character);
+ self.buffer.push(character);
if is_accentuated(character) {
self.buffer_accent_count += 1;
}
@@ -278,7 +278,7 @@ impl MessDetectorPlugin for SuperWeirdWordPlugin {
// Word/Buffer ending with an upper case accentuated letter are so rare,
// that we will consider them all as suspicious. Same weight as foreign_long suspicious.
let last_char = self.buffer.chars().last().unwrap();
- if is_accentuated(&last_char) && last_char.is_uppercase() {
+ if is_accentuated(last_char) && last_char.is_uppercase() {
self.foreign_long_count += 1;
self.is_current_word_bad = true;
}
@@ -306,12 +306,12 @@ impl MessDetectorPlugin for SuperWeirdWordPlugin {
self.foreign_long_watch = false;
self.buffer.clear();
self.buffer_accent_count = 0;
- } else if !"<>-=~|_".contains(*character)
+ } else if !"<>-=~|_".contains(character)
&& !character.is_ascii_digit()
&& is_symbol(character)
{
self.is_current_word_bad = true;
- self.buffer.push(*character);
+ self.buffer.push(character);
}
}
fn ratio(&self) -> f32 {
@@ -337,8 +337,8 @@ impl MessDetectorPlugin for CjkInvalidStopPlugin {
fn eligible(&self, _: char) -> bool {
true
}
- fn feed(&mut self, character: &char) {
- if "丅丄".contains(*character) {
+ fn feed(&mut self, character: char) {
+ if "丅丄".contains(character) {
self.wrong_stop_count += 1;
return;
}
@@ -386,7 +386,7 @@ impl MessDetectorPlugin for ArchaicUpperLowerPlugin {
fn eligible(&self, _: char) -> bool {
true
}
- fn feed(&mut self, character: &char) {
+ fn feed(&mut self, character: char) {
if !(character.is_alphabetic() && is_case_variable(character))
&& self.character_count_since_last_sep > 0
{
@@ -426,7 +426,7 @@ impl MessDetectorPlugin for ArchaicUpperLowerPlugin {

self.character_count += 1;
self.character_count_since_last_sep += 1;
- self.last_alpha_seen = Some(*character);
+ self.last_alpha_seen = Some(character);
}
fn ratio(&self) -> f32 {
if self.character_count == 0 {
@@ -470,7 +470,7 @@ pub(crate) fn mess_ratio(
detectors
.iter_mut()
.filter(|detector| detector.eligible(ch))
- .for_each(|detector| detector.feed(&ch));
+ .for_each(|detector| detector.feed(ch));

if (index > 0 && index.rem_euclid(intermediary_mean_mess_ratio_calc) == 0)
|| index == length
5 changes: 2 additions & 3 deletions src/tests/cd.rs
@@ -56,9 +56,8 @@ fn test_alphabet_languages() {
("Ailem ve Ben Adım Ece ve on iki yaşındayım. Her sabah 7'de uyanırım, kahvaltımı yaparım ve okula giderim. Boş zamanlarımda bahçede kitap okumayı severim. Küçük bir erkek kardeşim var. Kardeşim üç yaşında ve resim yapmayı sever. Evde her gün top oynar ve şarkı söyler. Kardeşim ve ben makarna yemeyi severiz. Bazen mutfakta yemekleri biz hazırlarız.", false, Some(Language::Turkish)),
];
for (input, ignore_non_latin, expected) in tests {
- let characters = input.chars().collect::<Vec<char>>();
- let characters_2 = characters.iter().collect();
- let languages = alphabet_languages(&characters_2, ignore_non_latin);
+ let characters: Vec<char> = input.chars().collect();
+ let languages = alphabet_languages(&characters, ignore_non_latin);
if expected.is_none() {
assert_eq!(languages.len(), 0);
} else {
(The two remaining changed files in this commit are not shown here.)