Skip to content

Commit

Permalink
Merge pull request #27 from chris-ha458/fixes
Browse files: browse the repository at this point in the history
Small Fixes
  • Loading branch information
nickspring authored Oct 3, 2023
2 parents abfd5a9 + 99c122d commit 42d71ff
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
5 changes: 3 additions & 2 deletions src/assets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use ahash::HashMap;
use lazy_static::lazy_static;

lazy_static! {
pub static ref LANGUAGES: Vec<(Language, &'static str, bool, bool)> = vec![
pub static ref LANGUAGE_SUPPORTED_COUNT: usize = 41;
pub static ref LANGUAGES: [(Language, &'static str, bool, bool);41] = [
// language, alphabet, have_accents, pure_latin
(Language::English, "eationsrhldcmufpgwbyvkjxzq", false, true, ),
(Language::English, "eationsrhldcumfpgwybvkxjzq", false, true, ),
Expand Down Expand Up @@ -48,7 +49,7 @@ lazy_static! {
(Language::Kazakh, "аыентрлідсмқкобиуғжңзшйпгө", false, false, ),
];

pub static ref LANGUAGE_SUPPORTED_COUNT: usize = LANGUAGES.len();


// direct binding encoding to language
pub(crate) static ref ENCODING_TO_LANGUAGE: HashMap<&'static str, Language> = HashMap::from_iter([
Expand Down
13 changes: 6 additions & 7 deletions src/cd.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#![allow(unused_variables)]
use crate::assets::{ENCODING_TO_LANGUAGE, LANGUAGES};
use crate::assets::{ENCODING_TO_LANGUAGE, LANGUAGES, LANGUAGE_SUPPORTED_COUNT};
use crate::consts::TOO_SMALL_SEQUENCE;
use crate::entity::{CoherenceMatch, CoherenceMatches, Language};
use crate::utils::{
Expand Down Expand Up @@ -93,22 +93,21 @@ pub(crate) fn alphabet_languages(
characters: &[char],
ignore_non_latin: bool,
) -> Vec<&'static Language> {
let mut languages: Vec<(&Language, f32)> = vec![];
let source_characters_set: HashSet<_> = characters.iter().copied().collect();
let mut languages: Vec<(&Language, f32)> = Vec::with_capacity(*LANGUAGE_SUPPORTED_COUNT);
let source_characters_set: HashSet<char> = characters.iter().copied().collect();
let source_has_accents = source_characters_set
.iter()
.any(|&char| is_accentuated(char));

for (language, language_characters, target_have_accents, target_pure_latin) in LANGUAGES.iter()
{
if (ignore_non_latin && !*target_pure_latin)
|| (!*target_have_accents && source_has_accents)
if (ignore_non_latin && !target_pure_latin) || (!target_have_accents && source_has_accents)
{
continue;
}

let language_characters_set: HashSet<_> = language_characters.chars().collect();
let intersection: HashSet<_> = language_characters_set
let language_characters_set: HashSet<char> = language_characters.chars().collect();
let intersection: HashSet<char> = language_characters_set
.intersection(&source_characters_set)
.copied()
.collect();
Expand Down
2 changes: 1 addition & 1 deletion src/entity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl CharsetMatch {
IANA_SUPPORTED_ALIASES
.get(self.encoding.as_str())
.cloned()
.unwrap_or_default()
.expect("Problem with static HashMap IANA_SUPPORTED_ALIASES")
}
// byte_order_mark
pub fn bom(&self) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ pub fn get_large_test_datasets() -> Result<Vec<(String, Vec<String>)>, String> {
.split(',')
.map(|s| s.to_string())
.collect();
if encoding.len() == 1 && encoding.first()? == "largesets" {
if encoding.as_slice() == ["largesets"] {
return None; // None is ignored by filter_map
}
Some((path.to_string(), encoding)) // Return the tuple for the 'result'. unpacked by filter_map
Expand Down

0 comments on commit 42d71ff

Please sign in to comment.