Skip to content

Commit

Permalink
test: Add memsize test for first 100 words.
Browse files Browse the repository at this point in the history
Right now the total byte length of the raw words is smaller than the size of our trie, which surprised me.
Perhaps carrying around values in the label is increasing our size.
This is something to try and suss out.
  • Loading branch information
shanecelis committed Apr 22, 2024
1 parent 3c7247d commit aabf0b3
Showing 1 changed file with 38 additions and 0 deletions.
38 changes: 38 additions & 0 deletions src/trie/trie_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,44 @@ mod search_tests {
let _ = trie.common_prefix_search::<String, _>("").next();
}


/// Memory-footprint check on a trie built from the first `COUNT` lines of
/// `benches/edict.furigana` (resolved relative to `CARGO_MANIFEST_DIR`).
///
/// Each line is pushed into the builder as one entry. The resulting trie's
/// reported size is compared against the summed byte length of those lines
/// and against a `Vec<String>` collected back out of the trie's iterator.
///
/// Panics if `CARGO_MANIFEST_DIR` is unset or if the dictionary file cannot
/// be opened or read.
///
/// ```sh
/// cargo test --features mem_dbg memsize -- --nocapture
/// ```
#[cfg(feature = "mem_dbg")]
#[test]
fn memsize() {
    use mem_dbg::*;
    use std::env;
    use std::fs::File;
    use std::io::{BufRead, BufReader};

    // Upper bound on how many dictionary lines are fed to the builder.
    const COUNT: usize = 100;

    let mut builder = TrieBuilder::new();

    let repo_root = env::var("CARGO_MANIFEST_DIR")
        .expect("CARGO_MANIFEST_DIR environment variable must be set.");
    let edict2_path = format!("{}/benches/edict.furigana", repo_root);
    println!("Reading dictionary file from: {}", edict2_path);

    let reader = BufReader::new(File::open(edict2_path).unwrap());

    // Running totals: number of lines consumed and their combined byte length.
    let mut n_words = 0;
    let mut accum = 0;
    reader.lines().take(COUNT).for_each(|entry| {
        let line = entry.unwrap();
        accum += line.len();
        builder.push(line);
        n_words += 1;
    });
    println!("Read {} words, {} bytes.", n_words, accum);

    let trie = builder.build();
    let trie_size = trie.mem_size(SizeFlags::default());
    eprintln!("Trie size {trie_size}");

    // Expand the trie back into owned strings to get a comparison baseline.
    let uncompressed = trie.iter().collect::<Vec<String>>();
    let uncompressed_size = uncompressed.mem_size(SizeFlags::default());
    eprintln!("Uncompressed size {}", uncompressed_size);

    // Pins the currently observed behaviour: the raw byte total is smaller
    // than the trie. This seems wrong and is worth investigating.
    assert!(accum < trie_size);
    assert!(trie_size < uncompressed_size);
}

mod exact_match_tests {
macro_rules! parameterized_tests {
($($name:ident: $value:expr,)*) => {
Expand Down

0 comments on commit aabf0b3

Please sign in to comment.