Merge branch 'main' into multiset
saulshanabrook authored Oct 25, 2024
2 parents f090848 + b9f4c58 commit 0042f72
Showing 10 changed files with 1,860 additions and 283 deletions.
37 changes: 16 additions & 21 deletions Cargo.lock


2 changes: 1 addition & 1 deletion Cargo.toml
@@ -26,7 +26,7 @@ bin = ["dep:clap", "dep:env_logger", "egraph-serialize/serde", "dep:serde_json"]
wasm-bindgen = ["instant/wasm-bindgen", "dep:getrandom"]

[dependencies]
hashbrown = { version = "0.14", features = ["raw"] }
hashbrown = { version = "0.15" }
indexmap = "2.0"
instant = "0.1"
log = "0.4"
16 changes: 16 additions & 0 deletions README.md
@@ -57,6 +57,22 @@ for the REPL.

To run the tests use `make test`.

## Benchmarks

We run all of our "examples" [as benchmarks in codspeed](https://codspeed.io/egraphs-good/egglog). These run in CI
for every commit on main and for all PRs. CodSpeed runs the examples with extra instrumentation added so that it can
capture a single trace of the CPU interactions ([src](https://docs.codspeed.io/features/understanding-the-metrics/)):

> CodSpeed instruments your benchmarks to measure the performance of your code. A benchmark will be run only once and the CPU behavior will be simulated. This ensures that the measurement is as accurate as possible, taking into account not only the instructions executed but also the cache and memory access patterns. The simulation gives us an equivalent of the CPU cycles that includes cache and memory access.

Since many of the shorter-running benchmarks have unstable timings due to non-deterministic performance ([like in the memory allocator](https://github.com/oxc-project/backlog/issues/89)),
we ["ignore"](https://docs.codspeed.io/features/ignoring-benchmarks/) them in codspeed. That way, we still
capture their performance, but their timings don't show up in our reports by default.

We currently use 50ms as our cutoff; any benchmarks shorter than that are ignored. This number was selected to ignore
any benchmarks whose timings change by more than 1% when they haven't been modified. Note that all the ignoring is done manually,
so if you add another example that's short, an admin on the codspeed project will need to manually ignore it.

# Documentation

To view documentation, run `cargo doc --open`.
3 changes: 1 addition & 2 deletions benches/example_benchmarks.rs
@@ -11,8 +11,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
for entry in glob::glob("tests/**/*.egg").unwrap() {
let path = entry.unwrap().clone();
let path_string = path.to_string_lossy().to_string();
// Skip python_array_optimize since it is too slow and doesn't even reflect the current python implementation
if path_string.contains("fail-typecheck") || path_string.contains("python_array_optimize") {
if path_string.contains("fail-typecheck") {
continue;
}
let name = path.file_stem().unwrap().to_string_lossy().to_string();
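For context, the full benchmark harness follows the standard criterion shape. A self-contained sketch under that assumption is below; the stand-in workload (just reading the file) replaces the real egglog run, which is elided here:

```rust
use criterion::{criterion_group, criterion_main, Criterion};

pub fn criterion_benchmark(c: &mut Criterion) {
    // One benchmark per example program, skipping the ones that are
    // expected to fail typechecking.
    for entry in glob::glob("tests/**/*.egg").unwrap() {
        let path = entry.unwrap();
        let path_string = path.to_string_lossy().to_string();
        if path_string.contains("fail-typecheck") {
            continue;
        }
        let name = path.file_stem().unwrap().to_string_lossy().to_string();
        c.bench_function(&name, |b| {
            // Stand-in workload: read the program text. The actual benchmark
            // runs the program through egglog at this point.
            b.iter(|| std::fs::read_to_string(&path_string).unwrap());
        });
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```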
8 changes: 7 additions & 1 deletion src/ast/mod.rs
@@ -78,12 +78,18 @@ pub(crate) enum Ruleset {
pub const DEFAULT_FILENAME: &str = "<unnamed.egg>";
pub const DUMMY_FILENAME: &str = "<internal.egg>";

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SrcFile {
pub name: String,
pub contents: Option<String>,
}

impl Debug for SrcFile {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "SrcFile({}, contents=...)", self.name)
}
}

#[derive(Clone, Copy)]
pub struct Location {
pub line: usize,
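As a quick illustration of the new `Debug` behavior, here is a standalone sketch (not the crate's actual type) showing how the potentially large file contents are elided from debug output:

```rust
use std::fmt::{self, Debug};

#[allow(dead_code)]
struct SrcFile {
    name: String,
    contents: Option<String>,
}

impl Debug for SrcFile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Print only the name; the contents can be an entire program.
        write!(f, "SrcFile({}, contents=...)", self.name)
    }
}

fn main() {
    let src = SrcFile {
        name: "<unnamed.egg>".to_string(),
        contents: Some("(datatype Math (Num i64))".to_string()),
    };
    assert_eq!(format!("{src:?}"), "SrcFile(<unnamed.egg>, contents=...)");
}
```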
48 changes: 22 additions & 26 deletions src/function/table.rs
@@ -32,7 +32,7 @@ use std::{
ops::Range,
};

use hashbrown::raw::RawTable;
use hashbrown::HashTable;

use super::binary_search::binary_search_table_by_key;
use crate::{util::BuildHasher as BH, TupleOutput, Value, ValueVec};
@@ -51,11 +51,11 @@ struct TableOffset {
pub(crate) struct Table {
max_ts: u32,
n_stale: usize,
table: RawTable<TableOffset>,
table: HashTable<TableOffset>,
pub(crate) vals: Vec<(Input, TupleOutput)>,
}

/// Used for the RawTable probe sequence.
/// Used for the HashTable probe sequence.
macro_rules! search_for {
($slf:expr, $hash:expr, $inp:expr) => {
|to| {
@@ -97,19 +97,18 @@ impl Table {

/// Rehashes the table, invalidating any offsets stored into the table.
pub(crate) fn rehash(&mut self) {
let mut src = 0usize;
let mut dst = 0usize;
self.table.clear();
self.vals.retain(|(inp, _)| {
if inp.live() {
let hash = hash_values(inp.data());
let to = TableOffset { hash, off: dst };
self.table
.insert(hash, TableOffset { hash, off: dst }, |to| to.hash);
src += 1;
.entry(hash, |to2| to2 == &to, |to2| to2.hash)
.insert(to);
dst += 1;
true
} else {
src += 1;
false
}
});
@@ -120,16 +119,16 @@
/// table.
pub(crate) fn get(&self, inputs: &[Value]) -> Option<&TupleOutput> {
let hash = hash_values(inputs);
let TableOffset { off, .. } = self.table.get(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[*off].0.live());
Some(&self.vals[*off].1)
let &TableOffset { off, .. } = self.table.find(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[off].0.live());
Some(&self.vals[off].1)
}

pub(crate) fn get_mut(&mut self, inputs: &[Value]) -> Option<&mut TupleOutput> {
let hash: u64 = hash_values(inputs);
let TableOffset { off, .. } = self.table.get(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[*off].0.live());
Some(&mut self.vals[*off].1)
let &TableOffset { off, .. } = self.table.find(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[off].0.live());
Some(&mut self.vals[off].1)
}

/// Insert the given data into the table at the given timestamp. Return the
@@ -161,7 +160,7 @@ impl Table {
self.max_ts = ts;
let hash = hash_values(inputs);
if let Some(TableOffset { off, .. }) =
self.table.get_mut(hash, search_for!(self, hash, inputs))
self.table.find_mut(hash, search_for!(self, hash, inputs))
{
let (inp, prev) = &mut self.vals[*off];
let prev_subsumed = prev.subsumed;
@@ -193,14 +192,13 @@ impl Table {
subsumed,
},
));
self.table.insert(
let to = TableOffset {
hash,
TableOffset {
hash,
off: new_offset,
},
|off| off.hash,
);
off: new_offset,
};
self.table
.entry(hash, |to2| to2 == &to, |to2| to2.hash)
.insert(to);
}

/// One more than the maximum (potentially) valid offset into the table.
@@ -237,13 +235,11 @@ impl Table {
/// removed.
pub(crate) fn remove(&mut self, inp: &[Value], ts: u32) -> bool {
let hash = hash_values(inp);
let entry = if let Some(entry) = self.table.remove_entry(hash, search_for!(self, hash, inp))
{
entry
} else {
let Ok(entry) = self.table.find_entry(hash, search_for!(self, hash, inp)) else {
return false;
};
self.vals[entry.off].0.stale_at = ts;
let (TableOffset { off, .. }, _) = entry.remove();
self.vals[off].0.stale_at = ts;
self.n_stale += 1;
true
}
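For reference, here is a standalone sketch of the hashbrown 0.15 `HashTable` calls this file now relies on (`entry`, `find`, and `find_entry`). The caller supplies the hash and an equality closure, and `entry` also takes a re-hash closure used when the table grows; this is illustrative code, not part of the repository:

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

use hashbrown::HashTable;

fn main() {
    let hasher = RandomState::new();
    // Store (key, value) pairs directly; the table itself never hashes them.
    let mut table: HashTable<(String, u32)> = HashTable::new();

    let key = "foo".to_string();
    let hash = hasher.hash_one(&key);

    // Insert through `entry`, as in the new `rehash` and insert paths.
    table
        .entry(hash, |(k, _)| *k == key, |(k, _)| hasher.hash_one(k))
        .insert((key.clone(), 1));

    // Look up through `find`, as in the new `get` / `get_mut`.
    let found = table.find(hash, |(k, _)| *k == key).map(|(_, v)| *v);
    assert_eq!(found, Some(1));

    // Remove through `find_entry(..).remove()`, as in the new `remove`.
    if let Ok(entry) = table.find_entry(hash, |(k, _)| *k == key) {
        let ((removed_key, removed_val), _vacant) = entry.remove();
        assert_eq!((removed_key.as_str(), removed_val), ("foo", 1));
    }
}
```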
5 changes: 5 additions & 0 deletions src/lib.rs
@@ -1467,6 +1467,11 @@ impl EGraph {
self.type_info.sorts.get(&value.tag)
}

/// Returns a sort based on the type
pub fn get_sort<S: Sort + Send + Sync>(&self) -> Option<Arc<S>> {
self.type_info.get_sort_by(|_| true)
}

/// Returns the first sort that satisfies the type and predicate if there's one.
/// Otherwise returns none.
pub fn get_sort_by<S: Sort + Send + Sync>(
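For illustration, the new `get_sort` is just `get_sort_by` with an always-true predicate: it returns the first registered sort whose concrete type is `S`. Below is a standalone sketch of that lookup-and-downcast pattern; the trait and types are simplified stand-ins, not egglog's actual API:

```rust
use std::any::Any;
use std::sync::Arc;

// Simplified stand-in for the crate's `Sort` trait.
trait Sort: Any + Send + Sync {
    fn name(&self) -> &str;
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
}

struct I64Sort;

impl Sort for I64Sort {
    fn name(&self) -> &str {
        "i64"
    }
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }
}

// First sort whose concrete type is `S` and which satisfies the predicate.
fn get_sort_by<S: Sort>(
    sorts: &[Arc<dyn Sort>],
    pred: impl Fn(&Arc<S>) -> bool,
) -> Option<Arc<S>> {
    sorts.iter().find_map(|sort| {
        Arc::clone(sort)
            .as_any_arc()
            .downcast::<S>()
            .ok()
            .filter(|s| pred(s))
    })
}

// `get_sort` is then just `get_sort_by` with an always-true predicate.
fn get_sort<S: Sort>(sorts: &[Arc<dyn Sort>]) -> Option<Arc<S>> {
    get_sort_by(sorts, |_| true)
}

fn main() {
    let sorts: Vec<Arc<dyn Sort>> = vec![Arc::new(I64Sort)];
    let found = get_sort::<I64Sort>(&sorts).expect("i64 sort should be registered");
    println!("found sort: {}", found.name());
}
```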
33 changes: 11 additions & 22 deletions src/termdag.rs
@@ -1,6 +1,6 @@
use crate::{
ast::Literal,
util::{HashMap, HashSet},
util::{HashMap, HashSet, IndexSet},
Expr, GenericExpr, Symbol,
};

@@ -21,14 +21,8 @@ pub enum Term {
/// A hashconsing arena for [`Term`]s.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct TermDag {
// think of nodes as a map from indices to Terms.
// invariant: the nodes map and the hashcons map are inverses.
// note that this implies:
// - no duplicates in nodes
// - every element of node is a key in hashcons
// - every key of hashcons is in nodes
pub nodes: Vec<Term>,
pub hashcons: HashMap<Term, TermId>,
/// A bidirectional map between deduplicated `Term`s and indices.
nodes: IndexSet<Term>,
}

#[macro_export]
@@ -54,14 +48,14 @@ impl TermDag {
///
/// Panics if the term does not already exist in this [TermDag].
pub fn lookup(&self, node: &Term) -> TermId {
*self.hashcons.get(node).unwrap()
self.nodes.get_index_of(node).unwrap()
}

/// Convert the given id to the corresponding term.
///
/// Panics if the id is not valid.
pub fn get(&self, id: TermId) -> Term {
self.nodes[id].clone()
pub fn get(&self, id: TermId) -> &Term {
self.nodes.get_index(id).unwrap()
}

/// Make and return a [`Term::App`] with the given head symbol and children,
Expand Down Expand Up @@ -97,10 +91,8 @@ impl TermDag {
}

fn add_node(&mut self, node: &Term) {
if self.hashcons.get(node).is_none() {
let idx = self.nodes.len();
self.nodes.push(node.clone());
self.hashcons.insert(node.clone(), idx);
if self.nodes.get(node).is_none() {
self.nodes.insert(node.clone());
}
}

@@ -138,10 +130,7 @@ impl TermDag {
Term::App(op, args) => {
let args: Vec<_> = args
.iter()
.map(|a| {
let term = self.get(*a);
self.term_to_expr(&term)
})
.map(|a| self.term_to_expr(self.get(*a)))
.collect();
Expr::call_no_span(*op, args)
}
@@ -215,7 +204,7 @@ mod tests {
// x, y, (g x y), and the root call to f
// so we can compute expected answer by hand:
assert_eq!(
td.nodes,
td.nodes.as_slice().iter().cloned().collect::<Vec<_>>(),
vec![
Term::Var("x".into()),
Term::Var("y".into()),
Expand All @@ -236,7 +225,7 @@ mod tests {
let (td, t) = parse_term(s);
match_term_app!(t; {
("f", [_, x, _, _]) =>
assert_eq!(td.term_to_expr(&td.get(*x)), ast::GenericExpr::Var(DUMMY_SPAN.clone(), Symbol::new("x"))),
assert_eq!(td.term_to_expr(td.get(*x)), ast::GenericExpr::Var(DUMMY_SPAN.clone(), Symbol::new("x"))),
(head, _) => panic!("unexpected head {}, in {}:{}:{}", head, file!(), line!(), column!())
})
}
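For reference, here is a standalone sketch of the `IndexSet` pattern `TermDag` now uses for hashconsing: insertion order doubles as the id, so a single structure replaces the old `nodes` vector plus `hashcons` map (illustrative code, not from the repository):

```rust
use indexmap::IndexSet;

fn main() {
    let mut nodes: IndexSet<String> = IndexSet::new();

    // `insert_full` returns (index, newly_inserted); duplicates keep their
    // original index, which is what makes this a hashcons.
    let (x_id, _) = nodes.insert_full("x".to_string());
    let (y_id, _) = nodes.insert_full("y".to_string());
    let (x_again, _) = nodes.insert_full("x".to_string());
    assert_eq!((x_id, y_id), (0, 1));
    assert_eq!(x_again, x_id);

    // id -> term, like `TermDag::get`.
    assert_eq!(nodes.get_index(y_id).map(String::as_str), Some("y"));
    // term -> id, like `TermDag::lookup`.
    assert_eq!(nodes.get_index_of("x"), Some(x_id));
}
```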