Merge branch 'main' into multiset
saulshanabrook authored Oct 25, 2024
2 parents f090848 + b9f4c58 commit 0042f72
Showing 10 changed files with 1,860 additions and 283 deletions.
37 changes: 16 additions & 21 deletions Cargo.lock


2 changes: 1 addition & 1 deletion Cargo.toml
@@ -26,7 +26,7 @@ bin = ["dep:clap", "dep:env_logger", "egraph-serialize/serde", "dep:serde_json"]
wasm-bindgen = ["instant/wasm-bindgen", "dep:getrandom"]

[dependencies]
hashbrown = { version = "0.14", features = ["raw"] }
hashbrown = { version = "0.15" }
indexmap = "2.0"
instant = "0.1"
log = "0.4"
16 changes: 16 additions & 0 deletions README.md
@@ -57,6 +57,22 @@ for the REPL.

To run the tests use `make test`.

## Benchmarks

We run all of our "examples" [as benchmarks in codspeed](https://codspeed.io/egraphs-good/egglog). These run in CI
for every commit on main and for all PRs. CodSpeed runs the examples with extra instrumentation added so that it can
capture a single trace of the CPU interactions ([src](https://docs.codspeed.io/features/understanding-the-metrics/)):

> CodSpeed instruments your benchmarks to measure the performance of your code. A benchmark will be run only once and the CPU behavior will be simulated. This ensures that the measurement is as accurate as possible, taking into account not only the instructions executed but also the cache and memory access patterns. The simulation gives us an equivalent of the CPU cycles that includes cache and memory access.

Since many of the shorter-running benchmarks have unstable timings due to non-deterministic performance ([like in the memory allocator](https://github.com/oxc-project/backlog/issues/89)),
we ["ignore"](https://docs.codspeed.io/features/ignoring-benchmarks/) them in codspeed. That way, we still
capture their performance, but their timings don't show up in our reports by default.

We currently use 50ms as our cutoff; any benchmarks shorter than that are ignored. This number was selected to ignore
any benchmarks whose timings change by more than 1% when they haven't been modified. Note that all the ignoring is done manually,
so if you add another example that's short, an admin on the codspeed project will need to manually ignore it.

# Documentation

To view documentation, run `cargo doc --open`.
3 changes: 1 addition & 2 deletions benches/example_benchmarks.rs
@@ -11,8 +11,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
for entry in glob::glob("tests/**/*.egg").unwrap() {
let path = entry.unwrap().clone();
let path_string = path.to_string_lossy().to_string();
// Skip python_array_optimize since it is too slow and doesn't even reflect the current python implementation
if path_string.contains("fail-typecheck") || path_string.contains("python_array_optimize") {
if path_string.contains("fail-typecheck") {
continue;
}
let name = path.file_stem().unwrap().to_string_lossy().to_string();
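For context, the full benchmark harness follows the standard criterion shape. A self-contained sketch under that assumption is below; the stand-in workload (just reading the file) replaces the real egglog run, which is elided here:

```rust
use criterion::{criterion_group, criterion_main, Criterion};

pub fn criterion_benchmark(c: &mut Criterion) {
    // One benchmark per example program, skipping the ones that are
    // expected to fail typechecking.
    for entry in glob::glob("tests/**/*.egg").unwrap() {
        let path = entry.unwrap();
        let path_string = path.to_string_lossy().to_string();
        if path_string.contains("fail-typecheck") {
            continue;
        }
        let name = path.file_stem().unwrap().to_string_lossy().to_string();
        c.bench_function(&name, |b| {
            // Stand-in workload: read the program text. The actual benchmark
            // runs the program through egglog at this point.
            b.iter(|| std::fs::read_to_string(&path_string).unwrap());
        });
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
```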
8 changes: 7 additions & 1 deletion src/ast/mod.rs
@@ -78,12 +78,18 @@ pub(crate) enum Ruleset {
pub const DEFAULT_FILENAME: &str = "<unnamed.egg>";
pub const DUMMY_FILENAME: &str = "<internal.egg>";

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SrcFile {
pub name: String,
pub contents: Option<String>,
}

impl Debug for SrcFile {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "SrcFile({}, contents=...)", self.name)
}
}

#[derive(Clone, Copy)]
pub struct Location {
pub line: usize,
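As a quick illustration of the new `Debug` behavior, here is a standalone sketch (not the crate's actual type) showing how the potentially large file contents are elided from debug output:

```rust
use std::fmt::{self, Debug};

#[allow(dead_code)]
struct SrcFile {
    name: String,
    contents: Option<String>,
}

impl Debug for SrcFile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Print only the name; the contents can be an entire program.
        write!(f, "SrcFile({}, contents=...)", self.name)
    }
}

fn main() {
    let src = SrcFile {
        name: "<unnamed.egg>".to_string(),
        contents: Some("(datatype Math (Num i64))".to_string()),
    };
    assert_eq!(format!("{src:?}"), "SrcFile(<unnamed.egg>, contents=...)");
}
```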
48 changes: 22 additions & 26 deletions src/function/table.rs
@@ -32,7 +32,7 @@ use std::{
ops::Range,
};

use hashbrown::raw::RawTable;
use hashbrown::HashTable;

use super::binary_search::binary_search_table_by_key;
use crate::{util::BuildHasher as BH, TupleOutput, Value, ValueVec};
@@ -51,11 +51,11 @@ struct TableOffset {
pub(crate) struct Table {
max_ts: u32,
n_stale: usize,
table: RawTable<TableOffset>,
table: HashTable<TableOffset>,
pub(crate) vals: Vec<(Input, TupleOutput)>,
}

/// Used for the RawTable probe sequence.
/// Used for the HashTable probe sequence.
macro_rules! search_for {
($slf:expr, $hash:expr, $inp:expr) => {
|to| {
@@ -97,19 +97,18 @@ impl Table {

/// Rehashes the table, invalidating any offsets stored into the table.
pub(crate) fn rehash(&mut self) {
let mut src = 0usize;
let mut dst = 0usize;
self.table.clear();
self.vals.retain(|(inp, _)| {
if inp.live() {
let hash = hash_values(inp.data());
let to = TableOffset { hash, off: dst };
self.table
.insert(hash, TableOffset { hash, off: dst }, |to| to.hash);
src += 1;
.entry(hash, |to2| to2 == &to, |to2| to2.hash)
.insert(to);
dst += 1;
true
} else {
src += 1;
false
}
});
@@ -120,16 +119,16 @@
/// table.
pub(crate) fn get(&self, inputs: &[Value]) -> Option<&TupleOutput> {
let hash = hash_values(inputs);
let TableOffset { off, .. } = self.table.get(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[*off].0.live());
Some(&self.vals[*off].1)
let &TableOffset { off, .. } = self.table.find(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[off].0.live());
Some(&self.vals[off].1)
}

pub(crate) fn get_mut(&mut self, inputs: &[Value]) -> Option<&mut TupleOutput> {
let hash: u64 = hash_values(inputs);
let TableOffset { off, .. } = self.table.get(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[*off].0.live());
Some(&mut self.vals[*off].1)
let &TableOffset { off, .. } = self.table.find(hash, search_for!(self, hash, inputs))?;
debug_assert!(self.vals[off].0.live());
Some(&mut self.vals[off].1)
}

/// Insert the given data into the table at the given timestamp. Return the
@@ -161,7 +160,7 @@ impl Table {
self.max_ts = ts;
let hash = hash_values(inputs);
if let Some(TableOffset { off, .. }) =
self.table.get_mut(hash, search_for!(self, hash, inputs))
self.table.find_mut(hash, search_for!(self, hash, inputs))
{
let (inp, prev) = &mut self.vals[*off];
let prev_subsumed = prev.subsumed;
@@ -193,14 +192,13 @@ impl Table {
subsumed,
},
));
self.table.insert(
let to = TableOffset {
hash,
TableOffset {
hash,
off: new_offset,
},
|off| off.hash,
);
off: new_offset,
};
self.table
.entry(hash, |to2| to2 == &to, |to2| to2.hash)
.insert(to);
}

/// One more than the maximum (potentially) valid offset into the table.
@@ -237,13 +235,11 @@ impl Table {
/// removed.
pub(crate) fn remove(&mut self, inp: &[Value], ts: u32) -> bool {
let hash = hash_values(inp);
let entry = if let Some(entry) = self.table.remove_entry(hash, search_for!(self, hash, inp))
{
entry
} else {
let Ok(entry) = self.table.find_entry(hash, search_for!(self, hash, inp)) else {
return false;
};
self.vals[entry.off].0.stale_at = ts;
let (TableOffset { off, .. }, _) = entry.remove();
self.vals[off].0.stale_at = ts;
self.n_stale += 1;
true
}
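For reference, here is a standalone sketch of the hashbrown 0.15 `HashTable` calls this file now relies on (`entry`, `find`, and `find_entry`). The caller supplies the hash and an equality closure, and `entry` also takes a re-hash closure used when the table grows; this is illustrative code, not part of the repository:

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

use hashbrown::HashTable;

fn main() {
    let hasher = RandomState::new();
    // Store (key, value) pairs directly; the table itself never hashes them.
    let mut table: HashTable<(String, u32)> = HashTable::new();

    let key = "foo".to_string();
    let hash = hasher.hash_one(&key);

    // Insert through `entry`, as in the new `rehash` and insert paths.
    table
        .entry(hash, |(k, _)| *k == key, |(k, _)| hasher.hash_one(k))
        .insert((key.clone(), 1));

    // Look up through `find`, as in the new `get` / `get_mut`.
    let found = table.find(hash, |(k, _)| *k == key).map(|(_, v)| *v);
    assert_eq!(found, Some(1));

    // Remove through `find_entry(..).remove()`, as in the new `remove`.
    if let Ok(entry) = table.find_entry(hash, |(k, _)| *k == key) {
        let ((removed_key, removed_val), _vacant) = entry.remove();
        assert_eq!((removed_key.as_str(), removed_val), ("foo", 1));
    }
}
```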
5 changes: 5 additions & 0 deletions src/lib.rs
@@ -1467,6 +1467,11 @@ impl EGraph {
self.type_info.sorts.get(&value.tag)
}

/// Returns a sort based on the type
pub fn get_sort<S: Sort + Send + Sync>(&self) -> Option<Arc<S>> {
self.type_info.get_sort_by(|_| true)
}

/// Returns the first sort that satisfies the type and predicate if there's one.
/// Otherwise returns none.
pub fn get_sort_by<S: Sort + Send + Sync>(
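For illustration, the new `get_sort` is just `get_sort_by` with an always-true predicate: it returns the first registered sort whose concrete type is `S`. Below is a standalone sketch of that lookup-and-downcast pattern; the trait and types are simplified stand-ins, not egglog's actual API:

```rust
use std::any::Any;
use std::sync::Arc;

// Simplified stand-in for the crate's `Sort` trait.
trait Sort: Any + Send + Sync {
    fn name(&self) -> &str;
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
}

struct I64Sort;

impl Sort for I64Sort {
    fn name(&self) -> &str {
        "i64"
    }
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }
}

// First sort whose concrete type is `S` and which satisfies the predicate.
fn get_sort_by<S: Sort>(
    sorts: &[Arc<dyn Sort>],
    pred: impl Fn(&Arc<S>) -> bool,
) -> Option<Arc<S>> {
    sorts.iter().find_map(|sort| {
        Arc::clone(sort)
            .as_any_arc()
            .downcast::<S>()
            .ok()
            .filter(|s| pred(s))
    })
}

// `get_sort` is then just `get_sort_by` with an always-true predicate.
fn get_sort<S: Sort>(sorts: &[Arc<dyn Sort>]) -> Option<Arc<S>> {
    get_sort_by(sorts, |_| true)
}

fn main() {
    let sorts: Vec<Arc<dyn Sort>> = vec![Arc::new(I64Sort)];
    let found = get_sort::<I64Sort>(&sorts).expect("i64 sort should be registered");
    println!("found sort: {}", found.name());
}
```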
33 changes: 11 additions & 22 deletions src/termdag.rs
@@ -1,6 +1,6 @@
use crate::{
ast::Literal,
util::{HashMap, HashSet},
util::{HashMap, HashSet, IndexSet},
Expr, GenericExpr, Symbol,
};

@@ -21,14 +21,8 @@ pub enum Term {
/// A hashconsing arena for [`Term`]s.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct TermDag {
// think of nodes as a map from indices to Terms.
// invariant: the nodes map and the hashcons map are inverses.
// note that this implies:
// - no duplicates in nodes
// - every element of node is a key in hashcons
// - every key of hashcons is in nodes
pub nodes: Vec<Term>,
pub hashcons: HashMap<Term, TermId>,
/// A bidirectional map between deduplicated `Term`s and indices.
nodes: IndexSet<Term>,
}

#[macro_export]
@@ -54,14 +48,14 @@ impl TermDag {
///
/// Panics if the term does not already exist in this [TermDag].
pub fn lookup(&self, node: &Term) -> TermId {
*self.hashcons.get(node).unwrap()
self.nodes.get_index_of(node).unwrap()
}

/// Convert the given id to the corresponding term.
///
/// Panics if the id is not valid.
pub fn get(&self, id: TermId) -> Term {
self.nodes[id].clone()
pub fn get(&self, id: TermId) -> &Term {
self.nodes.get_index(id).unwrap()
}

/// Make and return a [`Term::App`] with the given head symbol and children,
Expand Down Expand Up @@ -97,10 +91,8 @@ impl TermDag {
}

fn add_node(&mut self, node: &Term) {
if self.hashcons.get(node).is_none() {
let idx = self.nodes.len();
self.nodes.push(node.clone());
self.hashcons.insert(node.clone(), idx);
if self.nodes.get(node).is_none() {
self.nodes.insert(node.clone());
}
}

@@ -138,10 +130,7 @@ impl TermDag {
Term::App(op, args) => {
let args: Vec<_> = args
.iter()
.map(|a| {
let term = self.get(*a);
self.term_to_expr(&term)
})
.map(|a| self.term_to_expr(self.get(*a)))
.collect();
Expr::call_no_span(*op, args)
}
@@ -215,7 +204,7 @@ mod tests {
// x, y, (g x y), and the root call to f
// so we can compute expected answer by hand:
assert_eq!(
td.nodes,
td.nodes.as_slice().iter().cloned().collect::<Vec<_>>(),
vec![
Term::Var("x".into()),
Term::Var("y".into()),
Expand All @@ -236,7 +225,7 @@ mod tests {
let (td, t) = parse_term(s);
match_term_app!(t; {
("f", [_, x, _, _]) =>
assert_eq!(td.term_to_expr(&td.get(*x)), ast::GenericExpr::Var(DUMMY_SPAN.clone(), Symbol::new("x"))),
assert_eq!(td.term_to_expr(td.get(*x)), ast::GenericExpr::Var(DUMMY_SPAN.clone(), Symbol::new("x"))),
(head, _) => panic!("unexpected head {}, in {}:{}:{}", head, file!(), line!(), column!())
})
}
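For reference, here is a standalone sketch of the `IndexSet` pattern `TermDag` now uses for hashconsing: insertion order doubles as the id, so a single structure replaces the old `nodes` vector plus `hashcons` map (illustrative code, not from the repository):

```rust
use indexmap::IndexSet;

fn main() {
    let mut nodes: IndexSet<String> = IndexSet::new();

    // `insert_full` returns (index, newly_inserted); duplicates keep their
    // original index, which is what makes this a hashcons.
    let (x_id, _) = nodes.insert_full("x".to_string());
    let (y_id, _) = nodes.insert_full("y".to_string());
    let (x_again, _) = nodes.insert_full("x".to_string());
    assert_eq!((x_id, y_id), (0, 1));
    assert_eq!(x_again, x_id);

    // id -> term, like `TermDag::get`.
    assert_eq!(nodes.get_index(y_id).map(String::as_str), Some("y"));
    // term -> id, like `TermDag::lookup`.
    assert_eq!(nodes.get_index_of("x"), Some(x_id));
}
```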