Skip to content

Commit

Permalink
prune impossible nonterminal-state triples
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Oct 12, 2023
1 parent d5dfbce commit f798ffd
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,41 @@ package ai.hypergraph.kaliningraph.parsing
import ai.hypergraph.kaliningraph.types.*
import kotlin.time.TimeSource

infix fun FSA.intersect(cfg: CFG) = cfg.intersect(this)
/** Receiver-flipped form of [CFG.intersectLevFSA]: lets the automaton appear on the left of the infix call. */
infix fun FSA.intersectLevFSA(cfg: CFG): CFG = cfg.intersectLevFSA(this)
// http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf#page=2
// https://browse.arxiv.org/pdf/2209.06809.pdf#page=5

infix fun CFG.intersect(fsa: FSA): CFG {
infix fun CFG.intersectLevFSA(fsa: FSA): CFG {
val clock = TimeSource.Monotonic.markNow()
val initFinal =
(fsa.init * fsa.final).map { (q, r) -> "START -> [$q,START,$r]" }

val transits =
fsa.Q.map { (q, a, r) -> "[$q,$a,$r] -> $a" }

fun Triple<Σᐩ, Σᐩ, Σᐩ>.isValid(): Boolean {
fun Σᐩ.coords() =
substringAfter("_").split("/")
.let { (i, j) -> i.toInt() to j.toInt() }

fun Pair<Int, Int>.dominates(other: Pair<Int, Int>) =
first <= other.first && second <= other.second

return first.coords().dominates(second.coords()) &&
second.coords().dominates(third.coords())
}

// For each production A → BC in P, for every p, q, r ∈ Q,
// we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
val binaryProds =
nonterminalProductions.map {
val triples = fsa.states * fsa.states * fsa.states
val (A, B, C) = it.π1 to it.π2[0] to it.π2[1]
triples.map { (p, q, r) -> "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]" }
triples
// For a general CFG ∩ FSA construction we are not allowed to prune like
// this, but it works here because we assume a Levenshtein FSA, which is
// monotone and acyclic.
.filter { it.isValid() }
.map { (p, q, r) -> "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]" }
}.flatten()

// For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
Expand All @@ -34,7 +50,7 @@ infix fun CFG.intersect(fsa: FSA): CFG {

return (initFinal + transits + binaryProds + unitProds).joinToString("\n")
.parseCFG(normalize = false)
.also { print("∩-grammar has ${it.size} total productions") }
.also { println("∩-grammar has ${it.size} total productions") }
.removeVestigalProductions().normalForm.noNonterminalStubs
.also { println("∩-grammar has ${it.size} useful productions") }
.also { println("∩-grammar construction took: ${clock.elapsedNow().inWholeMilliseconds}ms") }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ class BarHillelTest {
val origStr = "1 + 1"
val levFSA = makeLevFSA(origStr, 2, simpleCFG.terminals)

val levCFG = levFSA.intersect(simpleCFG)
val levCFG = levFSA.intersectLevFSA(simpleCFG)

fun testLevenshteinAcceptance(s: Σᐩ) {
assertTrue(levFSA.recognizes(s))
Expand Down Expand Up @@ -259,13 +259,13 @@ class BarHillelTest {
@Test
fun testBooleanBarHillel() {
val arithCFG = """
START -> START and START | START or START | ( START ) | true | false | ! START
START -> START and START | START or START | ( START ) | T | F | ! START
""".parseCFG()

val arithCFGNoEps = arithCFG.noEpsilonOrNonterminalStubs

val origStr = "true and ! ( true )"
val levCFG = arithCFGNoEps.intersect(makeLevFSA(origStr, 1, arithCFG.terminals))
val origStr = "T and ! ( F )"
val levCFG = arithCFGNoEps.intersectLevFSA(makeLevFSA(origStr, 1, arithCFG.terminals))

val template = List(8) { "_" }.joinToString(" ")
val lbhSet = levCFG.solveSeq(template).toSet()//.onEach { println(it) }
Expand All @@ -278,4 +278,39 @@ class BarHillelTest {

assertEquals(lbhSet, efset, "Levenshtein/Bar-Hillel and enumerative filtering should return the same solutions")
}

/*
./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.BarHillelTest.testDyckBarHillel"
*/
@Test
fun testDyckBarHillel() {
  // Single source of truth for the edit budget: it sizes the Levenshtein
  // automaton and bounds the brute-force filter, so the two cannot drift apart.
  val radius = 2
  val origStr = "( ( ) ) [ { }"

  // Bracket grammar over (), [] and {}.
  val dyckCFG = """
START -> ( START ) | ( ) | START START | [ ] | [ START ] | { } | { START }
""".parseCFG()

  val dyckCFGNoEps = dyckCFG.noEpsilonOrNonterminalStubs

  // System under test: grammar intersected with the Levenshtein automaton around origStr.
  val levCFG = dyckCFGNoEps.intersectLevFSA(makeLevFSA(origStr, radius, dyckCFG.terminals))

  val template = List(9) { "_" }.joinToString(" ")
  val lbhSet = levCFG.solveSeq(template).toSet()//.onEach { println(it) }
    .also { println("Found ${it.size} solutions using Levenshtein/Bar-Hillel") }

  // Every `root` that shows up in the parse table of at least one solution;
  // printed below to see how much of the intersection grammar is exercised.
  val totalParticipatingNonterminals =
    lbhSet.map { solution ->
      levCFG.parseTable(solution).data.map { cell -> cell.map { tree -> tree.root } }
    }.flatten().flatten().toSet()

  println("Participation ratio: ${totalParticipatingNonterminals.size}/${levCFG.nonterminals.size}")
  println("Active nonterminals: $totalParticipatingNonterminals")
  println("Inactive nonterminals: ${levCFG.nonterminals - totalParticipatingNonterminals}")

  // Oracle: enumerate the plain grammar and keep strings within the edit budget.
  // `<= radius` is the same predicate as the former `< 3` for integer distances.
  val efset = dyckCFG.solveSeq(template).toList()
    .filter { levenshtein(it, origStr) <= radius }.toSet()
    // .onEach { println(it) }
    .also { println("Found ${it.size} solutions using enumerative filtering") }

  // The oracle goes first: kotlin.test's assertEquals takes (expected, actual, message).
  assertEquals(efset, lbhSet, "Levenshtein/Bar-Hillel and enumerative" +
    " filtering should return the same solutions, but symmetric difference was: " +
    "${(lbhSet + efset) - (lbhSet intersect efset)}")
}
}

0 comments on commit f798ffd

Please sign in to comment.