diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt index ebf3dc09..1c644d20 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/BarHillel.kt @@ -56,21 +56,24 @@ private infix fun CFG.intersectLevFSAP(fsa: FSA): CFG { val lengthBoundsCache = lengthBounds.let { lb -> ntLst.map { lb[it] ?: 0..0 } } val validTriples: List> = fsa.validTriples - val ct = prods.map { it.first }.toSet().flatMap { fsa.validPairs * setOf(it) } - val ct1: Map, Boolean> = - ct.associate { Pair(it.π1.π1 to it.π3 to it.π2.π1, lengthBoundsCache[it.π3].overlaps(fsa.SPLP(it.π1, it.π2))) } - val ct2: Map, Boolean> = - ct.associate { Pair(it.π1.π1 to it.π3 to it.π2.π1, fsa.obeys(it.π1, it.π2, it.π3, parikhMap)) } + val ct = (fsa.validPairs * nonterminals.indices.toSet()).toList() +// val ct1 = Array(fsa.states.size) { Array(nonterminals.size) { Array(fsa.states.size) { false } } } +// ct.filter { lengthBoundsCache[it.π3].overlaps(fsa.SPLP(it.π1, it.π2)) } +// .forEach { ct1[it.π1.π1][it.π3][it.π2.π1] = true } + val ct2 = Array(fsa.states.size) { Array(nonterminals.size) { Array(fsa.states.size) { false } } } + ct.filter { fsa.obeys(it.π1, it.π2, it.π3, parikhMap) } + .forEach { ct2[it.π1.π1][it.π3][it.π2.π1] = true } val binaryProds = prods.map { // if (i % 100 == 0) println("Finished ${i}/${nonterminalProductions.size} productions") val (A, B, C) = it.π1 to it.π2[0] to it.π2[1] + val trip = A to B to C validTriples // CFG ∩ FSA - in general we are not allowed to do this, but it works // because we assume a Levenshtein FSA, which is monotone and acyclic. - .filter { it.checkCT(A to B to C, ct1) } - .filter { it.checkCT(A to B to C, ct2) } +// .filter { it.checkCT(trip, ct1) } + .filter { it.checkCT(trip, ct2) } // .filter { it.obeysLevenshteinParikhBounds(A to B to C, fsa, parikhMap) } .map { (a, b, c) -> val (p, q, r) = fsa.stateLst[a.π1] to fsa.stateLst[b.π1] to fsa.stateLst[c.π1] @@ -288,7 +291,7 @@ fun Π3A.isCompatibleWith(nts: Π3A, fsa: FSA, lengthBounds: List.checkCT(nts: Π3A, ct: Map<Π3A, Boolean>): Boolean = - true == ct[π1.π1 to nts.π1 to π3.π1] && - true == ct[π1.π1 to nts.π2 to π2.π1] && - true == ct[π2.π1 to nts.π3 to π3.π1] \ No newline at end of file +fun Π3A.checkCT(nts: Π3A, ct: Array>>): Boolean = + ct[π1.π1][nts.π1][π3.π1] && + ct[π1.π1][nts.π2][π2.π1] && + ct[π2.π1][nts.π3][π3.π1] \ No newline at end of file diff --git a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt index 2d2fcc29..a967c421 100644 --- a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt +++ b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt @@ -171,14 +171,9 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG { val validTriples: List> = fsa.validTriples val ct = (fsa.validPairs * nonterminals.indices.toSet()).toList() - val ct1: Map, Boolean> = ct.parallelStream() - .filter { lengthBoundsCache[it.π3].overlaps(fsa.SPLP(it.π1, it.π2)) } - .map { Pair(it.π1.π1 to it.π3 to it.π2.π1, true) } - .collect(Collectors.toMap({ it.first }, { it.second })) - val ct2: Map, Boolean> = ct.parallelStream() - .filter { fsa.obeys(it.π1, it.π2, it.π3, parikhMap) } - .map { Pair(it.π1.π1 to it.π3 to it.π2.π1, true) } - .collect(Collectors.toMap({ it.first }, { it.second })) + val ct2 = Array(fsa.states.size) { Array(nonterminals.size) { Array(fsa.states.size) { false } } } + ct.filter { fsa.obeys(it.π1, it.π2, it.π3, parikhMap) } + .forEach { ct2[it.π1.π1][it.π3][it.π2.π1] = true } val elimCounter = AtomicInteger(0) val counter = AtomicInteger(0) @@ -187,13 +182,14 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG { prods.parallelStream().flatMap { if (BH_TIMEOUT < clock.elapsedNow()) throw Exception("Timeout: ${nts.size} nts") val (A, B, C) = it.π1 to it.π2[0] to it.π2[1] + val trip = A to B to C validTriples.stream() // CFG ∩ FSA - in general we are not allowed to do this, but it works // because we assume a Levenshtein FSA, which is monotone and acyclic. // .filter { it.isCompatibleWith(A to B to C, fsa, lengthBoundsCache).also { if (!it) elimCounter.incrementAndGet() } } - .filter { it.checkCT(A to B to C, ct1).also { if (!it) elimCounter.incrementAndGet() } } +// .filter { it.checkCT(trip, ct1).also { if (!it) elimCounter.incrementAndGet() } } // .filter { it.obeysLevenshteinParikhBounds(A to B to C, fsa, parikhMap).also { if (!it) elimCounter.incrementAndGet() } } - .filter { it.checkCT(A to B to C, ct2).also { if (!it) elimCounter.incrementAndGet() } } + .filter { it.checkCT(trip, ct2).also { if (!it) elimCounter.incrementAndGet() } } .map { (a, b, c) -> if (MAX_PRODS < counter.incrementAndGet()) throw Exception("∩-grammar has too many productions! (>$MAX_PRODS)") val (p, q, r) = fsa.stateLst[a.π1] to fsa.stateLst[b.π1] to fsa.stateLst[c.π1]