Skip to content

Commit

Permalink
Implement SLR parse table generation
Browse files Browse the repository at this point in the history
  • Loading branch information
mpsijm committed Feb 25, 2020
1 parent bbe76de commit 589e581
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 190 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,23 @@
public interface ISymbol {

String name();

boolean isNullable();

void setNullable(boolean nullable);

ICharacterClass getFirst();

void setFirst(ICharacterClass first);

ICharacterClass getFollow();

void setFollow(ICharacterClass follow);

ICharacterClass followRestriction();

List<ICharacterClass[]> followRestrictionLookahead();

org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol();
void normalizeFollowRestrictionLookahead();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
import java.util.Map;
import java.util.Set;

import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
import org.metaborg.parsetable.characterclasses.ICharacterClass;

import org.metaborg.sdf2table.grammar.ISymbol;
import org.metaborg.parsetable.symbols.SortCardinality;
import org.metaborg.parsetable.symbols.SyntaxContext;
import org.metaborg.sdf2table.deepconflicts.Context;
Expand All @@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol {
protected List<ICharacterClass[]> followRestrictionsLookahead;

private boolean nullable = false;
private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS;

/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see org.metaborg.sdf2table.grammar.ISymbol#name()
*/
@Override
public abstract String name();
@Override public abstract String name();

public boolean isNullable() {
return nullable;
Expand All @@ -39,6 +41,22 @@ public void setNullable(boolean nullable) {
this.nullable = nullable;
}

/**
 * Returns the FIRST set currently stored on this symbol.
 *
 * The backing field starts out as {@code CharacterClassFactory.EMPTY_CHARACTER_CLASS} and only changes through
 * {@link #setFirst(ICharacterClass)}.
 *
 * @return the character class held in the {@code first} field
 */
@Override public ICharacterClass getFirst() {
return first;
}

/**
 * Replaces the FIRST set stored on this symbol.
 *
 * The previous value is overwritten, not merged; callers that want to accumulate must union with
 * {@link #getFirst()} themselves before calling this method.
 *
 * @param first
 *            the character class to store as this symbol's FIRST set
 */
@Override public void setFirst(ICharacterClass first) {
this.first = first;
}

/**
 * Returns the FOLLOW set currently stored on this symbol.
 *
 * The backing field starts out as {@code CharacterClassFactory.EMPTY_CHARACTER_CLASS} and only changes through
 * {@link #setFollow(ICharacterClass)}.
 *
 * @return the character class held in the {@code follow} field
 */
@Override public ICharacterClass getFollow() {
return follow;
}

/**
 * Replaces the FOLLOW set stored on this symbol.
 *
 * The previous value is overwritten, not merged; callers that want to accumulate must union with
 * {@link #getFollow()} themselves before calling this method.
 *
 * @param follow
 *            the character class to store as this symbol's FOLLOW set
 */
@Override public void setFollow(ICharacterClass follow) {
this.follow = follow;
}

/**
 * Returns the textual form of this symbol, which is exactly the value of {@link #name()}.
 *
 * @return the result of {@code name()}
 */
@Override public String toString() {
return name();
}
Expand Down Expand Up @@ -147,5 +165,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() {
return toParseTableSymbol(null, null);
}

public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality);
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext,
SortCardinality cardinality);
}
Original file line number Diff line number Diff line change
@@ -1,52 +1,19 @@
package org.metaborg.sdf2table.parsetable;

import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.*;

import org.metaborg.parsetable.IParseTable;
import org.metaborg.parsetable.states.IState;
import org.metaborg.sdf2table.deepconflicts.Context;
import org.metaborg.sdf2table.deepconflicts.ContextPosition;
import org.metaborg.sdf2table.deepconflicts.ContextType;
import org.metaborg.sdf2table.deepconflicts.ContextualFactory;
import org.metaborg.sdf2table.deepconflicts.ContextualProduction;
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
import org.metaborg.sdf2table.deepconflicts.DeepConflictsAnalyzer;
import org.metaborg.sdf2table.grammar.ContextFreeSymbol;
import org.metaborg.sdf2table.grammar.GeneralAttribute;
import org.metaborg.sdf2table.grammar.IAttribute;
import org.metaborg.sdf2table.grammar.IProduction;
import org.metaborg.sdf2table.grammar.ISymbol;
import org.metaborg.sdf2table.grammar.IterSepSymbol;
import org.metaborg.sdf2table.grammar.IterStarSepSymbol;
import org.metaborg.sdf2table.grammar.IterStarSymbol;
import org.metaborg.sdf2table.grammar.IterSymbol;
import org.metaborg.sdf2table.grammar.Layout;
import org.metaborg.sdf2table.grammar.LexicalSymbol;
import org.metaborg.sdf2table.grammar.NormGrammar;
import org.metaborg.sdf2table.grammar.OptionalSymbol;
import org.metaborg.sdf2table.grammar.Priority;
import org.metaborg.sdf2table.grammar.Production;
import org.metaborg.sdf2table.grammar.Sort;
import org.metaborg.sdf2table.grammar.Symbol;
import org.metaborg.sdf2table.deepconflicts.*;
import org.metaborg.sdf2table.grammar.*;
import org.metaborg.sdf2table.util.CheckOverlap;
import org.metaborg.sdf2table.util.Graph;
import org.metaborg.sdf2table.util.SCCNodes;
import org.metaborg.util.log.ILogger;
import org.metaborg.util.log.LoggerUtils;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.google.common.collect.*;

public class ParseTable implements IParseTable, Serializable {

Expand Down Expand Up @@ -143,6 +110,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
// create JSGLR parse table productions
createJSGLRParseTableProductions(productionLabels);

// calculate FIRST-set and FOLLOW-set
calculateFirst();
calculateFollow();

// create states if the table should not be generated dynamically
initialProduction = grammar.getInitialProduction();

Expand Down Expand Up @@ -190,6 +161,137 @@ private void calculateNullable() {
} while(markedNullable);
}

/**
 * Computes the FIRST set of every symbol returned by {@code grammar.getSymbols()} and stores it on the symbol
 * through {@code setFirst}.
 *
 * The computation has two phases:
 * <ol>
 * <li>Direct contributions: a character class symbol is its own FIRST set, and every character class that can
 * appear first in a production of a symbol (after a prefix of nullable symbols) is unioned into that symbol's
 * FIRST set. While scanning, the relation "A contains-the-FIRSTs-of B" is recorded.</li>
 * <li>Indirect contributions: the recorded relation is closed with a Tarjan-style strongly connected components
 * traversal (see {@code traverseFirst}).</li>
 * </ol>
 *
 * Based on https://compilers.iecc.com/comparch/article/01-04-079
 *
 * NOTE(review): this relies on {@code isNullable()} already being final for every symbol, so nullability is
 * presumably computed before this method runs; confirm against the constructor.
 */
private void calculateFirst() {
    SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
    Set<ISymbol> symbols = grammar.getSymbols();
    SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();

    for(ISymbol s : symbols) {
        // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
        if(s instanceof CharacterClassSymbol) {
            s.setFirst(((CharacterClassSymbol) s).getCC());
            continue;
        }

        for(IProduction p : symbolProductionsMapping.get(s)) {
            // Direct contributions:
            // If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
            for(ISymbol rhs : p.rightHand()) {
                // Then, a is in FIRST(A).
                if(rhs instanceof CharacterClassSymbol) {
                    // Union instead of overwrite: a symbol may have several productions that each contribute a
                    // different character class, and all of them belong to FIRST(A).
                    s.setFirst(s.getFirst().union(((CharacterClassSymbol) rhs).getCC()));
                    break;
                }

                // Indirect contributions: calculate contains-the-FIRSTs-of
                // If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
                // Then, A contains-the-FIRSTs-of B
                containsTheFirstOf.put(s, rhs);

                // A non-nullable symbol ends the prefix that can contribute to FIRST(A).
                if(!rhs.isNullable()) {
                    break;
                }
            }
        }
    }

    // Indirect contributions: Tarjan's algorithm for strongly connected components.
    // DONE is larger than any stack index, so finished symbols never lower another symbol's low-link.
    final int DONE = symbols.size();
    final Map<ISymbol, Integer> low = new HashMap<>();
    final Stack<ISymbol> stack = new Stack<>();
    for(ISymbol v : symbols) {
        if(low.get(v) == null /* CLEAN */) {
            traverseFirst(v, containsTheFirstOf, DONE, low, stack);
        }
    }
}

/**
 * Depth-first step of the strongly-connected-components traversal that propagates FIRST sets along the
 * "contains-the-FIRSTs-of" relation.
 *
 * On entry {@code v} is pushed on {@code stack} and its low-link is set to its own stack index. Every successor
 * that has no low-link yet is visited recursively; afterwards the successor's FIRST set is unioned into
 * {@code v}'s and {@code v}'s low-link is lowered to the successor's if that is smaller. When {@code v} turns out
 * to be the root of its component, all symbols from {@code v} upwards are popped, given {@code v}'s FIRST set and
 * marked {@code DONE}.
 *
 * @param v
 *            the symbol being visited
 * @param containsTheFirstOf
 *            edges A -> B meaning FIRST(A) includes FIRST(B)
 * @param DONE
 *            marker stored in {@code low} for finished symbols; the caller passes the number of symbols, which
 *            exceeds every stack index
 * @param low
 *            low-link per symbol; a missing entry means the symbol has not been visited
 * @param stack
 *            symbols of the components that are still open
 */
private void traverseFirst(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf, int DONE,
    Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
    stack.push(v);
    final int ownIndex = stack.size() - 1;
    low.put(v, ownIndex);

    for(ISymbol successor : containsTheFirstOf.get(v)) {
        if(!low.containsKey(successor)) {
            // Not visited yet: finish the successor first.
            traverseFirst(successor, containsTheFirstOf, DONE, low, stack);
        }
        // Change compared to the article at compilers.iecc.com: the union happens for every successor, not only
        // for the ones that were visited just now.
        v.setFirst(v.getFirst().union(successor.getFirst()));
        low.put(v, Math.min(low.get(v), low.get(successor)));
    }

    if(low.get(v) != ownIndex) {
        // v belongs to a component whose root is deeper in the stack; that root will distribute the result.
        return;
    }

    // v is the root of its component: every member receives the accumulated FIRST set.
    while(stack.size() > ownIndex) {
        ISymbol member = stack.pop();
        member.setFirst(v.getFirst());
        low.put(member, DONE);
    }
}

/**
 * Computes the FOLLOW set of every symbol returned by {@code grammar.getSymbols()} and stores it on the symbol
 * through {@code setFollow}.
 *
 * For each occurrence of a symbol Ai in the right-hand side of a production of A, two relations are recorded:
 * <ul>
 * <li>FOLLOW(Ai) contains FIRST(Aj) for every Aj after Ai that is reachable over nullable symbols only;</li>
 * <li>FOLLOW(Ai) contains FOLLOW(A), but only when every symbol after Ai is nullable (this includes Ai being
 * the last symbol).</li>
 * </ul>
 * The relations are then closed with a Tarjan-style strongly connected components traversal (see
 * {@code traverseFollow}).
 *
 * Based on https://compilers.iecc.com/comparch/article/01-04-079
 * and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
 *
 * NOTE(review): this reads {@code getFirst()} and {@code isNullable()} of the symbols, so FIRST sets and
 * nullability are presumably computed before this method runs; confirm against the constructor.
 */
private void calculateFollow() {
    SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
    Set<ISymbol> symbols = grammar.getSymbols();
    SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
    SetMultimap<ISymbol, ISymbol> containsTheFollowOf = HashMultimap.create();

    for(ISymbol s : symbols) {
        for(IProduction p : symbolProductionsMapping.get(s)) {
            List<ISymbol> rightHand = p.rightHand();
            for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
                ISymbol symbolI = rightHand.get(i);

                // Stays true only if every symbol after Ai is nullable (trivially true for the last symbol).
                boolean restIsNullable = true;

                // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
                for(int j = i + 1; j < rightHandSize; j++) {
                    // If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
                    ISymbol symbolJ = rightHand.get(j);
                    containsTheFirstOf.put(symbolI, symbolJ);

                    if(!symbolJ.isNullable()) {
                        restIsNullable = false;
                        break;
                    }
                }

                // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A).
                // Without this guard FOLLOW(A) would leak into FOLLOW(Ai) even when a non-nullable symbol
                // separates Ai from the end of the production.
                if(restIsNullable) {
                    containsTheFollowOf.put(symbolI, s);
                }
            }
        }
    }

    // Indirect contributions: Tarjan's algorithm for strongly connected components.
    // DONE is larger than any stack index, so finished symbols never lower another symbol's low-link.
    final int DONE = symbols.size();
    final Map<ISymbol, Integer> low = new HashMap<>();
    final Stack<ISymbol> stack = new Stack<>();
    for(ISymbol v : symbols) {
        if(low.get(v) == null /* CLEAN */) {
            traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
        }
    }
}

/**
 * Depth-first step of the strongly-connected-components traversal that propagates FOLLOW sets.
 *
 * On entry {@code v} is pushed on {@code stack} and its low-link is set to its own stack index. First the FIRST
 * sets of all symbols related to {@code v} through {@code containsTheFirstOf} are unioned into FOLLOW(v). Then
 * every successor in {@code containsTheFollowOf} that has no low-link yet is visited recursively; afterwards the
 * successor's FOLLOW set is unioned into {@code v}'s and {@code v}'s low-link is lowered to the successor's if
 * that is smaller. When {@code v} turns out to be the root of its component, all symbols from {@code v} upwards
 * are popped, given {@code v}'s FOLLOW set and marked {@code DONE}.
 *
 * @param v
 *            the symbol being visited
 * @param containsTheFirstOf
 *            edges A -> B meaning FOLLOW(A) includes FIRST(B)
 * @param containsTheFollowOf
 *            edges A -> B meaning FOLLOW(A) includes FOLLOW(B)
 * @param DONE
 *            marker stored in {@code low} for finished symbols; the caller passes the number of symbols, which
 *            exceeds every stack index
 * @param low
 *            low-link per symbol; a missing entry means the symbol has not been visited
 * @param stack
 *            symbols of the components that are still open
 */
private void traverseFollow(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf,
    SetMultimap<ISymbol, ISymbol> containsTheFollowOf, int DONE, Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
    stack.push(v);
    final int ownIndex = stack.size() - 1;
    low.put(v, ownIndex);

    // Direct contributions: FIRST sets of the symbols that can come right after v.
    for(ISymbol next : containsTheFirstOf.get(v)) {
        v.setFollow(v.getFollow().union(next.getFirst()));
    }

    // Indirect contributions: FOLLOW sets inherited from other symbols.
    for(ISymbol successor : containsTheFollowOf.get(v)) {
        if(!low.containsKey(successor)) {
            // Not visited yet: finish the successor first.
            traverseFollow(successor, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
        }
        // Change compared to the article at compilers.iecc.com: the union happens for every successor, not only
        // for the ones that were visited just now.
        v.setFollow(v.getFollow().union(successor.getFollow()));
        low.put(v, Math.min(low.get(v), low.get(successor)));
    }

    if(low.get(v) != ownIndex) {
        // v belongs to a component whose root is deeper in the stack; that root will distribute the result.
        return;
    }

    // v is the root of its component: every member receives the accumulated FOLLOW set.
    while(stack.size() > ownIndex) {
        ISymbol member = stack.pop();
        member.setFollow(v.getFollow());
        low.put(member, DONE);
    }
}

private void calculateRecursion() {
// direct and indirect left recursion :
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
Expand Down Expand Up @@ -353,8 +455,7 @@ private void normalizePriorities() {
// dangling prefix
// p1 : A = A γ and p = α A γ or vice-versa
boolean matchSuffix = false;
for(i = p.higher().arity() - 1, j = p.lower().arity() - 1; i >= 0
&& j >= 0; i--, j--) {
for(i = p.higher().arity() - 1, j = p.lower().arity() - 1; i >= 0 && j >= 0; i--, j--) {
if(p.higher().rightHand().get(i).equals(p.lower().rightHand().get(j))) {
matchSuffix = true;
} else {
Expand Down Expand Up @@ -444,28 +545,6 @@ private boolean mutuallyRecursive(Priority p) {
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
}

/*
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
*
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
* first_components); } } }
*
*
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
*
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
* d.index); } }
*
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
* while(t != v); components.add(component); } }
*/

private void extractExpressionGrammars(SCCNodes<ISymbol> scc) {

for(ISymbol s : grammar.getSymbols()) {
Expand Down Expand Up @@ -651,8 +730,7 @@ private void checkMissingPriorities() {

if(!conflicts.get(p2).contains(p1)) {
conflicts.put(p1, p2);
if(p1.getRhs().size() > p2.getRhs().size()
&& !Symbol.isListNonTerminal(p1.leftHand())) {
if(p1.getRhs().size() > p2.getRhs().size() && !Symbol.isListNonTerminal(p1.leftHand())) {
logger.warn("GRAMMAR MAY CONTAIN AMBIGUITIES: No priority declaration "
+ printWithConstructor(p1) + " > " + printWithConstructor(p2));
} else if(!Symbol.isListNonTerminal(p2.leftHand())) {
Expand Down Expand Up @@ -688,8 +766,7 @@ private void checkMissingPriorities() {

if(!conflicts.get(p2).contains(p1)) {
conflicts.put(p1, p2);
if(p1.arity() > p2.arity()
&& !Symbol.isListNonTerminal(p1.leftHand())) {
if(p1.arity() > p2.arity() && !Symbol.isListNonTerminal(p1.leftHand())) {
logger.warn("GRAMMAR MAY CONTAIN AMBIGUITIES: No priority declaration "
+ printWithConstructor(p1) + " > " + printWithConstructor(p2));
} else if(!Symbol.isListNonTerminal(p2.leftHand())) {
Expand Down
Loading

0 comments on commit 589e581

Please sign in to comment.