"""
Eliminate General Left Recursion

by Anoop Sarkar (anoop@sfu.ca)

* Uses NLTK so install that first
* Copyright (c) 2016 Anoop Sarkar 
* Distributed under the MIT License 
* https://opensource.org/licenses/MIT
"""

import nltk
from collections import defaultdict

inclass_grammar = nltk.CFG.fromstring("""
   S -> S S
   S -> a
""")

grammar1 = nltk.CFG.fromstring("""
    S -> A 'a' | 'b'
    A -> A 'c' | S 'd' | 
    """)

output1 = nltk.CFG.fromstring("""
    S -> A 'a' | 'b'
    A -> 'b' 'd' A_r | A_r
    A_r -> 'c' A_r | 'a' 'd' A_r | 
    """)

grammar2 = nltk.CFG.fromstring("""
    S -> A 'a' | 'b'
    A -> A 'c' | S 'd' | B
    B -> B 'e' | A 'f' | S 'g' | 'h'
    """)

def ordered_lhs(g):
    lhs_set = set([r.lhs() for r in g.productions()])
    ordered_lhs = [n for n in filter(lambda x: g.start() != x, lhs_set)]
    ordered_lhs.sort()
    ordered_lhs.insert(0, g.start())
    return ordered_lhs

def new_nonterminal(n):
    nsym = n.symbol() + '_r'
    return nltk.Nonterminal(nsym)

def print_grammar(g):
    print 'start:', g.start()
    for r in g.productions():
        print 'rule:', r

def print_rules(nonterminal, rules):
    print "Current nonterminal:", nonterminal
    cur_g = nltk.CFG(nonterminal, rules, calculate_leftcorners=False)
    print cur_g


def lhs_rules(rules, lhs):
    return filter(lambda x: x.lhs() == lhs, rules)

def group_rhs(rules, lhs, match):
    matched = []
    not_matched = []
    for r in lhs_rules(rules, lhs):
        rhs = r.rhs()
        if len(r) > 0:
            first = rhs[0]
            if nltk.grammar.is_nonterminal(first) and first == match:
                matched.append(r)
            else:
                not_matched.append(r)
        else:
            not_matched.append(r)
    return matched, not_matched

def noleft_immediate(rules, lhs):
    (left, not_left) = group_rhs(rules, lhs, lhs)
    new_rules = []
    if len(left) > 0:
        new_lhs = new_nonterminal(lhs)
        for r in not_left:
            new_rules.append( nltk.Production(lhs, r.rhs() + (new_lhs,)) )
        for r in left:
            old_rhs = r.rhs()
            if len(old_rhs) > 0:
                new_rules.append( nltk.Production(new_lhs, r.rhs()[1:] + (new_lhs,)) )
            else:
                new_rules.append( nltk.Production(lhs, old_rhs) )
        new_rules.append( nltk.Production(new_lhs, ()) )
    else:
        new_rules = rules
    return new_rules

def expand_match(rules, expand_rules, lhs, match):
    (matched, not_matched) = group_rhs(expand_rules, lhs, match)
    new_rules = []
    if len(matched) > 0:
        for r in matched:
            for match_rule in lhs_rules(rules, match):
                new_rules.append( nltk.Production(lhs, match_rule.rhs() + r.rhs()[1:]) )
        new_rules.extend( not_matched )
    else:
        new_rules.extend( not_matched )
    return new_rules

def noleft(g):
    print "Before:\n", g
    lhs_list = ordered_lhs(g)
    rules = []
    for i in range(len(lhs_list)):
        expand_rules = g.productions(lhs_list[i])
        for j in range(i):
            expand_rules = expand_match(rules, expand_rules, lhs_list[i], lhs_list[j])
            #print_rules(lhs_list[i], expand_rules)
        rules.extend( noleft_immediate(expand_rules, lhs_list[i]) )
        #print_rules(lhs_list[i], rules)
    new_g = nltk.CFG(g.start(), rules, calculate_leftcorners=False)
    print "After:\n", new_g
    
#noleft(grammar1)
#print output1

#noleft(grammar2)
noleft(inclass_grammar)