Pratt Parsers for Dummies

Recursive descent works ideally when you can make a decision about a piece of code to be parsed using the current context and token.


Expressions spoil the picture : postfix, infix and others. Problem: you cannot understand what type of expression you are processing until you parse its first half. Often the priority of the operation and its associativity are also important for you so that the constructed AST has the correct structure.


In this article, we will write a parser for the Go dialect, the features of which we will consider a little later. As you can see, the Pratt algorithm solves most of our problems.



Terminology


. , , - .


: , (, ).
: , .
/expression: , . : .
Statement: , . : .
/precedence: .
: , . : x++.
: , . : ++x.
: , . : x+y.
/left-associative: .
/right-associative: .
non-associative: .
: , AST .
AST: , .
/: , .
: .
: .
: ().
: , .
: , .




Go ++ and ++ Go


In order not to waste time on lexical analysis , we will take everything we need from the standard library:



, , :


type Token struct {
    kind  token.Token //  () , INT/IDENT/ADD
    value string      //   
}

scanner.Scanner lexer, :


  • Peek() β€” ,
  • Consume() β€” ,

go/ast, , .


Go right-associative , .


Go β€” statement, expression. . , .


. , , .


. , README : github.com/richardjennings/prattparser.


, .


// exprNode     AST.
type exprNode interface {
    expr()
}

type nameExpr struct {
    Value string
}

type prefixExpr struct {
    Op  token.Token //  , , '+'  '-'
    Arg exprNode    //   
}

func (e *nameExpr) expr()   {}
func (e *prefixExpr) expr() {}

, parseExpr(), :


func (p *exprParser) parseExpr() exprNode {
    tok := p.lexer.Consume()
    switch tok.kind {
    case token.IDENT:
        return p.parseName(tok)
    case token.ADD, token.SUB:
        return p.parsePrefixExpr(tok)
    case token.LPAREN:
        return p.parseParenExpr(tok)
    // ...   
    }
}

func (p *exprParser) parseName(tok Token) exprNode {
    return &nameExpr{Value: tok.value}
}

func (p *exprParser) parsePrefixExpr(tok Token) exprNode {
    arg := p.parseExpr()
    return &prefixExpr{Op: tok.kind, Arg: arg}
}

- .


, . , -, parsePrefixExpr(), map , . , , map.


, prefixParselet:


type prefixParselet func(Token) exprNode

map[token.Token]prefixParselet. :


func newExprParser() *exprParser {
    p := &exprParser{
        prefixParselets: make(map[token.Token]prefixParselet),
    }

    prefixExpr := func(kinds ...token.Token) {
        for _, kind := range kinds {
            p.prefixParselets[kind] = p.parsePrefixExpr
        }
    }

    p.prefixParselets[token.IDENT] = p.parseName
    prefixExpr(token.ADD, token.SUB)

    return p
}

prefixExpr() . , (. ).


func (p *exprParser) parseExpr() exprNode {
  tok := p.lexer.consume()
  prefix, ok := p.prefixParselets[tok.kind]
  if !ok {
    // Parse error: unexpected token
  }
  return prefix(tok)
}


, x+y.


nameExpr{Value:"x"}, +. , .


infixParselet, prefixParselet , Expr, . x+y x.


type infixParselet func(left exprNode, tok Token) exprNode

map. , .


, + - binaryExpr:


func (p *exprParser) parseBinaryExpr(left exprNode, tok token) exprNode {
    right := p.parseExpr()
    return &binaryExpr{Op: tok.kind, Left: left, Right: right}
}

parseExpr() :


func (p *exprParser) parseExpr() exprNode {
    tok := p.lexer.Consume()
    prefix, ok := p.prefixParselets[tok.kind]
    if !ok {
        // Parse error: unexpected token
    }
    left := prefix(tok)
    tok = p.lexer.Peek()
    infix, ok := p.infixParselets[tok.kind]
    if !ok {
        return left
    }
    p.lexer.Consume() //  skip/discard  
    return infix(left, tok)
}

:


  1. -: x-y-z => x-(y-z)
  2. : x*y+z => x*(y+z)


, .


{token.Token => precedence}. , , :


p.prefixPrecedenceTab = map[token.Token]int{
    token.ADD: 4,
    token.SUB: 4,
}

p.infixPrecedenceTab = map[token.Token]int{
    token.ADD: 2,
    token.SUB: 2,
    token.MUL: 3,
    token.QUO: 3,
    // ...   
}

parseExpr() precedence:


func (p *exprParser) parseExpr(precedence int) exprNode {
    tok := p.lexer.Consume()
    prefix, ok := p.prefixParselets[tok.kind]
    if !ok {
        // Parse error: unexpected token
    }
    left := prefix(tok)

    for precedence < p.infixPrecedenceTab[p.lexer.Peek().kind] {
        tok := p.lexer.Consume()
        infix := p.infixParselets[tok.kind]
        left = infix(left, tok)
    }

    return left
}

, , , AST.


precedence () parseExpr():


func (p *exprParser) parseBinaryExpr(left exprNode, tok Token) exprNode {
    right := p.parseExpr(p.infixPrecedenceTab[tok.kind])
    return &binaryExpr{Op: tok.kind, Left: left, Right: right}
}

parseBinaryExpr() -. - , 1 :


func (p *exprParser) rparseBinaryExpr(left exprNode, tok Token) exprNode {
    right := p.parseExpr(p.infixPrecedenceTab[tok.kind] - 1)
    return &binaryExpr{Op: tok.kind, Left: left, Right: right}
}

, << -, rparseBinaryExpr().



β€” infixParselet, '(' parseExpr(0) , ')'.


() β€” prefixParselet, '(' parseExpr(0), ')'.


func (p *exprParser) parseParenExpr(tok Token) exprNode {
    x := p.parseExpr(0)
    p.expect(token.RPAREN)
    return x
}

β€” infixParselet:


func (p *exprParser) parsePostfixExpr(left exprNode, tok Token) exprNode {
    return &postfixExpr{Op: tok.kind, Arg: left}
}


, , .


, , , helper- precedence:


prefixExpr := func(precedence int, kinds ...token.Token) {
    for _, kind := range kinds {
        p.prefixParselets[kind] = p.parsePrefixExpr
        p.prefixPrecedenceTab[kind] = precedence
    }
}

:


addPrefixParselet(token.LPAREN, 0, p.parseParenExpr)
addPrefixParselet(token.IDENT, 0, p.parseNameExpr)
leftAssocBinaryExpr(1, token.LOR)  // ||
leftAssocBinaryExpr(2, token.LAND) // &&
leftAssocBinaryExpr(3,
    token.EQL, // ==
    token.NEQ, // !=
)
rightAssocBinaryExpr(4,
    token.SHL, // <<
)
leftAssocBinaryExpr(4,
    token.ADD, // +
    token.SUB, // -
    token.SHR, // >>
)
leftAssocBinaryExpr(5,
    token.MUL, // *
    token.QUO, // /
    token.REM, // %
)
prefixExpr(6,
    token.ADD, // +
    token.SUB, // -
    token.INC, // ++
    token.DEC, // --
)
postfixExpr(7,
    token.INC, // ++
    token.DEC, // --
)
addInfixParselet(token.LPAREN, 8, p.parseCallExpr)

, , PrecAdd=3, PrecMult=4 .


map.


, , newExprParser . prefixParselet infixParslet , β€” *exprParser.


, , . , : , β€” .



Pratt Parsers: Expression Parsing Made Easy (Bob Nystrom). , , , , ", ".


, .


github.com/quasilyte/pratt-parsers-go.


: github.com/richardjennings/prattparser.




:



All Articles