OCaml: How to Construct AST during LL Parsing without stack











up vote
0
down vote

favorite












I wrote a predictive parser for a LL1 grammar. Each nonterminal A has a corresponding parseA method, which takes in a tokenlist, and returns the remainder of tokenlist and a parse tree.



I'm confused about which AST method to call in my parser. Is there a general approach to figuring this out?



This is my attempt:



Take for instance a subsection of my grammar:



expr -> t eprime 
eprime -> PLUS t eprime | MINUS t eprime | ε
t -> t tprime
tprime -> TIMES f tprime | DIVIDE f tprime | ε
f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID


I have four parse methods, one for each nonterminal.



let parseExpr tokenlist =
match tokenlist.head with
| "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))
| "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
let e_expr tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Expression(t_expr, e_expr))


let parseEPrime tokenlist =
match tokenlist with
| "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Add(expr_t, expr_eprime))
| "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
(tokenlist_e, Ast.Minus(expr_t, expr_eprime))
| "SEMI" -> (tokenlist, )
| "RPAREN" -> (tokenlist, )
| _ -> raise error


let parseT tokenlist =
match tokenlist.lookathead with
| "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
| "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| "ID" -> let expr_f tokenlist_f = parseF tokenlist in
let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
(tokenlist_tprime, Ast.F(expr_f, expr_tprime))
| _-> raise error

let parseTprime tokenlist =
match tokenlist.lookathead with
| "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in
let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
(tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
| "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in
let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
(tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
| "PLUS" -> (tokenlist, )
| "MINUS" -> (tokenlist, )
| "SEMI" -> (tokenlist, )
| "RPAREN" -> (tokenlist, )
| _ -> raise error

let parseF tokenlist =
match tokenlist.lookathead with
| "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in
match next tokenlist_expr with
| "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
| "LITERAL" -> (next tokenlist, Ast.FLiteral)
| "TRUE" -> (next tokenlist, Ast.BoolLit)
| "FALSE" -> (next tokenlist, Ast.FBool)
| "ID" -> (next tokenlist, Ast.Id)
| _ -> raise error


As you can probably tell from my code, I wrote a type for every nonterminal, and then had a method for every production of that nonterminal.



(*expr -> T E* *)
type expr =
| Expression of t eprime


(*T -> F T*)
type t =
| F of f * tprime

(*E* -> + T E*
E* -> - T E*
E* -> ε *)
type eprime =
| Add of t eprime
| Minus of t eprime
| Eempty


(*T* -> TIMES F T*
T* -> / F T*
T* -> ε*)
type tprime =
| Divide of f * tprime
| Times of f * tprime
| TEmpty

(*F -> LPAREN E RPAREN
F -> Literal
F -> TRUE
F -> FALSE
F -> ID*)
type f =
| ExpressionParen of expr
| Literal of int
| BoolLit of bool
| Id of string


But I don't know my approach keeps too much unnecessary information than a AST would normally shake out (I imagine an AST to be a parse tree that shakes and rids itself of unnecessary leaves). So far, I've just left off the parentheses and semi colons. I'm afraid I'm leaving too much on by having type t, type f, type tprime, type eprime in my AST. But if I were to remove them, I wouldn't know how to write the type expr in my AST.










share|improve this question


























    up vote
    0
    down vote

    favorite












    I wrote a predictive parser for a LL1 grammar. Each nonterminal A has a corresponding parseA method, which takes in a tokenlist, and returns the remainder of tokenlist and a parse tree.



    I'm confused about which AST method to call in my parser. Is there a general approach to figuring this out?



    This is my attempt:



    Take for instance a subsection of my grammar:



    expr -> t eprime 
    eprime -> PLUS t eprime | MINUS t eprime | ε
    t -> t tprime
    tprime -> TIMES f tprime | DIVIDE f tprime | ε
    f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID


    I have four parse methods, one for each nonterminal.



    let parseExpr tokenlist =
    match tokenlist.head with
    | "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
    let e_expr tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Expression(t_expr, e_expr))
    | "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
    let e_expr tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Expression(t_expr, e_expr))
    | "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
    let e_expr tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Expression(t_expr, e_expr))
    | "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
    let e_expr tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Expression(t_expr, e_expr))
    | "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
    let e_expr tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Expression(t_expr, e_expr))


    let parseEPrime tokenlist =
    match tokenlist with
    | "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
    let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Add(expr_t, expr_eprime))
    | "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
    let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
    (tokenlist_e, Ast.Minus(expr_t, expr_eprime))
    | "SEMI" -> (tokenlist, )
    | "RPAREN" -> (tokenlist, )
    | _ -> raise error


    let parseT tokenlist =
    match tokenlist.lookathead with
    | "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in
    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
    | "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in
    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
    (tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
    | "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in
    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
    | "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in
    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
    | "ID" -> let expr_f tokenlist_f = parseF tokenlist in
    let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
    (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
    | _-> raise error

    let parseTprime tokenlist =
    match tokenlist.lookathead with
    | "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in
    let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
    (tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
    | "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in
    let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
    (tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
    | "PLUS" -> (tokenlist, )
    | "MINUS" -> (tokenlist, )
    | "SEMI" -> (tokenlist, )
    | "RPAREN" -> (tokenlist, )
    | _ -> raise error

    let parseF tokenlist =
    match tokenlist.lookathead with
    | "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in
    match next tokenlist_expr with
    | "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
    | "LITERAL" -> (next tokenlist, Ast.FLiteral)
    | "TRUE" -> (next tokenlist, Ast.BoolLit)
    | "FALSE" -> (next tokenlist, Ast.FBool)
    | "ID" -> (next tokenlist, Ast.Id)
    | _ -> raise error


    As you can probably tell from my code, I wrote a type for every nonterminal, and then had a method for every production of that nonterminal.



    (*expr -> T E* *)
    type expr =
    | Expression of t eprime


    (*T -> F T*)
    type t =
    | F of f * tprime

    (*E* -> + T E*
    E* -> - T E*
    E* -> ε *)
    type eprime =
    | Add of t eprime
    | Minus of t eprime
    | Eempty


    (*T* -> TIMES F T*
    T* -> / F T*
    T* -> ε*)
    type tprime =
    | Divide of f * tprime
    | Times of f * tprime
    | TEmpty

    (*F -> LPAREN E RPAREN
    F -> Literal
    F -> TRUE
    F -> FALSE
    F -> ID*)
    type f =
    | ExpressionParen of expr
    | Literal of int
    | BoolLit of bool
    | Id of string


    But I don't know my approach keeps too much unnecessary information than a AST would normally shake out (I imagine an AST to be a parse tree that shakes and rids itself of unnecessary leaves). So far, I've just left off the parentheses and semi colons. I'm afraid I'm leaving too much on by having type t, type f, type tprime, type eprime in my AST. But if I were to remove them, I wouldn't know how to write the type expr in my AST.










    share|improve this question
























      up vote
      0
      down vote

      favorite









      up vote
      0
      down vote

      favorite











      I wrote a predictive parser for a LL1 grammar. Each nonterminal A has a corresponding parseA method, which takes in a tokenlist, and returns the remainder of tokenlist and a parse tree.



      I'm confused about which AST method to call in my parser. Is there a general approach to figuring this out?



      This is my attempt:



      Take for instance a subsection of my grammar:



      expr -> t eprime 
      eprime -> PLUS t eprime | MINUS t eprime | ε
      t -> t tprime
      tprime -> TIMES f tprime | DIVIDE f tprime | ε
      f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID


      I have four parse methods, one for each nonterminal.



      let parseExpr tokenlist =
      match tokenlist.head with
      | "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))


      let parseEPrime tokenlist =
      match tokenlist with
      | "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
      let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Add(expr_t, expr_eprime))
      | "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
      let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Minus(expr_t, expr_eprime))
      | "SEMI" -> (tokenlist, )
      | "RPAREN" -> (tokenlist, )
      | _ -> raise error


      let parseT tokenlist =
      match tokenlist.lookathead with
      | "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
      | "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "ID" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | _-> raise error

      let parseTprime tokenlist =
      match tokenlist.lookathead with
      | "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in
      let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
      (tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
      | "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in
      let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
      (tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
      | "PLUS" -> (tokenlist, )
      | "MINUS" -> (tokenlist, )
      | "SEMI" -> (tokenlist, )
      | "RPAREN" -> (tokenlist, )
      | _ -> raise error

      let parseF tokenlist =
      match tokenlist.lookathead with
      | "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in
      match next tokenlist_expr with
      | "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
      | "LITERAL" -> (next tokenlist, Ast.FLiteral)
      | "TRUE" -> (next tokenlist, Ast.BoolLit)
      | "FALSE" -> (next tokenlist, Ast.FBool)
      | "ID" -> (next tokenlist, Ast.Id)
      | _ -> raise error


      As you can probably tell from my code, I wrote a type for every nonterminal, and then had a method for every production of that nonterminal.



      (*expr -> T E* *)
      type expr =
      | Expression of t eprime


      (*T -> F T*)
      type t =
      | F of f * tprime

      (*E* -> + T E*
      E* -> - T E*
      E* -> ε *)
      type eprime =
      | Add of t eprime
      | Minus of t eprime
      | Eempty


      (*T* -> TIMES F T*
      T* -> / F T*
      T* -> ε*)
      type tprime =
      | Divide of f * tprime
      | Times of f * tprime
      | TEmpty

      (*F -> LPAREN E RPAREN
      F -> Literal
      F -> TRUE
      F -> FALSE
      F -> ID*)
      type f =
      | ExpressionParen of expr
      | Literal of int
      | BoolLit of bool
      | Id of string


      But I don't know my approach keeps too much unnecessary information than a AST would normally shake out (I imagine an AST to be a parse tree that shakes and rids itself of unnecessary leaves). So far, I've just left off the parentheses and semi colons. I'm afraid I'm leaving too much on by having type t, type f, type tprime, type eprime in my AST. But if I were to remove them, I wouldn't know how to write the type expr in my AST.










      share|improve this question













      I wrote a predictive parser for a LL1 grammar. Each nonterminal A has a corresponding parseA method, which takes in a tokenlist, and returns the remainder of tokenlist and a parse tree.



      I'm confused about which AST method to call in my parser. Is there a general approach to figuring this out?



      This is my attempt:



      Take for instance a subsection of my grammar:



      expr -> t eprime 
      eprime -> PLUS t eprime | MINUS t eprime | ε
      t -> t tprime
      tprime -> TIMES f tprime | DIVIDE f tprime | ε
      f -> LPAREN expr RPAREN | LITERAL | TRUE | FALSE | ID


      I have four parse methods, one for each nonterminal.



      let parseExpr tokenlist =
      match tokenlist.head with
      | "LPAREN" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "LITERAL" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "TRUE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "FALSE" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))
      | "ID" -> let t_expr tokenlist_t = next tokenlist |> parseExpr in
      let e_expr tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Expression(t_expr, e_expr))


      let parseEPrime tokenlist =
      match tokenlist with
      | "PLUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
      let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Add(expr_t, expr_eprime))
      | "MINUS" -> let expr_t tokenlist_t = next tokenlist |> parseT in
      let expr_eprime tokenlist_e = parseEPrime tokenlist_t in
      (tokenlist_e, Ast.Minus(expr_t, expr_eprime))
      | "SEMI" -> (tokenlist, )
      | "RPAREN" -> (tokenlist, )
      | _ -> raise error


      let parseT tokenlist =
      match tokenlist.lookathead with
      | "LPAREN" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "LITERAL" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.Literal(expr_f, expr_tprime))
      | "TRUE" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "FALSE" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | "ID" -> let expr_f tokenlist_f = parseF tokenlist in
      let expr_tprime tokenlist_tprime = parseTprime tokenlist_f in
      (tokenlist_tprime, Ast.F(expr_f, expr_tprime))
      | _-> raise error

      let parseTprime tokenlist =
      match tokenlist.lookathead with
      | "TIMES" -> let expr_f tokenlist_f = next tokenlist |> parseF in
      let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
      (tokenlist_tprime, Ast.Times(expr_f, expr_tprime))
      | "DIVIDE" -> let expr_f tokenlist_f = next tokenlist |> parseF in
      let expr_tprime tokenlist_tprime = parseTPrime tokenlist_f in
      (tokenlist_tprime, Ast.Divide(expr_f, expr_tprime))
      | "PLUS" -> (tokenlist, )
      | "MINUS" -> (tokenlist, )
      | "SEMI" -> (tokenlist, )
      | "RPAREN" -> (tokenlist, )
      | _ -> raise error

      let parseF tokenlist =
      match tokenlist.lookathead with
      | "LPAREN" -> let expr tokenlist_expr = next tokenlist |> parseE in
      match next tokenlist_expr with
      | "RPAREN" -> (next tokenlist_expr, Ast.ExpressionParen(expr))
      | "LITERAL" -> (next tokenlist, Ast.FLiteral)
      | "TRUE" -> (next tokenlist, Ast.BoolLit)
      | "FALSE" -> (next tokenlist, Ast.FBool)
      | "ID" -> (next tokenlist, Ast.Id)
      | _ -> raise error


      As you can probably tell from my code, I wrote a type for every nonterminal, and then had a method for every production of that nonterminal.



      (*expr -> T E* *)
      type expr =
      | Expression of t eprime


      (*T -> F T*)
      type t =
      | F of f * tprime

      (*E* -> + T E*
      E* -> - T E*
      E* -> ε *)
      type eprime =
      | Add of t eprime
      | Minus of t eprime
      | Eempty


      (*T* -> TIMES F T*
      T* -> / F T*
      T* -> ε*)
      type tprime =
      | Divide of f * tprime
      | Times of f * tprime
      | TEmpty

      (*F -> LPAREN E RPAREN
      F -> Literal
      F -> TRUE
      F -> FALSE
      F -> ID*)
      type f =
      | ExpressionParen of expr
      | Literal of int
      | BoolLit of bool
      | Id of string


      But I don't know my approach keeps too much unnecessary information than a AST would normally shake out (I imagine an AST to be a parse tree that shakes and rids itself of unnecessary leaves). So far, I've just left off the parentheses and semi colons. I'm afraid I'm leaving too much on by having type t, type f, type tprime, type eprime in my AST. But if I were to remove them, I wouldn't know how to write the type expr in my AST.







      parsing ocaml abstract-syntax-tree






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 10 at 2:24









      Matt

      4751624




      4751624
























          2 Answers
          2






          active

          oldest

          votes

















          up vote
          1
          down vote



          accepted










          Given an AST defined as such:



          type expr =
          | Add of expr * expr
          | Minus of expr * expr
          | Times of expr * expr
          | Divide of expr * expr
          | IntLit of int
          | BoolLit of bool
          | Id of string


          You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:



          let parseExpr tokens =
          let (lhs, remainingTokens) = parseT tokens in
          parseExprPrime lhs remainingTokens

          let parseExprPrime lhs tokens = match tokenlist.lookahead with
          | PLUS :: tokens ->
          let (rhs, remainingTokens) = parseT (next tokens) in
          parseExprPrime (Add (lhs, rhs)) remainingTokens
          | MINUS :: tokens ->
          let (rhs, remainingTokens) = parseT (next tokens) in
          parseExprPrime (Minus (lhs, rhs)) remainingTokens
          | tokens ->
          lhs, tokens


          parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.



          Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.





          As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:



          let rec eval = function
          | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime

          and evalEPrime lhsValue = function
          | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
          | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
          | Eempty -> lhsValue

          and evalT = function
          | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime

          and evalTPrime lhsValue = function
          | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
          | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
          | TEmpty -> lhsValue

          and evalF = function
          | ExpressionParen expr -> eval expr
          | IntLit i -> i


          Using the simplified defintiion it would instead be:



          let rec eval = function
          | Add (lhs, rhs) -> eval lhs + eval rhs
          | Minus (lhs, rhs) -> eval lhs - eval rhs
          | Times (lhs, rhs) -> eval lhs * eval rhs
          | Divide (lhs, rhs) -> eval lhs / eval rhs
          | IntLit i -> i


          So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.






          share|improve this answer




























            up vote
            0
            down vote













            It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.



            I don't know that this is so bad, it's still a good representation of the code.



            One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.



            You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.






            share|improve this answer





















            • I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
              – Matt
              Nov 10 at 3:08










            • The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
              – Jeffrey Scofield
              Nov 10 at 4:37










            • I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
              – Matt
              Nov 10 at 4:59










            • First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
              – Jeffrey Scofield
              Nov 10 at 5:09













            Your Answer






            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "1"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














             

            draft saved


            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53235509%2focaml-how-to-construct-ast-during-ll-parsing-without-stack%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            2 Answers
            2






            active

            oldest

            votes








            2 Answers
            2






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes








            up vote
            1
            down vote



            accepted










            Given an AST defined as such:



            type expr =
            | Add of expr * expr
            | Minus of expr * expr
            | Times of expr * expr
            | Divide of expr * expr
            | IntLit of int
            | BoolLit of bool
            | Id of string


            You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:



            let parseExpr tokens =
            let (lhs, remainingTokens) = parseT tokens in
            parseExprPrime lhs remainingTokens

            let parseExprPrime lhs tokens = match tokenlist.lookahead with
            | PLUS :: tokens ->
            let (rhs, remainingTokens) = parseT (next tokens) in
            parseExprPrime (Add (lhs, rhs)) remainingTokens
            | MINUS :: tokens ->
            let (rhs, remainingTokens) = parseT (next tokens) in
            parseExprPrime (Minus (lhs, rhs)) remainingTokens
            | tokens ->
            lhs, tokens


            parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.



            Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.





            As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:



            let rec eval = function
            | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime

            and evalEPrime lhsValue = function
            | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
            | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
            | Eempty -> lhsValue

            and evalT = function
            | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime

            and evalTPrime lhsValue = function
            | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
            | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
            | TEmpty -> lhsValue

            and evalF = function
            | ExpressionParen expr -> eval expr
            | IntLit i -> i


            Using the simplified defintiion it would instead be:



            let rec eval = function
            | Add (lhs, rhs) -> eval lhs + eval rhs
            | Minus (lhs, rhs) -> eval lhs - eval rhs
            | Times (lhs, rhs) -> eval lhs * eval rhs
            | Divide (lhs, rhs) -> eval lhs / eval rhs
            | IntLit i -> i


            So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.






            share|improve this answer

























              up vote
              1
              down vote



              accepted










              Given an AST defined as such:



              type expr =
              | Add of expr * expr
              | Minus of expr * expr
              | Times of expr * expr
              | Divide of expr * expr
              | IntLit of int
              | BoolLit of bool
              | Id of string


              You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:



              let parseExpr tokens =
              let (lhs, remainingTokens) = parseT tokens in
              parseExprPrime lhs remainingTokens

              let parseExprPrime lhs tokens = match tokenlist.lookahead with
              | PLUS :: tokens ->
              let (rhs, remainingTokens) = parseT (next tokens) in
              parseExprPrime (Add (lhs, rhs)) remainingTokens
              | MINUS :: tokens ->
              let (rhs, remainingTokens) = parseT (next tokens) in
              parseExprPrime (Minus (lhs, rhs)) remainingTokens
              | tokens ->
              lhs, tokens


              parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.



              Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.





              As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:



              let rec eval = function
              | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime

              and evalEPrime lhsValue = function
              | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
              | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
              | Eempty -> lhsValue

              and evalT = function
              | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime

              and evalTPrime lhsValue = function
              | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
              | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
              | TEmpty -> lhsValue

              and evalF = function
              | ExpressionParen expr -> eval expr
              | IntLit i -> i


              Using the simplified defintiion it would instead be:



              let rec eval = function
              | Add (lhs, rhs) -> eval lhs + eval rhs
              | Minus (lhs, rhs) -> eval lhs - eval rhs
              | Times (lhs, rhs) -> eval lhs * eval rhs
              | Divide (lhs, rhs) -> eval lhs / eval rhs
              | IntLit i -> i


              So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.






              share|improve this answer























                up vote
                1
                down vote



                accepted







                up vote
                1
                down vote



                accepted






                Given an AST defined as such:



                type expr =
                | Add of expr * expr
                | Minus of expr * expr
                | Times of expr * expr
                | Divide of expr * expr
                | IntLit of int
                | BoolLit of bool
                | Id of string


                You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:



                let parseExpr tokens =
                let (lhs, remainingTokens) = parseT tokens in
                parseExprPrime lhs remainingTokens

                let parseExprPrime lhs tokens = match tokenlist.lookahead with
                | PLUS :: tokens ->
                let (rhs, remainingTokens) = parseT (next tokens) in
                parseExprPrime (Add (lhs, rhs)) remainingTokens
                | MINUS :: tokens ->
                let (rhs, remainingTokens) = parseT (next tokens) in
                parseExprPrime (Minus (lhs, rhs)) remainingTokens
                | tokens ->
                lhs, tokens


                parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.



                Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.





                As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:



                let rec eval = function
                | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime

                and evalEPrime lhsValue = function
                | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
                | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
                | Eempty -> lhsValue

                and evalT = function
                | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime

                and evalTPrime lhsValue = function
                | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
                | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
                | TEmpty -> lhsValue

                and evalF = function
                | ExpressionParen expr -> eval expr
                | IntLit i -> i


                Using the simplified defintiion it would instead be:



                let rec eval = function
                | Add (lhs, rhs) -> eval lhs + eval rhs
                | Minus (lhs, rhs) -> eval lhs - eval rhs
                | Times (lhs, rhs) -> eval lhs * eval rhs
                | Divide (lhs, rhs) -> eval lhs / eval rhs
                | IntLit i -> i


                So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.






                share|improve this answer












                Given an AST defined as such:



                type expr =
                | Add of expr * expr
                | Minus of expr * expr
                | Times of expr * expr
                | Divide of expr * expr
                | IntLit of int
                | BoolLit of bool
                | Id of string


                You can adjust your parse functions to return such an AST by making the Prime functions take the left operand as an argument like this:



                let parseExpr tokens =
                let (lhs, remainingTokens) = parseT tokens in
                parseExprPrime lhs remainingTokens

                let parseExprPrime lhs tokens = match tokenlist.lookahead with
                | PLUS :: tokens ->
                let (rhs, remainingTokens) = parseT (next tokens) in
                parseExprPrime (Add (lhs, rhs)) remainingTokens
                | MINUS :: tokens ->
                let (rhs, remainingTokens) = parseT (next tokens) in
                parseExprPrime (Minus (lhs, rhs)) remainingTokens
                | tokens ->
                lhs, tokens


                parseT and parseTPrime would look the same (except with multiplication and division of course) and parseF would stay almost exactly as-is, except that Ast.ExpressionParen(expr) would just be expr because I've also removed the ExpressionParen case from the AST definition.



                Note that it's not necessary to distinguish between legal and illegal tokens here. It's okay to just return lhs, tokens both for legal tokens like ; or ) and for illegal tokens. In the latter case, the illegal token will eventually detected by a calling parser - no need to detect the error in multiple places. The same is true for the expression rule: if tokens starts with an illegal token, that will be detected by parseF, so there's no need to check this here. Nor is there any need to repeat the same code four times, so you can just call parseT and parseExprPrime without even looking at the current token and those functions will take care of it.





                As for whether simplifying the AST like this is worth it - let's consider a function eval: expr -> int as a case study (and let's ignore BoolLit and Id for that purpose). Using the original definition it would look like this:



                let rec eval = function
                | Expression (lhs, eprime) -> evalEPrime (evalT lhs) eprime

                and evalEPrime lhsValue = function
                | Add (rhs, rest) -> evalEPrime (lhsValue + evalT rhs) rest
                | Minus (rhs, rest) -> evalEPrime (lhsValue - evalT rhs) rest
                | Eempty -> lhsValue

                and evalT = function
                | T (lhs, tprime) -> evalTPrime (evalF lhs) tprime

                and evalTPrime lhsValue = function
                | Times (rhs, rest) -> evalTPrime (lhsValue * evalF rhs) rest
                | Divide (rhs, rest) -> evalTPrime (lhsValue / evalF rhs) rest
                | TEmpty -> lhsValue

                and evalF = function
                | ExpressionParen expr -> eval expr
                | IntLit i -> i


                Using the simplified defintiion it would instead be:



                let rec eval = function
                | Add (lhs, rhs) -> eval lhs + eval rhs
                | Minus (lhs, rhs) -> eval lhs - eval rhs
                | Times (lhs, rhs) -> eval lhs * eval rhs
                | Divide (lhs, rhs) -> eval lhs / eval rhs
                | IntLit i -> i


                So I'd say the simplified version definitely improves working with the AST and I'd consider it worth it.







                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered Nov 11 at 0:41









                sepp2k

                290k36592604




                290k36592604
























                    up vote
                    0
                    down vote













                    It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.



                    I don't know that this is so bad, it's still a good representation of the code.



                    One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.



                    You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.






                    share|improve this answer





















                    • I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                      – Matt
                      Nov 10 at 3:08










                    • The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                      – Jeffrey Scofield
                      Nov 10 at 4:37










                    • I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                      – Matt
                      Nov 10 at 4:59










                    • First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                      – Jeffrey Scofield
                      Nov 10 at 5:09

















                    up vote
                    0
                    down vote













                    It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.



                    I don't know that this is so bad, it's still a good representation of the code.



                    One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.



                    You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.






                    share|improve this answer





















                    • I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                      – Matt
                      Nov 10 at 3:08










                    • The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                      – Jeffrey Scofield
                      Nov 10 at 4:37










                    • I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                      – Matt
                      Nov 10 at 4:59










                    • First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                      – Jeffrey Scofield
                      Nov 10 at 5:09















                    up vote
                    0
                    down vote










                    up vote
                    0
                    down vote









                    It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.



                    I don't know that this is so bad, it's still a good representation of the code.



                    One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.



                    You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.






                    share|improve this answer












                    It seems true that if you have a type for every nonterminal you'll end up with tree that's more on the concrete side (similar to a parse tree) than the abstract side.



                    I don't know that this is so bad, it's still a good representation of the code.



                    One way to look at it is that your grammar is so simple and streamlined that there's not a lot of incidental punctuation that needs to be left out to make the tree more abstract.



                    You could probably unify the types for expressions and terms. In other words, you could use just one internal node type for your expression trees. Once the precedences have been sorted out in the parsing, both expressions and terms are a list of subexpressions with operators between them.







                    share|improve this answer












                    share|improve this answer



                    share|improve this answer










                    answered Nov 10 at 2:59









                    Jeffrey Scofield

                    46.8k24877




                    46.8k24877












                    • I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                      – Matt
                      Nov 10 at 3:08










                    • The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                      – Jeffrey Scofield
                      Nov 10 at 4:37










                    • I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                      – Matt
                      Nov 10 at 4:59










                    • First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                      – Jeffrey Scofield
                      Nov 10 at 5:09




















                    • I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                      – Matt
                      Nov 10 at 3:08










                    • The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                      – Jeffrey Scofield
                      Nov 10 at 4:37










                    • I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                      – Matt
                      Nov 10 at 4:59










                    • First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                      – Jeffrey Scofield
                      Nov 10 at 5:09


















                    I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                    – Matt
                    Nov 10 at 3:08




                    I can see how I would construct one interal node type for expression trees. However, I'm lost as to how return list of subexpressions in the sub parse routines such that I can return Ast.Expression(x, operator, y) in parseExpr
                    – Matt
                    Nov 10 at 3:08












                    The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                    – Jeffrey Scofield
                    Nov 10 at 4:37




                    The pairs of types (e, eprime) and (t, tprime) are obviously similar, and you can probably just use (e, eprime) for both (with 4 operators instead of 2). But you don't have to use a radically different structure. You can still have Expression of e * eprime.
                    – Jeffrey Scofield
                    Nov 10 at 4:37












                    I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                    – Matt
                    Nov 10 at 4:59




                    I get that - I suppose I'm asking how I would still get the operators - in my current structure, if i see Add in eprime, I call Ast.Add. If I wait till ive trickled through all the productions from expr and then from expr call Ast.Expression(e, eprime) - how does it not lose the operands?
                    – Matt
                    Nov 10 at 4:59












                    First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                    – Jeffrey Scofield
                    Nov 10 at 5:09






                    First, don't get me wrong, I think your current code is fine. Like I said it's not too concrete, really, it's just that the grammar is very streamlined. But to answer your question, just imagine that you define eprime with 5 variants (Add, Minus, Divide, Times, Empty). Then you can use the new eprime instead of the old eprime and tprime.You can still use Ast.Add and Ast.Divide like before, now they're just both constructors of eprime.
                    – Jeffrey Scofield
                    Nov 10 at 5:09




















                     

                    draft saved


                    draft discarded



















































                     


                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53235509%2focaml-how-to-construct-ast-during-ll-parsing-without-stack%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Guess what letter conforming each word

                    Port of Spain

                    Run scheduled task as local user group (not BUILTIN)