{ 

 (* Grail parser.  Adapted from example in Moscow ML distribution. *)

open Lexing Parser;

exception LexicalError of string * int * int (* (message, loc1, loc2) *)

fun lexerError lexbuf s = 
     raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf);

val commentStart = ref 0;  (* Start of outermost comment being scanned *)
 
fun commentNotClosed lexbuf =
     raise LexicalError ("Comment not terminated", 
                         !commentStart, getLexemeEnd lexbuf);
     
val commentDepth = ref 0;  (* Current comment nesting *)


fun getString lexbuf = 
     let 
	val s = getLexeme lexbuf
     in case String.fromString s of 
         SOME _ => quoteval s (* return the unconverted string:  we're only checking for errors *)
       | NONE => lexerError lexbuf "Badly-formed string literal"
     end

 exception noSuchWord
val keyword_table: (string,token) Polyhash.hash_table
       = Polyhash.mkTable(Polyhash.hash, op=) (64, noSuchWord)

(* Hashtable for identifiers *)

val () =
   List.app (fn (str,tok) => Polyhash.insert keyword_table (str, tok))
 [
  ("alias", ALIAS),
  ("class", CLASS),
  ("field", FIELD),
  ("method", METHOD),
  ("let", LET),
  ("fun", FUN),
  ("in", IN),
  ("and", AND),
  ("end", END),
  ("val", VAL),
  ("if", IF),
  ("then", THEN),
  ("else", ELSE),
  ("case", CASE),
  ("of", OF),
  ("new", NEW),
  ("checkcast", CHECKCAST),
  ("extends", EXTENDS),
  ("implements", IMPLEMENTS),
  ("instanceof", INSTANCEOF),
  ("invokestatic", INVOKESTATIC),
  ("invokevirtual", INVOKEVIRTUAL),
  ("invokespecial", INVOKESPECIAL),
  ("invokeinterface", INVOKEINTERFACE),
  ("getfield", GETFIELD),
  ("putfield", PUTFIELD),
  ("getstatic", GETSTATIC),
  ("putstatic", PUTSTATIC),
  ("add", ADD),
  ("mul", MUL),
  ("sub", SUB),
  ("div", DIV),
  ("mod", MOD),
  ("iand", IAND),
  ("ior", IOR),
  ("ixor", IXOR),
  ("ishr", ISHR),
  ("ishl", ISHL),
  ("iushr", IUSHR        ),
  ("null", NULL),
  ("int", INT),
  ("boolean", BOOLEAN),
  ("float", FLOAT),
  ("ref", REF),
  ("void", VOID),
  ("public", PUBLIC),
  ("private", PRIVATE),
  ("protected", PROTECTED),
  ("static", STATIC),
  ("final", FINAL),
  ("makearray", MAKE),
  ("get", GET),
  ("set", SET),
  ("length", LENGTH),
  ("empty", EMPTY),
  ("itof", ITOF),
  ("ftoi", FTOI),
  ("string", STRING),
  ("layout", LAYOUT),
  ("type", TYPE),
  ("$tag", TAG),
  ("$fields", FIELDS)
]

fun mkKeyword lexbuf =
  let val s = getLexeme lexbuf in
    Polyhash.find keyword_table s
    handle noSuchWord => stringval s
  end
}


rule Token = parse
    [` ` `\t` `\n` `\r`]     { Token lexbuf }
  | `-`?[`0`-`9`]+      { case Int.fromString (getLexeme lexbuf) of
                               NONE   => lexerError lexbuf "internal error"
			    | SOME i => intval i
			}
  | `-`?[`0`-`9`]+`.`[`0`-`9`]+([`e``E`]`-`?[`0`-`9`]+)?
      { case Real.fromString (getLexeme lexbuf) of
                               NONE   => lexerError lexbuf "internal error"
			    | SOME i => floatval i
                        }  

  | [`_``a`-`z``A`-`Z``$``?`][`a`-`z``A`-`Z``0`-`9``_``.``:``#``$``'`]*(`<`[`a`-`z`]+`>`)?`'`*
                        { mkKeyword lexbuf }
  | `<`[`a`-`z`]+`>` { stringval (getLexeme lexbuf) }
  | `"`([^`"`]|"\\\"")*`"`
                        { getString lexbuf }  (* I'm not entirely confident about this *)
(*  | `"`([^`\\``"`]*|"\\\\"|`\\``"`)`"` { quoteval (getLexeme lexbuf)  } weirdness *)
(*  | `"`([^`\\``"`]*|"\\\\"|"\\n")`"` { quoteval (getLexeme lexbuf)  }*)
  | "(*"                { commentStart := getLexemeStart lexbuf;
                          commentDepth := 1; 
                          SkipComment lexbuf; Token lexbuf }
  | "//"                { DiscardRestOfLine lexbuf; Token lexbuf }
  | `=`                 { EQUALS }
  | "<>"                { NOTEQUAL }
  | `<`                 { LESS }
  | "<="                { LESSEQ }
  | `>`                 { GREATER }
  | ">="                { GREATEREQ }
  | "=>"                { ARROW }
  | ".."                { DOTDOT }
  | `(`                 { LPAREN }
  | `)`                 { RPAREN }
  | `{`                 { LBRACE }
  | `}`                 { RBRACE }
  | `[`                 { LSQ }
  | `]`                 { RSQ }
  | `|`                 { BAR } 
  | `,`                 { COMMA }
  | eof                 { EOF }
  | _                   { lexerError lexbuf "Illegal symbol in input" }

and SkipComment = parse
     "*)"               { commentDepth := !commentDepth - 1;  
                          if !commentDepth = 0 then ()
                          else SkipComment lexbuf 
                        } 
   | "(*"               { commentDepth := !commentDepth + 1; 
                          SkipComment lexbuf }
   | (eof | `\^Z`)      { commentNotClosed lexbuf }
   | _                  { SkipComment lexbuf }

and DiscardRestOfLine = parse 
     (eof | `\n`) { () }
   | _ { DiscardRestOfLine lexbuf }

;
