(* Camelot lexer *)

{
open Parser;

exception LexicalError of string * int * int (* (message, loc1, loc2) *)

fun lexerError lexbuf s =
    raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf);

val commentStart = ref 0;  (* Start of outermost comment being scanned *)

fun commentNotClosed lexbuf =
    raise LexicalError ("Comment not terminated",
		   !commentStart, getLexemeEnd lexbuf);

val commentDepth = ref 0;  (* Current comment nesting *)

fun getString lexbuf =
     let
	val s = getLexeme lexbuf
     in case String.fromString s of
         SOME _ => quoteval s (* return the unconverted string:  we're only checking for errors *)
       | NONE => lexerError lexbuf "Badly-formed string literal"
     end

exception noSuchWord
val keyword_table: (string,token) Polyhash.hash_table
       = Polyhash.mkTable(Polyhash.hash, op=) (64, noSuchWord)

(* Scan keywords as identifiers and use this function to distinguish them. *)

val () =
   List.app (fn (str,tok) => Polyhash.insert keyword_table (str, tok))
 [
 ( "int",       INT ),
 ( "bool",      BOOL ),
 ( "char",      CHAR ),
 ( "float",     FLOAT ),
 ( "string",    STRING ),
 ( "type",      TYPE ),
 ( "array",     ARRAY ),
 ( "begin",     BEGIN ),
 ( "end",       END ),
 ( "if",        IF ),
 ( "then",      THEN ),
 ( "else",      ELSE ),
 ( "match",     MATCH ),
 ( "with",      WITH ),
 ( "let",       LET ),
 ( "rec",       REC ),
 ( "in",        IN ),
 ( "val",       VAL ),
 ( "fun",       FUN ),
 ( "of",        OF ),
 ( "and",       AND ),
 ( "end",       END ),
 ( "unit",      UNITT ),
 ( "true",      TRUE ),
 ( "false",     FALSE ),
 ( "not",       NOT ),
 ( "mod",       MOD ),
 ( "lnot",      LNOT ),
 ( "land",      LAND ),
 ( "lor",       LOR ),
 ( "lxor",      LXOR ),
 ( "lsl",       LSL ),
 ( "lsr",       LSR ),
 ( "asr",       ASR ),
 ( "new",       NEW ),
 ( "class",     CLASS ),
 ( "classtype", CLASSTYPE ),
 ( "object",    OBJECT ),
 ( "method",    METHOD ),
 ( "field",     FIELD ),
 ( "virtual",   VIRTUAL ),
 ( "inherit",   INHERIT ),
 ( "implement", IMPLEMENT),
 ( "is",        IS ),
 ( "null",      NULL ),
 ( "isnull",    ISNULL ),
 ( "super",     SUPER),
 ( "maker",     MAKER)
]

fun mkKeyword lexbuf =
  let val s = getLexeme lexbuf in
    Polyhash.find keyword_table s
    handle noSuchWord => ident s
  end
}

rule Token = parse
    [` ` `\t` `\n` `\r`]     { Token lexbuf }
  | [`a`-`z``?`][`a`-`z``A`-`Z``0`-`9``_``%``$`]*`'`*  (* Allow compiler-generated names through *)
        { mkKeyword lexbuf }
  | `_`[`a`-`z``A`-`Z``0`-`9``_``%``$`]+`'`*  (* Allow compiler-generated names through *)
        { mkKeyword lexbuf }
  | "<init>"     { mkKeyword lexbuf }  (* for <init> *)
  | "<clinit>"     { mkKeyword lexbuf }  (* for <init> *)
  | [`0`-`9`]+          { case Int.fromString (getLexeme lexbuf) of
			    NONE   => lexerError lexbuf "internal error (Int.fromString)"
                          | SOME i => intval i
			}
  | [`0`-`9`]+`.`[`0`-`9`]+([`e``E`]`-`?[`0`-`9`]+)?
      { case Real.fromString (getLexeme lexbuf) of
                               NONE   => lexerError lexbuf "internal error (Real.fromString)"
			     | SOME i => floatval i
	  }
  | [`A`-`Z`][`a`-`z``A`-`Z``0`-`9``_``$`]*`'`*       { ConId (getLexeme lexbuf) }
  | [`A`-`Z``a`-`z`][`a`-`z``A`-`Z``0`-`9``_``.`]*    { ExtId (getLexeme lexbuf) }
  | `'`[`a`-`z``A`-`Z`][`a`-`z``A`-`Z``0`-`9``_``.`]* { TyVar (getLexeme lexbuf) }

  | `"`([^`"`]|"\\\"")*`"`
                        { getString lexbuf }  (* I'm not entirely confident about this *)
  | `'`_`'`             { charval (getLexeme lexbuf) }  (* Plain characters *)
  | `'``\`_`'`          { charval (getLexeme lexbuf) } (* '\n' etc *)
  | `'``\`[`0`-`9`][`0`-`9`][`0`-`9`]`'`  (* '\056' etc *)
                        { charval (getLexeme lexbuf) }
  | "(*"                { commentStart := getLexemeStart lexbuf;
                          commentDepth := 1;
                          SkipComment lexbuf; Token lexbuf }
  | "*)"                { lexerError lexbuf "Unmatched comment delimeter" }
  | "<-"                { UPDATE }
  | "<>"                { DIAMOND }
  | "<"                 { LESS }
  | "<="                { LEQ }
  | `+`                 { PLUS }
  | "+."                { PLUSDOT }
  | `-`                 { MINUS }
  | "-."                { MINUSDOT }
  | "->"                { ARROW }
  | `*`                 { STAR }
  | "*."                { STARDOT }
  | `/`                 { SLASH }
  | "/."                { SLASHDOT }
  | "//"                { DiscardRestOfLine lexbuf; Token lexbuf }
  | `(`                 { LPAREN }
  | "()"                { UNIT }
  | `)`                 { RPAREN }
  | `[`                 { LSQ }
  | `]`                 { RSQ }
  | `{`                 { LBRACE }
  | `}`                 { RBRACE }
  | `,`                 { COMMA }
  | `;`                 { SEMI }
  | `#`                 { HASH }
  | `!`                 { BANG }
  | "."                 { DOT }
  | "="                 { EQUALS }
  | "=>"                { WARROW }
  | ">"                 { GREATER }
  | ">="                { GTEQ }
  | "|"                 { BAR }
  | "||"                { BARBAR }
  | ":"                 { COLON }
  | "::"                { CONS }
  | ":>"                { COERCE }
  | "!"                 { BANG }
  | "@"                 { ATSIGN }
  | "&&"                { AMPAMP }
  | "^"                 { CARET }
  | `_`                 { USCORE }
  | eof                 { EOF }
  | _                   { lexerError lexbuf "Illegal symbol in input" }

and SkipComment = parse
     "*)"               { commentDepth := !commentDepth - 1;
                          if !commentDepth = 0 then ()
                          else SkipComment lexbuf
                        }
   | "(*"               { commentDepth := !commentDepth + 1;
                          SkipComment lexbuf }
   | (eof | `\^Z`)      { commentNotClosed lexbuf }
   | _                  { SkipComment lexbuf }

and DiscardRestOfLine = parse
     (eof| `\n`) { () }
   | _ { DiscardRestOfLine lexbuf }

;
