{ 

 (* HLL lexer *)

 open Parser;

 exception LexicalError of string * int * int (* (message, loc1, loc2) *)

 fun lexerError lexbuf s = 
     raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf);

 val commentStart = ref 0;  (* Start of outermost comment being scanned *)
 
 fun commentNotClosed lexbuf =
     raise LexicalError ("Comment not terminated", 
                         !commentStart, getLexemeEnd lexbuf);
     
 val commentDepth = ref 0;  (* Current comment nesting *)


 (* Scan keywords as identifiers and use this function to distinguish them. *)
 (* If the set of keywords is large, use an auxiliary hashtable.            *)

 fun keyword s =
     case s of
         "int" => INT
       | "bool" => BOOL
       | "char" => CHAR
       | "float" => FLOAT
       | "not" => NOT
       | "or" => OR
       | "type" => TYPE
       | "array" => ARRAY
       | "if" => IF
       | "then" => THEN
       | "else" => ELSE
       | "match" => MATCH
       | "with" => WITH
       | "let" => LET
       | "rec" => REC
       | "in" => IN
       | "val" => VAL
       | "of" => OF
       | "and" => AND
       | "string" => STRING
       | "end" => END
       | "float_of_int" => FTOI
       | "int_of_float" => ITOF
       | "list" => LIST
       | "unit" => UNITT
       | "true" => TRUE
       | "false" => FALSE
       | "functor" => FUNCTOR
       | "for" => FOR
       | "to" => TO
       | "do" => DO
       | "main" => MAIN
       | "return" => RETURN
       | "module" => MODULE
       | "external" => EXT
       | "until" => UNTIL
       | "final" => FINAL
       | "fn" => FN
       | "sig" => SIG
 (*| "signature" => SIGNATURE*)
       (*| "is" => IS*)
       | "given" => GIVEN
       | "assert" => ASSERT
 (*| "mod" => MOD*)
       | "struct" => STRUCT
       | "typealias" => TYPEALIAS
       | _ => stringval (Symbol.symbol s);
}


rule Token = parse
    [` ` `\t` `\n` `\r`]     { Token lexbuf }
  | [`0`-`9`]+          { case Int.fromString (getLexeme lexbuf) of
			    NONE   => lexerError lexbuf "internal error"
                          | SOME i => intval i
			}
  | [`0`-`9`]+`.`[`0`-`9`]+([`e``E`]`-`?[`0`-`9`]+)?
      { case Real.fromString (getLexeme lexbuf) of
                               NONE   => lexerError lexbuf "internal error"
			     | SOME i => floatval i
                        }  

  | [`a`-`z``A`-`Z`][`a`-`z``A`-`Z``0`-`9``_`]*`'`*
                        { (* Note I have taken out a dot from the end of
                           * the regular expression rule here since we don't
                           * want identifiers to contain dots, since this will
                           * will most likely be an access to a module member*)
                           keyword (getLexeme lexbuf) }
  | `'`[`a`-`z``A`-`Z`][`a`-`z``A`-`Z``0`-`9``_``.`]*`'`*
                        { TYVAR (Symbol.symbol (getLexeme lexbuf)) }
  | `"`([^`\\``"`]*|"\\\\"|"\\n")`"` { quoteval (getLexeme lexbuf)  }
  | "(*"                { commentStart := getLexemeStart lexbuf;
                          commentDepth := 1; 
                          SkipComment lexbuf; Token lexbuf }
  | "//"                { DiscardRestOfLine lexbuf; Token lexbuf }
  | `=`                 { EQUALS }
  | `<`                 { LESS }
  | `>`                 { GREATER }
  | "<="                { LTEQ }
  | ">="                { GTEQ }
  | "->"                { ARROW }
  | "=>"                { WARROW }
  | `(`                 { LPAREN }
  | `)`                 { RPAREN }
  | `{`                 { LBRACE }
  | `}`                 { RBRACE }
  | `[`                 { LSQ }
  | `]`                 { RSQ }
  | `,`                 { COMMA }
  | eof                 { EOF }
  | `&`                 { AMP }
  | `'`                 { TICK } 
  | `|`                 { BAR }
  | "::"                { CONS }
  | `:`                 { COLON }
  | `.`                 { DOT }
  | `+`                 { PLUS }
  | `-`                 { MINUS }
  | `/`                 { DIV }
  | `*`                 { STAR }
  | `_`                 { USCORE }
| ":=" { ASSIGN}
| ";" {SEMICOLON}
  | _                   { lexerError lexbuf "Illegal symbol in input" }

and SkipComment = parse
     "*)"               { commentDepth := !commentDepth - 1;  
                          if !commentDepth = 0 then ()
                          else SkipComment lexbuf 
                        } 
   | "(*"               { commentDepth := !commentDepth + 1; 
                          SkipComment lexbuf }
   | (eof | `\^Z`)      { commentNotClosed lexbuf }
   | _                  { SkipComment lexbuf }

and DiscardRestOfLine = parse 
     `\n` { () }
   | _ { DiscardRestOfLine lexbuf }

;
