header {* Machine model for Grail *}

theory FunMachine = Finmap + MachineBasic:

subsection {* Heaps *}

text {* 
  The heap is a map from locations to objects.  
  Conceptually, objects consist of a class name together with a
  mapping of field names to values.  We use Burstall's
  idea~\cite{NipkowXX} of treating
  records (i.e. objects) on the heap as separate
  ``mini''-heaps themselves, so that a heap is split into
  three components: the object heap, the integer heap,
  and the reference heap.  The latter two heaps are
  modelled as total functions, which simplifies
  the formalization.  Of course, we must check
  that the semantics only examines
  the contents of heaps at non-empty locations.
  *}

types 
  oheap    = "locn \<leadsto>\<^sub>f cname"           -- {* object heap; a finite map *}
  iheap    = "ifldname \<Rightarrow> locn \<Rightarrow> int"  -- {* integer heap for each field *}
  rheap    = "rfldname \<Rightarrow> locn \<Rightarrow> ref"  -- {* reference heap for each field *}


text {*
  The next translations are included to fold up type definitions in printing.
  *}

translations
  "oheap"   <= (type)  "nat \<leadsto>\<^sub>f cname"
  "iheap"   <= (type)  "ifldname \<Rightarrow> nat \<Rightarrow> int"
  "rheap"   <= (type)  "rfldname \<Rightarrow> nat \<Rightarrow> ref"



subsection {* Envs *}

text {*
  Like heaps, envs are modelled as total functions for simplicity.
  If the source program is well-typed, it should never attempt to
  access an undefined part of the env. 

  A frame is a method name (which can
  be used for profiling), together with a env.
  For a small-step semantics we would also need 
  a return point and a place to env the return value.
  We do not need to include the class of the currently
  executing method because this can be retrieved from
  the @{text self} variable.  When executing a static
  method, @{text self} is set to null.
 *}

types
  ienv   = "iname \<Rightarrow> int"	  -- {* an integer env *}
  renv   = "rname \<Rightarrow> ref"          -- {* a reference env *}

text {*
  The next translations are included to fold up type definitions in printing.
  *}

translations
  "ienv"  <= (type)  "iname \<Rightarrow> int"
  "renv"  <= (type)  "rname \<Rightarrow> ref"

subsection {* Heaps, environments, and states *}

record heap = 
	oheap   :: oheap		 -- {* object heap     *}
	iheap   :: iheap		 -- {* integer heap    *}
	rheap   :: rheap		 -- {* references heap *}

record env =  (* or maybe just a pair *)
	ienv    :: ienv	         -- {* integer env  *}
	renv    :: renv	         -- {* ref env      *}

(* NB: compared with previous formulation, now we remove framestack,
  the invokecount becomes an invokedepth, used to calculate maxstack.
  FIXME: later on we could reinstate invokecount.
*)

record rescount = (* resource counting *)
        maxstack    :: int		 -- {* maximum depth of stack        *}  
        invokedepth :: int		 -- {* current INVOKE depth	     *}  
        callcount   :: int		 -- {* count of CALLs executed	     *}  
	clock	    :: int		 -- {* instruction counter	     *}

types state = "heap \<times> rescount"

(*  For now we remove frames since we give a functional semantics 
types frame    = "mname \<times> env"
from state: framestack  :: "frame list"      -- 

NB: previously, it was possible to estimate the real size of a framestack
using the method name component to retrieve parameter size for methods.
*)

constdefs
 oheap_fct  :: "state \<Rightarrow> oheap"     "oheap_fct s \<equiv> heap.oheap (fst s)"
 iheap_fct  :: "state \<Rightarrow> iheap"     "iheap_fct s \<equiv> heap.iheap (fst s)"
 rheap_fct  :: "state \<Rightarrow> rheap"     "rheap_fct s \<equiv> heap.rheap (fst s)"
 ienv_fct :: "env \<Rightarrow> ienv"          "ienv_fct e \<equiv> env.ienv e"
 renv_fct :: "env \<Rightarrow> renv"          "renv_fct e \<equiv> env.renv e"
 maxstack_fct :: "state \<Rightarrow> int"     "maxstack_fct s \<equiv> rescount.maxstack (snd s)"
 callcount_fct :: "state \<Rightarrow> int"    "callcount_fct s \<equiv> rescount.callcount (snd s)"
 invokedepth_fct :: "state \<Rightarrow> int"  "invokedepth_fct s \<equiv> rescount.invokedepth (snd s)"
 clock_fct :: "state \<Rightarrow> int"        "clock_fct s \<equiv> rescount.clock (snd s)"

translations
  "env" <= (type) "\<lparr>ienv :: ienv, renv :: renv\<rparr>"
  "env" <= (type) "\<lparr>ienv :: ienv, renv :: renv, \<dots>::'a\<rparr>"
  "heap" <= (type) "\<lparr>oheap :: oheap, iheap :: iheap, rheap :: rheap\<rparr>"
  "heap" <= (type) "\<lparr>oheap :: oheap, iheap :: iheap, rheap :: rheap, \<dots>::'a\<rparr>"
  "rescount" <= (type) 
	"\<lparr>maxstack :: int, invokedepth :: int, callcount :: int, clock :: int\<rparr>"
  "rescount" <= (type) 
	"\<lparr>maxstack :: int, invokedepth :: int, callcount :: int, clock :: int, \<dots>::'a\<rparr>"
  "state" <= (type) "heap \<times> rescount"


text {* The size of the heap can be recovered from the state.
  It's approximated simply as the size of the domain of @{text oheap}. *}

constdefs hpsize ::"state \<Rightarrow> int"
  "hpsize s == int (card (fmap_dom (oheap_fct s)))"


subsection  {* Values *}

text {* Values and assertions are introduced here because
  the syntax of programs can include them.  *}

text {* Values may be either integer values or reference values. *}

datatype val = IVal int | RVal ref
consts
  theival :: "val \<Rightarrow> int"
  therval :: "val \<Rightarrow> ref"
primrec
  "theival  (IVal i) = i"
primrec
  "therval  (RVal r) = r"


section {* Accessing and manipulating states *}

text {* We define some accessor functions for accessing components
  of the state.  These are treated as syntactic abbreviations,
  rather than functions. *}

(* NB: only priority of 1000 is high enough to override curried application in 
  writing "f s<x>".  We might try unifying these projections using axiomatic
  type class hack -- see HOL/Bali --- but that's pretty ugly. 
  Another improvement might be to restrict to using identifiers in the
  syntax: certainly for field names and variable names. *)

syntax
 "_get_ivar"     :: "env \<Rightarrow> iname \<Rightarrow> int"		("_<_>" [1000,1000] 1000)
translations
 "_get_ivar e x" == "ienv_fct e x"

syntax
 "_get_rvar"     :: "env \<Rightarrow> rname \<Rightarrow> ref"		("_\<lfloor>_\<rfloor>" [1000,1000] 1000)
translations
 "_get_rvar e x" == "renv_fct e x"

syntax
 "_get_rvar_nonnil"     :: "env \<Rightarrow> rname \<Rightarrow> locn"		("_\<lceil>_\<rceil>" [1000,1000] 1000)
translations
 "_get_rvar_nonnil e x" == "theloc (renv e x)"

(* da: adjusted concrete syntax of next one because \<guillemotleft> much easier to type! *)
syntax  
 "_get_obj"  ::      "[state, locn] \<Rightarrow> cname option"	("_\<guillemotleft>_\<guillemotright>" [1000,1000] 1000)
translations
 "_get_obj s l" == "fmap_lookup (oheap_fct s) l"

(* HWL: GNU emacs with x-symbols has problems with the \<guillemotleft> \<guillemotright> etc so I use this syntax *)
syntax  
 "_get_obj_HWL"  ::      "[state, locn] \<Rightarrow> cname option"	("_\<lless>_\<ggreater>" [1000,1000] 1000)
translations
 "_get_obj_HWL s l" == "fmap_lookup (oheap_fct s) l"

(* replaced by _HWL versions !! 
syntax
 "_get_ifld" ::	  "[state, locn, ifldname] \<Rightarrow> int"      ("_<_\<bullet>_>" [1000,1000,1000] 1000)
translations
 "_get_ifld s loc inm" == "iheap_fct s inm loc"

syntax
 "_get_rfld" ::	  "[state, locn, rfldname] \<Rightarrow> ref"      ("_\<lfloor>_\<diamondsuit>_\<rfloor>" [1000,1000,1000] 1000)
translations
 "_get_rfld s loc rnm" == "rheap_fct s rnm loc"
*)

syntax
 "_get_ifld" ::	  "[heap, locn, ifldname] \<Rightarrow> int"      ("_<_\<bullet>_>" [1000,1000,1000] 1000)
translations
 "_get_ifld h loc inm" == "iheap h inm loc"

syntax
 "_get_rfld" ::	  "[heap, locn, rfldname] \<Rightarrow> ref"      ("_\<lfloor>_\<diamondsuit>_\<rfloor>" [1000,1000,1000] 1000)
translations
 "_get_rfld h loc rnm" == "rheap h rnm loc"

syntax
 "freshlocst"  :: "state \<Rightarrow> locn"
translations
 "freshlocst s" == "freshloc (fmap_dom (oheap_fct s))"


subsection {* Functions for updating the state *}

text {* The state updating functions are treated as abstract operations
  on states.  This makes propositions and intermediate assertions
  more readable, at the expense of needing to add numerous
  simplification rules to deal with them. *}

text {* Env and heap updates *}

constdefs
  ivarupdate :: "env \<Rightarrow> iname \<Rightarrow> int \<Rightarrow> env"	("_<_:=_>" [1000,1000,0] 1000)
  "ivarupdate e v val \<equiv> (e \<lparr> ienv := (ienv_fct e)(v := val) \<rparr>)"

constdefs
  rvarupdate :: "env \<Rightarrow> rname \<Rightarrow> ref \<Rightarrow> env"        ("_\<lfloor>_:=_\<rfloor>" [1000,1000,0] 1000)
  "rvarupdate e v val \<equiv> (e \<lparr> renv := (renv_fct e)(v := val) \<rparr>)"

(* replaced by _HWL versions !!
constdefs
  obj_ifieldupdate :: "state \<Rightarrow> locn \<Rightarrow> ifldname \<Rightarrow> int \<Rightarrow> state"   ("_<_\<bullet>_:=_>" [1000,80,1000,80] 1000)
  "obj_ifieldupdate s a f rtv \<equiv> 
     ((fst s) \<lparr> iheap := (iheap_fct s) (f:= (iheap_fct s f)(a:=rtv)) \<rparr>, snd s)"

  obj_rfieldupdate :: "state \<Rightarrow> locn \<Rightarrow> rfldname \<Rightarrow> ref \<Rightarrow> state"   ("_\<lfloor>_\<diamondsuit>_:=_\<rfloor>" [1000,80,1000,80] 1000)
  "obj_rfieldupdate s a g rtv \<equiv> 
     ((fst s) \<lparr> rheap := (rheap_fct s) (g:= (rheap_fct s g)(a:=rtv)) \<rparr>, snd s)"
*)

constdefs
  obj_ifieldupdate :: "heap \<Rightarrow> locn \<Rightarrow> ifldname \<Rightarrow> int \<Rightarrow> heap"   ("_<_\<bullet>_:=_>" [1000,80,1000,80] 1000)
  "obj_ifieldupdate h a f rtv \<equiv> 
     h \<lparr> iheap := (iheap h) (f:= (iheap h f)(a:=rtv)) \<rparr>"

  obj_rfieldupdate :: "heap \<Rightarrow> locn \<Rightarrow> rfldname \<Rightarrow> ref \<Rightarrow> heap"   ("_\<lfloor>_\<diamondsuit>_:=_\<rfloor>" [1000,80,1000,80] 1000)
  "obj_rfieldupdate h a g rtv \<equiv> 
     h \<lparr> rheap := (rheap h) (g:= (rheap h g)(a:=rtv)) \<rparr>"


text {* New objects\<dots> *}

consts
  iheapflds :: "[(ifldname \<times> iname) list, locn, iheap, ienv] \<Rightarrow> iheap"
primrec
  "iheapflds [] a ihp st = ihp"
  "iheapflds (ii#iis) a ihp st = iheapflds iis a (ihp ((fst ii):=((ihp (fst ii))(a:= (st (snd ii)))))) st"

consts
  rheapflds :: "[(rfldname \<times> rname) list, locn, rheap, renv] \<Rightarrow> rheap"
primrec
  "rheapflds [] a rhp st = rhp"
  "rheapflds (rr#rrs) a rhp st = rheapflds rrs a (rhp ((fst rr):=((rhp (fst rr))(a:=(st (snd rr)))))) st"

constdefs
  newobj    :: "state \<Rightarrow> env \<Rightarrow> cname \<Rightarrow> (ifldname \<times> iname) list \<Rightarrow> (rfldname \<times> rname) list \<Rightarrow> state"
  "newobj s e c ifldvals rfldvals \<equiv> 
  ((fst s)
       \<lparr> oheap := (oheap_fct s)((freshlocst s) \<mapsto>\<^sub>f c),
         iheap := iheapflds ifldvals (freshlocst s) (iheap_fct s) (ienv e),
         rheap := rheapflds rfldvals (freshlocst s) (rheap_fct s) (renv e) \<rparr>, snd s)"


text {* 
  Object initialization simply sets the class of an object.
  We might also update the fields with empty maps, defined
  below.  But it should be a guaranteed by type-checking that
  no uninitialised field is accessed.
 *}

constdefs
  emptyi    :: "'a \<Rightarrow> int"
  "emptyi x \<equiv> 0"
  emptyr    :: "'a \<Rightarrow> ref"
  "emptyr x \<equiv> Nullref"

text {* A method invocation allocates a new frame on the frame stack. 
        This function adjusts the state accordingly, given a reference
        to the invoking object, and the parameter.
        The new env contains only bindings for the self object
	and the method parameter.  (We might also give initial
        values to the local variables for the method).
	Note that if we are invoking a static method, then the
	self variable will be set to null.
*}

constdefs
   newframe_env :: "ref \<Rightarrow> ref \<Rightarrow> env"
  "newframe_env objref arg \<equiv>   \<lparr> ienv = emptyi, 
 	   	                 renv = ((emptyr(self := objref))(param := arg)) \<rparr>"

   newframe_state :: "state \<Rightarrow> state"
  "newframe_state s \<equiv> 
	(fst s, (snd s) \<lparr> invokedepth := (invokedepth_fct s) + 1,
			  maxstack := max (invokedepth_fct s + 1) (maxstack_fct s) \<rparr>)"

  popframe_state :: "state \<Rightarrow> state"
  "popframe_state s \<equiv> (fst s, (snd s) \<lparr> invokedepth := (invokedepth_fct s) - 1 \<rparr>)"


text {* Resource counting updates *}

constdefs
 tickn	     :: "int \<Rightarrow> state \<Rightarrow> state"
 "tickn n \<equiv>  \<lambda> (h,r). (h, r \<lparr> clock := rescount.clock r + n \<rparr>)"
syntax
 tick	     :: "state \<Rightarrow> state" -- {* a handy abbreviation *}
translations
 "tick"    == "tickn 1"

constdefs
  incrcallcount :: "state \<Rightarrow> state"
  "incrcallcount \<equiv> \<lambda> (h,r). (h, r\<lparr> callcount := (rescount.callcount r)+1 \<rparr>)"


syntax
  tickcall :: "state \<Rightarrow> state"   -- {* the effect of procedure calling *}
translations
  "tickcall"   == "tick o incrcallcount"
(* NB: this can be slightly troublesome: unification fails with
   matching composition tick o incrcallcount against tick (incrcallcount s) *)


text {* Values of booleans as integers *}

syntax
 grailbool :: "bool \<Rightarrow> int"
translations
 "grailbool b" == "if b then (1::int) else (0::int)"

text {* useful collections of lemmas *}

lemmas update_lemmas = ivarupdate_def rvarupdate_def obj_ifieldupdate_def obj_rfieldupdate_def
lemmas frame_lemmas = newframe_env_def newframe_state_def popframe_state_def

end
