(** * Simple imperative programs Version of 2/24/2009 NOTE: This is a preliminary version of While.v. Do not submit it. The material and exercises may change. A full, revised version will be released on Wednesday. You'll need to compile the previous files in order to use this one. *) Require Export Logicsol. (* ------------------------------------------------------- *) (** * Informal proofs, again *) (* In the real world of mathematical communication, written proofs range from extremely longwinded and pedantic to extremely brief and telegraphic. The ideal is probably somewhere in between, but while you are getting used to the style it is better to go more toward the pedantic end. Also, during the learning phase, it is probably helpful to have a clear standard to compare against. So... For the rest of the course, ALL your inductive proofs should match one of the two following templates PRECISELY. 1) TEMPLATE FOR INFORMAL PROOFS BY INDUCTION OVER AN INDUCTIVELY DEFINED SET: THEOREM: PROOF: By induction on n. * Suppose n = c a1 ... ak, where <...and here we state the IH for each of the a's that has type S, if any>. We must show <...and here we restate P(c a1 ... ak)>. * [] EXAMPLE THEOREM: For all sets X, lists l : list X, and numbers n, if length l = n then index (S n) l = None. PROOF: By induction on l. * Suppose l = []. We must show, for all numbers n, that, if length [] = n, then index (S n) [] = None. This follows immediately from the definition of index. * Suppose l = x :: l' for some x and l', where length l' = n' implies index (S n') l' = None, for any number n'. We must show, for all n, that, if length (x::l') = n then index (S n) (x::l') = None. Let n be a number with length l = n. Since length l = length (x::l') = S (length l'), it suffices to show that index (S (length l')) l' = None. But this follows directly from the induction hypothesis, picking n' to be length l'. [] 2) TEMPLATE FOR INFORMAL PROOFS BY INDUCTION OVER AN INDUCTIVELY DEFINED PROPOSITION (I.E., "INDUCTION ON DERIVATIONS"): THEOREM: P," where Q is some inductively defined proposition. (More generally, "For all x y z, (Q x y z) -> (P x y z),"> PROOF: By induction on a derivation of Q. * Suppose the final rule used to show Q is c. Then <...and here we state the types of all of the a's together with any equalities that follow from the definition of the constructor and the IH for each of the a's that has type Q, if there are any>. We must show <...and here we restate P>. * [] EXAMPLE THEOREM: The <= relation is transitive -- i.e., for all numbers n, m, and o, if n <= m and m <= o, then n <= o. PROOF: By induction on a derivation of m <= o. * Suppose the final rule used to show m <= o is le_n. Then m = o and the result is immediate. * Suppose the final rule used to show m <= o is le_S. Then o = S o' for some o' with m <= o'. By induction hypothesis, n <= o'. But then, by le_S, n <= o. [] *) (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (** * Evaluating arithmetic expressions *) Module AExp. Inductive aexp : Set := | ANum : nat -> aexp | APlus : aexp -> aexp -> aexp | AMinus : aexp -> aexp -> aexp | AMult : aexp -> aexp -> aexp. Definition two_plus_two := APlus (ANum 2) (ANum 2). (* Since we're going to be writing a lot of these arithmetic expressions, let's introduce some concrete syntax that's nicer to look at. *) Notation "a1 '+++' a2" := (APlus a1 a2) (at level 40). Notation "a1 '---' a2" := (AMinus a1 a2) (at level 40). Notation "a1 '***' a2" := (AMult a1 a2) (at level 50). Notation A0 := (ANum 0). Notation A1 := (ANum 1). Notation A2 := (ANum 2). Notation A3 := (ANum 3). Notation A4 := (ANum 4). Notation A5 := (ANum 5). Definition two_plus_two' := A2 +++ A2. Fixpoint aeval (e : aexp) {struct e} : nat := match e with | ANum n => n | APlus a1 a2 => plus (aeval a1) (aeval a2) | AMinus a1 a2 => minus (aeval a1) (aeval a2) | AMult a1 a2 => mult (aeval a1) (aeval a2) end. Example test_aeval1: aeval two_plus_two = 4. Proof. reflexivity. Qed. Inductive bexp : Set := | BTrue : bexp | BFalse : bexp | BEq : aexp -> aexp -> bexp | BLtEq : aexp -> aexp -> bexp | BNot : bexp -> bexp | BAnd : bexp -> bexp -> bexp | BOr : bexp -> bexp -> bexp. Fixpoint beval (e : bexp) {struct e} : bool := match e with | BTrue => true | BFalse => false | BEq a1 a2 => beq_nat (aeval a1) (aeval a2) | BLtEq a1 a2 => ble_nat (aeval a1) (aeval a2) | BNot b1 => negb (beval b1) | BAnd b1 b2 => andb (beval b1) (beval b2) | BOr b1 b2 => orb (beval b1) (beval b2) end. Notation "b1 '===' b2" := (BEq b1 b2) (at level 60). Notation "b1 '<<=' b2" := (BLtEq b1 b2) (at level 60). (* ------------------------------------------------------- *) (** * Correctness of a simple optimization *) Fixpoint optimize_0plus (e:aexp) {struct e} : aexp := match e with | ANum n => ANum n | APlus (ANum 0) e2 => optimize_0plus e2 | APlus e1 e2 => APlus (optimize_0plus e1) (optimize_0plus e2) | AMinus e1 e2 => AMinus (optimize_0plus e1) (optimize_0plus e2) | AMult e1 e2 => AMult (optimize_0plus e1) (optimize_0plus e2) end. Example test_optimize_0plus: optimize_0plus (A2 +++ (A0 +++ (A0 +++ A1))) = A2 +++ A1. Proof. reflexivity. Qed. Theorem optimize_0plus_sound: forall e, aeval (optimize_0plus e) = aeval e. Proof. intros e. induction e. Case "ANum". reflexivity. Case "APlus". destruct e1. SCase "e1 = ANum n". destruct n. SSCase "n = 0". simpl. apply IHe2. SSCase "n <> 0". simpl. rewrite IHe2. reflexivity. SCase "e1 = APlus e1_1 e1_2". simpl. simpl in IHe1. rewrite IHe1. rewrite IHe2. reflexivity. SCase "e1 = AMinus e1_1 e1_2". simpl. simpl in IHe1. rewrite IHe1. rewrite IHe2. reflexivity. SCase "e1 = AMult e1_1 e1_2". simpl. simpl in IHe1. rewrite IHe1. rewrite IHe2. reflexivity. Case "AMinus". simpl. rewrite IHe1. rewrite IHe2. reflexivity. Case "AMult". simpl. rewrite IHe1. rewrite IHe2. reflexivity. Qed. (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (** * Tacticals *) (* The repetition in this last proof is starting to be a little annoying. It's still bearable here, but clearly, if either the language of arithmetic expressions or the optimization being proved sound were significantly more complex, it would start to be a real problem. So far, we've been doing all our proofs using just a small handful of Coq's tactics and completely ignoring its very powerful facilities for constructing proofs automatically. This section introduces some of these facilities, and we will see more over the next several chapters. Getting used to them will take some energy -- Coq's automation is a power tool -- but it will allow us to scale up our efforts to more complex definitions and more interesting properties without becoming overwhelmed by boring, repetitive, or low-level details. *) (* ------------------------------------------------------- *) (** ** The [try] tactical *) (* "Tactical" is Coq's term for operations that manipulate tactics as data -- higher-order tactics, if you will. *) (* One very simple tactical is [try]: If [T] is a tactic, then [try T] is a tactic that is just like [T] except that, if [T] fails, [try T] does nothing at all (instead of failing). *) (* ------------------------------------------------------- *) (** ** The [;] tactical *) (* Another very basic tactical is written [;]. If [T], [T1], ..., [Tn] are tactics, then T; [T1 | T2 | ... | Tn] is a tactic that first performs [T] and then performs [T1] on the first subgoal generated by [T], performs [T2] on the second subgoal, etc. In the special case where all of the [Ti]s are the same tactic [T'], we can just write [T;T'] instead of: T; [T' | T' | ... | T'] That is, if [T] and [T'] are tactics, then [T;T'] is a tactic that first performs [T] and then performs [T'] on EACH SUBGOAL generated by [T]. This is the form of [;] that is used most often in practice. *) (* Using [try] and [;] together, we can get rid of the repetition in the above proof. *) Theorem optimize_0plus_sound': forall e, aeval (optimize_0plus e) = aeval e. Proof. intros e. induction e; (* Most cases follow directly by the IH *) try (simpl; rewrite IHe1; rewrite IHe2; reflexivity). Case "ANum". reflexivity. Case "APlus". destruct e1; (* Most cases follow directly by the IH *) try (simpl; simpl in IHe1; rewrite IHe1; rewrite IHe2; reflexivity). (* The interesting case is when e1 = ANum n *) SCase "e1 = ANum n". destruct n. SSCase "n = 0". apply IHe2. SSCase "n <> 0". simpl. rewrite IHe2. reflexivity. Qed. (* In practice, Coq experts often use [try] with a tactic like [induction] to take care of many similar "straightforward" cases all at once. Naturally, this practice has an analog in informal proofs. Here is an informal proof of our example theorem that matches the structure of the formal one: THEOREM: For all arithmetic expressions e, aeval (optimize_0plus e) = aeval e. PROOF: By induction on e. The AMinus and AMult cases follow directly from the IH. The remaining cases are as follows: * Suppose e = ANum n for some n. We must show aeval (optimize_0plus (ANum n)) = aeval (ANum n). This is immediate from the definition of optimize_0plus. * Suppose e = APlus e1 e2 for some e1 and e2. We must show aeval (optimize_0plus (APlus e1 e2)) = aeval (APlus e1 e2). Consider the possible forms of e1. For most of them, optimize_0plus simply calls itself recursively for the subexpressions and rebuilds a new expression of the same form as e1; in these cases, the result follows directly from the IH. The interesting case is when e1 = ANum n for some n. If n = ANum 0, then optimize_0plus (APlus e1 e2) = optimize_0plus e2 and the IH for e2 is exactly what we need. On the other hand, if n = S n' for some n', then again optimize_0plus simply calls itself recursively, and the result follows from the IH. [] This proof can still be improved: the first case (for e = ANum n) is very trivial -- even more trivial than the cases that we said simply followed from the IH -- yet we have chosen to write it out in full. It would be better and clearer to drop it and just say, at the top, "Most cases are either immediate or direct from the IH. The only interesting case is the one for APlus..." Of course, we'd like to do the same thing in our formal proof too! Here's how this looks: *) Theorem optimize_0plus_sound'': forall e, aeval (optimize_0plus e) = aeval e. Proof. intros e. induction e; (* Most cases follow directly by the IH *) try (simpl; rewrite IHe1; rewrite IHe2; reflexivity); (* ... or are immediate by definition *) try (reflexivity). (* The interesting case is when e = APlus (ANum 0) e2. *) Case "APlus". destruct e1; try (simpl; simpl in IHe1; rewrite IHe1; rewrite IHe2; reflexivity). SCase "e1 = ANum n". destruct n. SSCase "n = 0". apply IHe2. SSCase "n <> 0". simpl. rewrite IHe2. reflexivity. Qed. (* ------------------------------------------------------- *) (** ** Defining new tactic notations *) (* Coq also provides some handy ways to define "shorthand tactics" that, when invoked, apply several tactics at the same time. *) Tactic Notation "simpl_and_try" tactic(c) := simpl; try c. (* Here's a more interesting use of this feature... *) (* ------------------------------------------------------- *) (** ** Bulletproofing case analyses *) (* Being able to deal with most of the cases of an [induction] or [destruct] all at the same time is very convenient, but it can also be a little confusing. One problem that often comes up is that MAINTAINING proofs written in this style can be difficult. For example, suppose that, later, we extended the definition of [aexp] with another constructor that also required a special argument. The above proof might break because Coq generated the subgoals for this constructor before the one for APlus, so that, at the point when we start working on the [APlus] case, Coq is actually expecting the argument for a completely different constructor. What we'd like is to get a sensible error message saying "I was expecting the [AFoo] case at this point, but the proof script is talking about [APlus]." Here's a nice little trick that smoothly achieves this. *) Tactic Notation "aexp_cases" tactic(first) tactic(c) := first; [ c "ANum" | c "APlus" | c "AMinus" | c "AMult" ]. (* If [e] is a variable of type [aexp], then doing aexp_cases (induction e) (Case) will perform an induction on [e] (the same as if we had just typed [induction e]) and ALSO add a "Case" tag to each subgoal labeling which constructor it comes from. *) (* Exercise: 3 stars (optimize_0plus_b) *) (* Since the optimize_0plus tranformation doesn't change the value of aexps, we should be able to apply it to all the aexps that appear in a bexp without changing the bexp's value. Write a function which performs that transformation on bexps, and prove it is sound. Use the tacticals we've just seen to make the proof as elegant as possible. *) (* FILL IN HERE *) (* Exercise: 4 stars *) (* DESIGN EXERCISE: The optimization implemented by our optimize_0plus function is only one of many imaginable optimizations on arithmetic and boolean expressions. Write a more sophisticated optimizer and prove it correct. FILL IN HERE *) End AExp. (* ------------------------------------------------------- *) (** * A couple more handy tactics *) (* clear H -- remove hypothesis H from the context subst x -- find an assumption [x = e] or [e = x] in the context, replace x with e throughout the context and current goal, and clear the assumption subst -- substitute away ALL assumptions of the form [x = e] or [e = x] *) (* We'll see many examples of these in the proofs below... *) (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (* ------------------------------------------------------- *) (** * While programs *) (* ----------------------------------------------------- *) (** ** Mappings *) (* In the rest of the course, we'll often need to talk about "identifiers," such as program variables. We could use strings for this, or (as in a real compiler) some kind of fancier structures like symbols from a symbol table. For simplicity, though, let's just use natural numbers as identifiers. *) Definition id := nat. (* Now, a [mapping] is a partial function from identifiers to members of some other type, [X]. We use an [option] in the result type to represent partiality: A result of [None] means that the mapping is not defined for the given id. *) Definition mapping (X : Set) := id -> option X. Definition lookup (X : Set) (k : id) (f : mapping X) : option X := f k. Implicit Arguments lookup [X]. Definition empty_mapping (X:Set) : (mapping X) := constfun None. Definition extend (X : Set) (f : mapping X) (k:id) (x:X) := override f k (Some x). Implicit Arguments extend [X]. Definition remove (X : Set) (f : mapping X) (k:id) := override f k None. Implicit Arguments remove [X]. (* Since the critical functions on mappings, [extend] and [remove], are implemented in terms of the [override] operator that we studied before, their properties can also be derived from those of [override]. *) Theorem extend_eq : forall X x x' k (f : mapping X), x = x' -> lookup k (extend f k x) = Some x'. Proof. intros X x x' k f Heq. rewrite Heq. unfold lookup, extend. apply override_eq. Qed. (* Exercise: 3 stars (extend_eq_variant) *) (* The way [extend_eq] is stated may be a bit surprising: wouldn't it be simpler just to say [lookup k (extend f k x) = Some x]? Try changing the statement of the theorem to read like this; then work through some of the rest of the file and see how the proofs that use [extend_eq] need to be changed to use the simplified version. Briefly explain what changes are needed to use the variant and which version of the theorem you prefer. (This exercise is best done AFTER the rest of this week's homework.) FILL IN HERE *) (* Exercise: 2 stars *) Theorem extend_neq : forall X o1 x2 k1 k2 (f : mapping X), lookup k1 f = o1 -> beq_nat k2 k1 = false -> lookup k1 (extend f k2 x2) = o1. Proof. (* FILL IN HERE (and delete "Admitted") *) Admitted. (* Exercise: 2 stars, optional *) Theorem extend_example : forall (X:Set) (b:bool), lookup 3 (extend (empty_mapping _) 2 b) = None. Proof. (* Before starting to play with tactics, make sure you understand exactly what the theorem is saying! *) (* FILL IN HERE (and delete "Admitted") *) Admitted. (* Exercise: 2 stars, optional *) Theorem extend_shadow : forall X x1 x2 k1 k2 (f : mapping X), lookup k1 (extend (extend f k2 x1) k2 x2) = lookup k1 (extend f k2 x2). Proof. (* FILL IN HERE (and delete "Admitted") *) Admitted. (* Exercise: 2 stars, optional *) Theorem extend_same : forall X x1 k1 k2 (f : mapping X), lookup k1 f = Some x1 -> lookup k2 (extend f k1 x1) = lookup k2 f. Proof. (* FILL IN HERE (and delete "Admitted") *) Admitted. (* Exercise: 2 stars, optional *) Theorem extend_permute : forall X x1 x2 k1 k2 k3 (f : mapping X), false = beq_nat k2 k1 -> lookup k3 (extend (extend f k2 x1) k1 x2) = lookup k3 (extend (extend f k1 x2) k2 x1). Proof. (* FILL IN HERE (and delete "Admitted") *) Admitted. (* --------------------------------------------------- *) (** ** WHILE program syntax *) Definition state := (mapping nat). Definition empty_state := empty_mapping nat. (* Adding variables to what we had before *) Inductive aexp : Set := | ANum : nat -> aexp | AId : id -> aexp | APlus : aexp -> aexp -> aexp | AMinus : aexp -> aexp -> aexp | AMult : aexp -> aexp -> aexp. Tactic Notation "aexp_cases" tactic(first) tactic(c) := first; [ c "ANum" | c "AId" | c "APlus" | c "AMinus" | c "AMult" ]. (* Same notations as before... *) Notation A0 := (ANum 0). Notation A1 := (ANum 1). Notation A2 := (ANum 2). Notation A3 := (ANum 3). Notation A4 := (ANum 4). Notation A5 := (ANum 5). Notation A6 := (ANum 6). Notation "a1 '***' a2" := (AMult a1 a2) (at level 50). Notation "a1 '---' a2" := (AMinus a1 a2) (at level 40). Notation "a1 '+++' a2" := (APlus a1 a2) (at level 40). (* ...plus one more: *) Notation "'!' X" := (AId X) (at level 30). (* Shorthands for variables: *) Definition X : id := 0. Definition Y : id := 1. Definition Z : id := 2. (* Note that the choice of variable names (X, Y, Z) clashes a bit with our earlier use of uppercase letters for Sets. Since we're not using polymorphism heavily in this section, this overloading will hopefully not cause confusion. *) (* Same bexps as before (using the new aexps) *) Inductive bexp : Set := | BTrue : bexp | BFalse : bexp | BEq : aexp -> aexp -> bexp | BLtEq : aexp -> aexp -> bexp | BNot : bexp -> bexp | BAnd : bexp -> bexp -> bexp | BOr : bexp -> bexp -> bexp. Notation "b1 '===' b2" := (BEq b1 b2) (at level 60). Notation "b1 '<<=' b2" := (BLtEq b1 b2) (at level 60). Tactic Notation "bexp_cases" tactic(first) tactic(c) := first; [ c "BTrue" | c "BFalse" | c "BEq" | c "BLtEq" | c "BNot" | c "BAnd" | c "BOr" ]. Inductive com : Set := | CSkip : com | CAss : id -> aexp -> com | CSeq : com -> com -> com | CIf : bexp -> com -> com -> com | CWhile : bexp -> com -> com. Tactic Notation "com_cases" tactic(first) tactic(c) := first; [ c "CSkip" | c "CAss" | c "CSeq" | c "CIf" | c "CWhile" ]. Notation "c1 ; c2" := (CSeq c1 c2) (at level 80, right associativity). Notation "l '::=' a" := (CAss l a) (at level 60). Notation "'while' b 'do' c" := (CWhile b c) (at level 80, right associativity). Notation "'testif' e1 'then' e2 'else' e3" := (CIf e1 e2 e3) (at level 80, right associativity). (* [CIf] is written "testif...then...else..." to avoid confusion with Coq's existing [if...then...else...] syntactic sugar. *)