Our site saves small pieces of text information (cookies) on your device in order to deliver better content and for statistical purposes. You can disable the usage of cookies by changing the settings of your browser. By browsing our website without changing the browser settings you grant us permission to store that information on your device.

# A funky grammar

You are given the following context-free grammar that produces a language L:

``S -> | SS | aSaSb | aSbSa | bSaSa``

It is easy to see that this grammar produces exactly the language M of all words over a, b that contains twice as many a's as b's. But is it easy to prove?

We claim that L ⊆ M is while proving M ⊆ L is not, so we give you some guidance: Assume w ∈ M. We prove w ∈ L by induction on the length of w.

For the induction step, consider w ∈ M with positive length and assume that the claim holds for all w of shorter length (IH).

Consider the function h(w):=2 * |w|b − |w|a. We have h(w)=0 iff w ∈ M. If h is zero for any prefix of w, then it is easy to establish w ∈ L with (IH). So assume the contrary for the remainder of the proof.

Now consider three cases:

1. w = bu. Thus h(u)= − 2. As the value of h for w does not have a zero, starts with b, and can only decrease by 1, we have u = vaa for some v. This means h(v)=0. Use the last production to show that the word is in the grammar.
2. w = ub. Analogous.
3. w = aua. As w does not have a zero, we know that h(v) never takes the value 1 for any prefix of u. With h(u)=2, we can conclude that u = v1bv2 for v1, v2 with h(v1)=h(v2)=0. Now use the fourth production to show w ∈ L.

1. Show L ⊆ M
2. Prove a theorem that helps you to establish the essential step in (a), (b).
3. Prove a theorem that helps you to establish the essential step in (c).
4. Show M ⊆ L. You may want to exploit the symmetry of (a) and (b).

## Resources

### Definitions File

```theory Defs
imports Main
begin

section ‹Grammars and Languages›

text ‹‹#w⇘c⇙› denotes the number of times the character ‹c› occurs in word ‹w››
abbreviation count :: "'a list ⇒ 'a ⇒ nat" ("#_⇘_⇙") where
"count xs x ≡ count_list xs x"

abbreviation
"a ≡ CHR ''a''"

abbreviation
"b ≡ CHR ''b''"

definition prefix :: "'a list ⇒ 'a list ⇒ bool" (infix "≺" 50) where
"prefix xs ys ≡ ∃ zs. ys = xs @ zs ∧ zs ≠ []"

section ‹Problem statement›

text ‹Consider the following grammar:›
inductive_set L :: "string set" where
"[] ∈ L"
| "x @ y ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ a # y @ [b] ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ b # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
| "b # x @ a # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
text ‹Q: Can we delete any of these rules?›

definition
"M = {w. set w ⊆ {a, b} ∧ #w⇘a⇙ = 2 * #w⇘b⇙}"

text ‹
We want to prove that the grammar exactly produces the language of all words that contain
twice as many a's than b's:›
lemma
"L = M"
oops

text ‹The height function›
definition
"h w = 2 * int (#w⇘b⇙) - int (#w⇘a⇙)"

end```

### Template File

```theory Submission
imports Defs
begin

theorem L_sub: "L ⊆ M"
sorry

theorem h_first_zero:
assumes "h (xs @ ys) < 0" "h xs ≥ 0"
shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
sorry

theorem h_2_split:
assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
sorry

theorem L_sup:
assumes h_first_zero:
"⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
assumes h_2_split:
"⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
⟹ t"
assumes "h w = 0" "set w ⊆ {a, b}"
shows "w ∈ L"
sorry

text ‹The final theorem›
corollary
"L = M"
using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end```

### Check File

```theory Check
imports Submission
begin

theorem L_sub: "L ⊆ M"
by (rule Submission.L_sub)

theorem h_first_zero:
assumes "h (xs @ ys) < 0" "h xs ≥ 0"
shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
using assms by (rule Submission.h_first_zero)

theorem h_2_split:
assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
using assms by (rule Submission.h_2_split)

theorem L_sup:
assumes h_first_zero:
"⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
assumes h_2_split:
"⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
⟹ t"
assumes "h w = 0" "set w ⊆ {a, b}"
shows "w ∈ L"
using assms by (rule Submission.L_sup)

text ‹The final theorem›
corollary
"L = M"
using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end```

### Definitions File

```Require Export Ascii List.
Require Export Arith ZArith Lia.
Require Import ssreflect.
Export ListNotations.
Open Scope char_scope.

(** In this challenge, we assume classical logic. *)
Require Export Classical.

(** A simple formalization of (possibly infinite) sets as predicates. *)
Notation set A := (A -> Prop) (only parsing).

Definition Incl {U} (A B: set U) : Prop := forall x, A x -> B x.
Definition Union {U} (A B: set U): set U := fun x => A x \/ B x.
Definition Single {U} a : set U := fun x => x = a.
Definition Pair {U} a b : set U := fun x => x = a \/ x = b.
Definition Empty {U} := fun (x: U) => False.

Notation "A  ⊆  B" := (Incl A B) (at level 70).
Notation "A  ∪  B" := (Union A B) (at level 50).
Notation "'{ x }" := (Single x) (at level 1).
Notation "∅" := Empty.

Axiom set_ext : forall U (A B: set U), (forall x, A x <-> B x) -> A = B.

(* Simple goals involving sets can be solved by the [firstorder] tactic.
This should be enough for our needs here. *)
Goal forall U (A B: set U) a b,
'{a} ∪ ∅ ∪ B ∪ (A ∪ '{b} ∪ B) ⊆ Pair a b ∪ A ∪ B.
Proof. firstorder. (* boom. *) Qed.

(** Definitions on words (as lists of characters). *)

Notation word := (list ascii).

(* [#{c} w] denotes the number of times the character [c] occurs in the word [w]. *)
Notation "'#' '{' c '}'" := (fun w => count_occ ascii_dec w c) (at level 1).

Eval compute in #{"a"} ["a"; "b"; "c"]. (* there is one "a" in "abc" *)

Definition a := "a".
Definition b := "b".

(* The strict prefix relation *)
Definition prefix {A} (xs ys: list A) :=
exists zs, ys = xs ++ zs /\ zs <> [].

Notation "xs  ≺  ys" := (prefix xs ys) (at level 50).

(* The set of letters in a word *)
Definition letters (w: word): set ascii := fun c => List.In c w.

Lemma letters_nil : letters [] = ∅.
Proof. reflexivity. Qed.
Lemma letters_cons x xs : letters (x :: xs) = '{x} ∪ letters xs.
Proof. rewrite /letters /Union /Single //=. apply set_ext. firstorder. Qed.
Lemma letters_app xs ys : letters (xs ++ ys) = letters xs ∪ letters ys.
Proof. rewrite /letters. apply set_ext. intro. rewrite in_app_iff //=. Qed.

(* Can be used by ssreflect's rewrite to rewrite with any of the lemmas in the
tuple: use "rewrite lettersE" *)
Definition lettersE := (letters_nil, letters_cons, letters_app).

(** Problem statement *)

(* Consider the following grammar, which inductively defines a set of words: *)
Inductive L : set word :=
| L_nil : L []
| L_app : forall x y, L x -> L y ->
L (x ++ y)
| L_aab : forall x y, L x -> L y ->
L (a :: x ++ a :: y ++ [b])
| L_aba : forall x y, L x -> L y ->
L (a :: x ++ b :: y ++ [a])
| L_baa : forall x y, L x -> L y ->
L (b :: x ++ a :: y ++ [a]).

(* We want to prove that the grammar exactly produces the language of all words
that contain twice as many a's than b's: *)

Definition M : set word :=
fun w => letters w ⊆ Pair a b /\ #{a} w = 2 * #{b} w.

Goal L = M.
Abort.

(* The height function. w is in M iff h(w) = 0. *)
Definition h (w: word): Z :=
2 * Z.of_nat (#{b} w) - Z.of_nat (#{a} w).```

### Template File

```Require Import Defs.

(* It is not required, but it is a good idea to import ssreflect to benefit
from the better rewrite tactic. (by uncommenting the two lines below)

ssreflect's rewrite cheatsheet: a rewrite invocation is of the form [rewrite foo bar baz],
where each of "foo", "bar", "baz" can be:
- "foo" where foo is a lemma: rewrites with the lemma ("-foo" rewrites in the other direction)
- "!foo" where foo is a lemma: rewrites repeatedly with foo
- "/foo" where foo is a definition: unfolds the definition ("-/foo" folds the definition)
- "//": try to prove the goal or side-condition if it is trivial
- "//=": like "//" but also simplify the goal (using "simp")
- "(_: foo = bar)": ask the user to prove "foo = bar" as a subgoal, and rewrite with it
*)
(*
Require Import ssreflect.
Local Ltac done ::= first [ solve [ trivial | eauto | lia ] ].
*)

(* Prevent simpl/cbn from unfolding the multiplication, which is never a good
idea. *)
Arguments Nat.mul : simpl never.
Arguments Z.mul : simpl never.

Theorem L_sub : L ⊆ M.
Proof.
(* todo *)

Theorem h_first_zero xs ys :
(h (xs ++ ys) < 0)%Z ->
(0 <= h xs)%Z ->
exists zs ws, ys = zs ++ ws /\ h (xs ++ zs) = 0%Z.
Proof.
(* todo *)

(* Hint: this is useful to reason by well-founded induction on words. *)
Definition word_len_lt := ltof _ (@length ascii).
Lemma word_len_wf : well_founded word_len_lt.
Proof. apply well_founded_ltof. Qed.

Definition smallest_word_st (P: word -> Prop) (w: word) :=
P w /\ forall v, P v -> length w <= length v.

(* If [P] holds on some word [w], then we can consider "the" smallest word that
satisfies [P]... *)
Lemma ex_smallest_word_st : forall (P: set word) (w: word),
P w -> exists v, smallest_word_st P v.
Proof.
(* todo *)

Theorem h_2_split w :
h w = 2%Z ->
(forall v, v ≺ w -> h v <> 1%Z) ->
exists x y, h x = 0%Z /\ h y = 0%Z /\ w = x ++ [b] ++ y.
Proof.
(* todo *)

Section L_sup.

Hypothesis h_first_zero : forall xs ys,
(h (xs ++ ys) < 0)%Z ->
(0 <= h xs)%Z ->
exists zs ws, ys = zs ++ ws /\ h (xs ++ zs) = 0%Z.

Hypothesis h_2_split : forall w,
h w = 2%Z ->
(forall v, v ≺ w -> h v <> 1%Z) ->
exists x y, h x = 0%Z /\ h y = 0%Z /\ w = x ++ [b] ++ y.

Theorem L_sup w :
h w = 0%Z ->
letters w ⊆ Pair a b ->
L w.
Proof.
(* todo *)

End L_sup.

(** The final theorem *)

Theorem final : L = M.
Proof.
(* todo *)

### Definitions File

```theory Defs
imports Main
begin

section ‹Grammars and Languages›

text ‹‹#w⇘c⇙› denotes the number of times the character ‹c› occurs in word ‹w››
abbreviation count :: "'a list ⇒ 'a ⇒ nat" ("#_⇘_⇙") where
"count xs x ≡ count_list xs x"

abbreviation
"a ≡ CHR ''a''"

abbreviation
"b ≡ CHR ''b''"

definition prefix :: "'a list ⇒ 'a list ⇒ bool" (infix "≺" 50) where
"prefix xs ys ≡ ∃ zs. ys = xs @ zs ∧ zs ≠ []"

section ‹Problem statement›

text ‹Consider the following grammar:›
inductive_set L :: "string set" where
"[] ∈ L"
| "x @ y ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ a # y @ [b] ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ b # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
| "b # x @ a # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
text ‹Q: Can we delete any of these rules?›

definition
"M = {w. set w ⊆ {a, b} ∧ #w⇘a⇙ = 2 * #w⇘b⇙}"

text ‹
We want to prove that the grammar exactly produces the language of all words that contain
twice as many a's than b's:›
lemma
"L = M"
oops

text ‹The height function›
definition
"h w = 2 * int (#w⇘b⇙) - int (#w⇘a⇙)"

end```

### Template File

```theory Submission
imports Defs
begin

theorem L_sub: "L ⊆ M"
sorry

theorem h_first_zero:
assumes "h (xs @ ys) < 0" "h xs ≥ 0"
shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
sorry

theorem h_2_split:
assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
sorry

theorem L_sup:
assumes h_first_zero:
"⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
assumes h_2_split:
"⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
⟹ t"
assumes "h w = 0" "set w ⊆ {a, b}"
shows "w ∈ L"
sorry

text ‹The final theorem›
corollary
"L = M"
using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end```

### Check File

```theory Check
imports Submission
begin

theorem L_sub: "L ⊆ M"
by (rule Submission.L_sub)

theorem h_first_zero:
assumes "h (xs @ ys) < 0" "h xs ≥ 0"
shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
using assms by (rule Submission.h_first_zero)

theorem h_2_split:
assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
using assms by (rule Submission.h_2_split)

theorem L_sup:
assumes h_first_zero:
"⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
assumes h_2_split:
"⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
⟹ t"
assumes "h w = 0" "set w ⊆ {a, b}"
shows "w ∈ L"
using assms by (rule Submission.L_sup)

text ‹The final theorem›
corollary
"L = M"
using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end```

### Definitions File

```(in-package "ACL2")

(defun ab-listp (lst)
(if (endp lst) t
(and (member (car lst) '(a b))
(ab-listp (cdr lst)))))

(defun M (lst)
(and (ab-listp lst)
(equal (* 2 (count 'b lst))
(count 'a lst))))

(defun zip-append (lst)
(if (endp lst) nil
(cons (append (caar lst) (cdar lst)) (zip-append (cdr lst)))
)
)

(defun zip-3 (l1 l2 l3 lst)
(if (endp lst) nil
(cons (append (LIST l1) (caar lst) (LIST l2) (cdar lst) (LIST l3))
(zip-3 l1 l2 l3 (cdr lst)))
)
)

(defun cons-all (hd l)
(if (endp l) nil
(cons (cons hd (car l)) (cons-all hd (cdr l)))
)
)

(defun cart-prod (l1 l2)
(if (endp l1) nil
(append (cons-all (car l1) l2)
(cart-prod (cdr l1) l2))
)
)

(mutual-recursion
; generate words of length n
(defun gen-L (n)
(declare (xargs :measure (acl2-count (+ (* n 2) 1))))
(if (zp n) '(nil)
(append (zip-append (combine-L (1- n) 1))
(zip-3 'a 'a 'b (combine-L (- n 3) 0))
(zip-3 'a 'b 'a (combine-L (- n 3) 0))
(zip-3 'b 'a 'a (combine-L (- n 3) 0)))
))
(defun combine-L (n m)
(declare (xargs :measure (acl2-count (+ (* n 2) 2))))
(if (OR (not (natp n)) (not (natp m)) (> m n)) nil
(append (cart-prod (gen-L n) (gen-L m))
(cart-prod (gen-L m) (gen-L n))
(combine-L (1- n) (1+ m)))
))
)```

### Template File

```(in-package "ACL2")

(include-book "Defs")

(defthm length-L
(equal (member w (gen-L (len w)))
(M w)))```

### Check File

```; The four lines just below are boilerplate, that is, the same for every
; problem.

(in-package "ACL2")
(include-book "Submission")
(set-enforce-redundancy t)
(include-book "Defs")

; The events below represent the theorem to be proved, and are copied from
; template.lisp.

(defthm length-L
(equal (member w (gen-L (len w)))
(M w)))```

Terms and Conditions