\documentclass[12pt]{article} \usepackage{amsfonts} %\documentstyle[12pt,amsfonts]{article} %\documentstyle{article} \setlength{\topmargin}{-.5in} \setlength{\oddsidemargin}{0 in} \setlength{\evensidemargin}{0 in} \setlength{\textwidth}{6.5truein} \setlength{\textheight}{8.5truein} %\input ../basicmath/basicmathmac.tex % %\input ../adgeomcs/lamacb.tex \input ../adgeomcs/mac.tex \input ../adgeomcs/mathmac.tex \def\Hom#1#2#3{\mathrm{Hom}_{#1}(#2, #3)} \def\fseq#1#2{(#1_{#2})_{#2\geq 1}} \def\fsseq#1#2#3{(#1_{#3(#2)})_{#2\geq 1}} \def\qleq{\sqsubseteq} \def\buildrell#1\over#2{\mathrel{\mathop{\null#1}\limits_{#2}}} \def\rmder#1{{\ \buildrell \Longrightarrow \over{rm}}^{#1}\ } \def\lmder#1{\buildrel #1\over{\buildrell \Longrightarrow \over{lm}}} % \begin{document} \begin{center} \fbox{{\Large\bf Spring, 2009 \hspace*{0.4cm} CIS 511}}\\ \vspace{1cm} {\Large\bf Introduction to the Theory of Computation\\ \vspace{0.5cm} Homework 5}\\[10pt] March 31, 2009; Due April 14, beginning of class\\ \end{center} \vspace {0.25cm} ``A problems'' are for practice only, and should not be turned in. \vspace {0.25cm}\noindent {\bf Problem A1.} Give constructions proving that for any PDA $M$, the acceptance modes $T(M)$, $N(M)$, and $L(M)$, are equivalent. \vspace {0.25cm} \noindent {\bf Problem A2.} (i) Given any set $X$, for any two subsets $A, B\subseteq X$, prove that $A \subseteq B$ iff $A\cap \ptb{B} = \emptyset$, where $\ptb{B}$ denotes the complement of $B$ in $X$. \medskip (ii) Sketch an algorithm to test whether $L(G) \subseteq L(D)$, where $G$ is a context-free grammar and $D$ is a DFA. \vspace {0.25cm} \noindent {\bf Problem A3.} Prove that if $L \subseteq \Sigma^*$ is a context-free language, then its reversal, $L^R = \{w^R\in \Sigma^* \mid w \in \}$, is also context-free. \vspace {0.5cm} ``B problems'' must be turned in. \vspace {0.25cm} \noindent {\bf Problem B1 (40 pts).} Use the pumping lemma (or Ogden's lemma) to show that the following languages are not context-free: \medskip $L_{1} = \{a^mb^nc^p\ | \ 1 \leq m < n < p\}$ \medskip $L_{2} = \{a^nb^nc^p\ | \ n,p \geq 1, p \not= n\}$ \medskip\noindent {\it Hint\/}. For $L_{1}$, consider $a^mb^nc^p$ with $m$ large, and let the $a$'s be distinguished. For $L_{2}$, let $k$ be the integer of Ogden's lemma. Let $p=k!$, and consider $a^{2p}b^{2p}c^{p}$, with the $c$'s distinguished. \vspace{0.25cm}\noindent {\bf Problem B2 (20 pts).} Given any two alphabets $\Sigma, \Delta$, a {\it substitution\/} is a function $\mapdef{\tau}{\Sigma}{2^{\Delta^*}}$ assigning some language $\tau(a) \subseteq \Delta^*$ to every symbol $a\in\Sigma$. A substitution $\mapdef{\tau}{\Sigma}{2^{\Delta^*}}$ is extended to a map $\mapdef{\tau}{2^{\Sigma^*}}{2^{\Delta^*}}$ by first extending $\tau$ to strings using the following definition $$\eqaligneno{ \tau(\epsilon) &= \{\epsilon\},\cr \tau(ua) &= \tau(u)\tau(a),\cr }$$ where $u\in \Sigma^*$ and $a\in\Sigma$, and then to languages by letting $$\tau(L) = \bigcup_{w\in L} \tau(w),$$ for every language $L\subseteq \Sigma^*$. \medskip For example, let $\mapdef{\tau}{\Sigma}{2^{\Sigma^*}}$ be the substitution defined such that $\tau(a) = \{\epsilon, a\}$ for every $a\in \Sigma$. Explain (in words) what $\tau(L)$ is. \medskip In general, prove that if $L$ is a context-free language and if $\tau(a)$ is a context-free language for every $a\in \Sigma$, then $\tau(L)$ is also a context-free language. \vspace{0.25cm}\noindent {\bf Problem B3 (50 pts).} Let $\mapdef{h}{\Sigma^*}{\Delta^*}$ be a homomorphism, and let $L\subseteq \Delta^*$. Assume that $\epsilon\notin L$. \medskip (i) Define $\Omega, \Gamma\subseteq \Sigma$ as follows: \begin{eqnarray*} \Omega &=& \{a\in \Sigma\mid h(a) \not= \epsilon\},\\ \Gamma &=& \{a\in \Sigma\mid h(a) = \epsilon\}. \end{eqnarray*} Let $\ptb{\Omega}$ be the new set of symbols $$\ptb{\Omega} = \{\ptb{a} \mid a\in \Omega\},$$ and let $E$ be a new symbol. \medskip Let $\tau_1$ be the substitution on $\Delta$ defined such that $$\tau_1(a) = (\ptb{\Omega}\cup\{E\})^*\{a\}(\ptb{\Omega}\cup\{E\})^*,$$ for all $a\in \Delta$, and let \[R = \left(\{\ptb{a} h(a)\mid a\in \Omega\}\cup \{E\}\right)^*\] and \[L_2 = \tau_1(L) \cap R.\] Prove that $$L_2\cap \left(\ptb{\Omega}\cup \{h(a) \mid a\in\Omega\}\right)^* =\{\ptb{a_1}h(a_1) \ptb{a_2}h(a_2)\cdots \ptb{a_n}h(a_n)\mid a_i\in \Omega,\> h(a_1a_2\cdots a_n)\in L\}.$$ \medskip (ii) Let $\mapdef{g}{\left(\Delta\cup\ptb{\Omega}\cup\{E\}\right)^*} {\left(\Sigma\cup \{E\}\right)^*}$ be the homomorphism defined such that \begin{eqnarray*} g(a) &=& \epsilon\quad \hbox{if $a\in\Delta$},\\ g(\ptb{a}) &=& a\quad \hbox{if $a\in\Omega$},\\ g(E) &=& E, \end{eqnarray*} let \[L_3 = g(L_2),\] and and let $\tau_2$ be the substitution on $\Sigma\cup\{E\}$ defined such that \begin{eqnarray*} \tau_2(a) &=& \{a\}\quad \hbox{if $a\in\Sigma$},\\ \tau_2(E) &=& \Gamma^+. \end{eqnarray*} \medskip Observe that for every $w\in L$, if $h^{-1}(w)\not=\emptyset$, then there is some $y\in L_3$ such that $h(y) = w$, and that every $y\in L_3$ is in $h^{-1}(L)$ after the occurrences of $E$ have been erased. \medskip Let $\s{L}$ be a family of languages that is closed under substitution by regular languages, intersection with regular languages, and union with regular languages. Prove that for every $L\in \s{L}$, if $\epsilon\notin L$, then $$\tau_2(g(\tau_1(L)\cap R)) = h^{-1}(L).$$ Prove that if $\epsilon\in L$, then $$h^{-1}(L) = \tau_2(g(\tau_1(L - \{\epsilon\})\cap R)) \cup \Gamma^*.$$ Conclude that $\s{L}$ is closed under inverse homomorphisms. \medskip (iii) Prove that if $L$ is context-free, then so is $$h^{-1}(L) = \{w\in \Sigma^*\mid h(w) \in L\}.$$ {\bf Do not\/} give a proof based on PDA's! \vspace {0.25cm} \noindent {\bf Problem B4 (50 pts).} Give pushdown automata for the following languages: \medskip (a) $L_{5} = \{ww^{R}\mid w \in \{a,b\}^{*}\}$\ ($w^{R}$ denotes the reversal of $w$) \medskip (b) $L_{6} = \{a^{m}b^{n}\mid 1 \leq m \leq n \leq 3m\}$ \medskip (c) $L_{7} = \{a^{n}b^{n}\mid n \geq 1\}\cup \{a^{2n}b^{n}\mid n \geq 1\}$ \medskip (d) $L_{8} = \{a^{2m}b^{n}a^{2m}b^{p}\mid m, n, p \geq 1\} \cup \{a^{m}b^{3n}a^{p}b^{3n}\mid m, n, p \geq 1\}$ \medskip (e) $L_9 = \{xcy \mid |x| = |y|,\, x, y\in \{a, b\}^*\}$ \medskip In each case, give a brief justification of the fact that your PDA generates the desired language. \vspace {0.25cm} \noindent {\bf Problem B5 (50 pts).} Let $L\subseteq \{a\}^*$ be a context-free language. Prove that $L$ is actually a regular language. Proceed as follows. If $L$ is finite, this is obvious, thus, assume that $L$ is infinite. Let $L = L(G)$, for some CFG $G$. \medskip (i) Let $K > 1$ be the constant of the pumping lemma for $G$, and let $r = K!$. Prove the following fact: for every $w\in L$, if $|w| \geq K$, then $$\{w a^{rn} \mid n \geq 0\} \subseteq L.$$ \medskip (ii) For every $i$ such that $0 \leq i < r$, let $$L_i = \{a^n \mid a^n\in L,\, n \geq K,\, n \equiv i \bmod r\}.$$ Clearly, $$L = \{a^n \mid a^n \in L,\, n < K\} \cup \bigcup_{i = 0}^{r-1} L_i.$$ If $L_i\not=\emptyset$, let $z_i$ be the shortest string in $L_i$. Prove that $$L_i = \{z_i a^{rm}\ | \ m \geq 0\}.$$ Conclude that $L$ is regular. \medskip (iii) Prove that it is decidable whether $L_i = \emptyset$. \medskip (iv) Given a context-free language $L$ over $\{a, b\}$, prove that it is decidable whether \\ $\{a\}^* \subseteq L$. \vspace {0.25cm}\noindent {\bf Problem B6 (30 pts).} (1) Given the alphabet $\Sigma_2 = \{a,b,\overline a, \overline b\}$, define the relation $\simeq$ on $\Sigma_2^{*}$ as follows: For all $u, v \in \Sigma_2^{*}$, $$u \simeq v\quad\hbox{iff}\quad \exists x, y \in \Sigma_2^{*},\quad u = x a\overline a y,\quad v=xy\quad\hbox{or}\quad u = x b\overline b y,\quad v=xy.$$ \medskip Let $\simeq^{*}$ be the reflexive and transitive closure of $\simeq$, and let $D_{2}= \{w \in \Sigma_2^{*}\mid w \simeq^{*} \epsilon\}$. Give a context-free grammar for $D_{2}$, and justify your answer. \medskip\noindent {\it Note\/}: Strings such as $a \overline a b \overline b$ and $ab\overline b \overline a$ are in $D_{2}$. \medskip (2) Given the alphabet $\Sigma_m = \{a_1, \ldots, a_m, \overline{a_1}, \ldots, \overline{a_m}\}$, define the relation $\simeq$ on $\Sigma_m^{*}$ as follows: For all $u, v \in \Sigma_m^{*}$, $$u \simeq v\quad\hbox{iff}\quad \exists x, y \in \Sigma_m^{*},\quad u = x a_i\overline{a_i} y,\quad v=xy, \quad\hbox{for some $i$, $1\leq i\leq m$.}$$ \medskip Let $\simeq^{*}$ be the reflexive and transitive closure of $\simeq$, and let $D_{m}= \{w \in \Sigma_m^{*}\mid w \simeq^{*} \epsilon\}$. Give a context-free grammar for $D_{m}$, and prove that your grammar is correct {\it rigorously\/}. \medskip\noindent {\it Note\/}: $D_m$ is know as the {\it Dyck set\/} on $m$ letters. \vspace{0.25cm}\noindent {\bf TOTAL: 240 points.} \end{document} \vspace {0.25cm} \noindent {\bf Problem B7 (80 pts).} In this problem, the fundamental property of LR-parsing (due to Knuth) is established. % % \def\rmder#1{\ \hbox{\raise-5pt\hbox{$\buildrel \Longrightarrow \over {\scriptstyle rm}$}}^{#1}\ } \medskip For simplicity, let us consider context-free grammars without $\epsilon$-rules. Given a reduced context-free grammar $G = (V,\Sigma ,P,S')$ augmented with start production $S' \rightarrow S$, where $S'$ does not appear in any other productions, the set $C_{G}$ of {\it characteristic strings of G\/} is the following subset of $V^{*}$ (watch out, not $\Sigma ^{*}$): $$\displaylignes{ \hfill C_{G}=\{\alpha\beta \in V^{*}\mid S' \rmder{*} \alpha Bv \rmder{} \alpha\beta v,\hfill\cr \hfill\qquad\quad\alpha, \beta \in V^{*}, v \in \Sigma^{*}, B \rightarrow \beta \in P \}.\hfill\cr}$$ \medskip In words, $C_{G}$ is a certain set of prefixes of sentential forms obtained in rightmost derivations: those obtained by truncating the part of the sentential form immediately following the rightmost symbol in the righthand side of the production applied at the last step. \medskip The fundamental property of LR-parsing is that $C_{G}$ is a {\it regular language\/}. A nondeterministic automaton $N_{C_{G}}$ accepting $C_{G}$ can be constructed according to the method described in Section 1 of the handout {\sl A Survey of $LR$-Parsing Methods, etc.\/}. Please, review this construction. \medskip (i) Let $G$ be the following grammar: \begin{eqnarray*} S' &\rightarrow& E\\ E &\rightarrow& E + T \mid T\\ T &\rightarrow& T * F \mid F\\ F &\rightarrow& (E) \mid a \end{eqnarray*} with $\Sigma =\{+, *, (, ), a\}$. \medskip Give the automaton $N_{C_{G}}$ for the grammar $G$. \medskip (ii) Using the standard algorithms, give a deterministic finite automaton equivalent to $N_{C_{G}}$. Do not include the ``dead state''. \medskip (iii) You shall now prove that $L(N_{C_{G}}) = C_{G}$! This proves the correctness of the method that you are going to implement in the programming project! \medskip (1) Prove the following claim by induction on the length of rightmost derivations: \medskip {\it Claim\/} 1: For any nonterminal $A$, for every rightmost derivation \[A \rmder{*} \alpha Bv \rmder{} \alpha \beta v,\] where $v \in \Sigma ^{*}$, $B \in N$, and $\alpha, \beta\in V^{*}$, if we denote the first production in the above rightmost derivation as $A \rightarrow \delta$, then there is a computation on input $\alpha\beta$ from state $A \rightarrow \hbox{``.''}\delta$ to the final state $B \rightarrow \beta\hbox{``.''}$. \medskip To prove this claim, you will have to show the following (think about it in terms of parse trees). For any nonterminal $A$, every rightmost derivation from $A$ is either of the form \begin{enumerate} \item[(i)] $A \rmder{} \delta$, for some production $A \rightarrow \delta$, in which case $A = B$ and $\delta = \beta$, or of the form \item[(ii)] $A \rmder{} \lambda B_{i}\rho \rmder{*} \lambda B_{i}w \rmder{*} \lambda \alpha _{i}Bw_{i}w \rmder{} \lambda \alpha _{i}\beta w_{i}w$, with $w,w_{i} \in \Sigma ^{*}$, $A,B,B_{i} \in N$, $\lambda, \rho, \alpha _{i}, \beta\in V^{*}$, and where $$B_{i} \rmder{*} \alpha _{i}Bw_{i} \rmder{} \alpha _{i}\beta w_{i}\quad\hbox{and}\quad \rho \rmder{*} w.$$ \end{enumerate} Let $B_{i} \rightarrow \delta_{i}$ be the first production applied in the rightmost derivation from $B_{i}$. In the first case, there is a computation in $N_{C_{G}}$ from state $A \rightarrow \hbox{``.''}\delta$ to the final state $A \rightarrow \delta \hbox{``.''}$ (where again, $A \rightarrow \delta = B \rightarrow \beta$), and in the second case, there is a computation in $N_{C_{G}}$ from state $A \rightarrow \hbox{``.''}\lambda B_{i}\rho$ to $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ on input $\lambda$, and a computation from state $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ to the final state $B \rightarrow \beta\hbox{``.''}$ on input $\alpha _{i}\beta $. \medskip Conclude that $C_{G}$ is a subset of $L(N_{C_{G}})$. \medskip (2) Prove the following claim by induction on the number of $\epsilon$-transitions in a computation in $N_{C_{G}}$: \medskip {\it Claim\/} 2: For any state $A \rightarrow \hbox{``.''}\delta$, if there is a computation on input $\gamma$ to some final state $B \rightarrow \beta\hbox{``.''}$, then there is some rightmost derivation $A \rmder{*} \alpha Bv \rmder{} \alpha \beta v$, such that, the production applied in the first rightmost derivation step is $A \rightarrow \delta$, and $\gamma=\alpha\beta$. \medskip For this, prove the following: \begin{enumerate} \item[(i)] Either $\gamma = \delta$ and the computation is from state $A \rightarrow \hbox{``.''}\delta$ to state $A \rightarrow \delta \hbox{``.''}$, or \item[(ii)] $\delta$ is of the form $\lambda B_{i}\rho$, $\gamma$ is of the form $\lambda \alpha _{i}\beta$, and there is a computation on input $\alpha _{i}\beta$ from some state of the form $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ to the final state $B \rightarrow \beta\hbox{``.''}$, and a rightmost derivation as in Claim 1. \end{enumerate} Conclude that $L(N_{C_{G}})$ is a subset of $C_{G}$, thus establishing that $C_{G} = L(N_{C_{G}})$.