\documentclass[12pt]{article} \usepackage{amsfonts} %\documentstyle[12pt,amsfonts]{article} %\documentstyle{article} \setlength{\topmargin}{-.5in} \setlength{\oddsidemargin}{0 in} \setlength{\evensidemargin}{0 in} \setlength{\textwidth}{6.5truein} \setlength{\textheight}{8.5truein} %\input ../basicmath/basicmathmac.tex % %\input ../adgeomcs/lamacb.tex \input ../adgeomcs/mac.tex \input ../adgeomcs/mathmac.tex \def\fseq#1#2{(#1_{#2})_{#2\geq 1}} \def\fsseq#1#2#3{(#1_{#3(#2)})_{#2\geq 1}} \def\qleq{\sqsubseteq} \def\buildrell#1\over#2{\mathrel{\mathop{\null#1}\limits_{#2}}} \def\rmder#1{{\ \buildrell \Longrightarrow \over{rm}}^{#1}\ } \def\lmder#1{\buildrel #1\over{\buildrell \Longrightarrow \over{lm}}} % \begin{document} \begin{center} \fbox{{\Large\bf Spring, 2004 \hspace*{0.4cm} CIS 511}}\\ \vspace{1cm} {\Large\bf Introduction to the Theory of Computation\\ Jean Gallier \\ \vspace{0.5cm} Homework 5}\\[10pt] March 25, 2004; Due April 8, 2004\\ \end{center} ``A problems'' are for practice only, and should not be turned in. \vspace {0.25cm}\noindent {\bf Problem A1.} Give constructions proving that for any PDA $M$, the acceptance modes $T(M)$, $N(M)$, and $L(M)$, are equivalent. \vspace {0.25cm}\noindent {\bf Problem A2.} (i) Given any set $X$, for any two subsets $A, B\subseteq X$, prove that $A \subseteq B$ iff $A\cap \ptb{B} = \emptyset$, where $\ptb{B}$ denotes the complement of $B$ in $X$. \medskip (ii) Sketch an algorithm to test whether $L(G) \subseteq L(D)$, where $G$ is a context-free grammar and $D$ is a DFA. \vspace {0.25cm}\noindent {\bf Problem A3.} Given a homomorphism $\mapdef{h}{\Sigma^*}{\Delta^*}$, for any any context-free language $L\subseteq \Delta^*$, prove that $h^{-1}(L)$ is context-free. \medskip\noindent {\it Hint\/}. Find a construction using a PDA. \vspace {0.5cm} ``B problems'' must be turned in. \vspace {0.25cm} \noindent {\bf Problem B1 (40 pts).} Use the pumping lemma (or Ogden's lemma) to show that the following languages are not context-free: \medskip $L_{1} = \{a^mb^nc^p\ | \ 1 \leq m < n < p\}$ \medskip $L_{2} = \{a^nb^nc^p\ | \ n,p \geq 1, p \not= n\}$ \medskip\noindent {\it Hint\/}. For $L_{1}$, pick $w$ with the $a$'s be distinguished. For $L_{2}$, pick $w$ with the $c$'s distinguished. \vspace {0.25cm} \noindent {\bf Problem B2 (50 pts).} Give pushdown automata for the following languages: \medskip (a) $L_{5} = \{ww^{R}\ |\ w \in \{a,b\}^{*}\}$\ ($w^{R}$ denotes the reversal of $w$) \medskip (b) $L_{6} = \{a^{m}b^{n}\ |\ 1 \leq m \leq n \leq 3m\}$ \medskip (c) $L_{7} = \{a^{n}b^{n}\ |\ n \geq 1\}\cup \{a^{2n}b^{n}\ |\ n \geq 1\}$ \medskip (d) $L_{8} = \{a^{2m}b^{n}a^{2m}b^{p}\ |\ m, n, p \geq 1\} \cup \{a^{m}b^{3n}a^{p}b^{3n}\ |\ m, n, p \geq 1\}$ \medskip (e) $L_9 = \{xcy \ |\ |x| = |y|,\, x, y\in \{a, b\}^*\}$ \medskip In each case, give a brief justification of the fact that your PDA generates the desired language. \vspace {0.25cm}\noindent {\bf Problem B3 (30 pts).} (1) Given the alphabet $\Sigma_2 = \{a,b,\overline a, \overline b\}$, define the relation $\simeq$ on $\Sigma_2^{*}$ as follows: For all $u, v \in \Sigma_2^{*}$, $$u \simeq v\quad\hbox{iff}\quad \exists x, y \in \Sigma_2^{*},\quad u = x a\overline a y,\quad v=xy\quad\hbox{or}\quad u = x b\overline b y,\quad v=xy.$$ \medskip Let $\simeq^{*}$ be the reflexive and transitive closure of $\simeq$, and let $D_{2}= \{w \in \Sigma_2^{*}\ |\ w \simeq^{*} \epsilon\}$. Give a context-free grammar for $D_{2}$, and justify your answer. \medskip\noindent {\it Note\/}: Strings such as $a \overline a b \overline b$ and $ab\overline b \overline a$ are in $D_{2}$. \medskip (2) Given the alphabet $\Sigma_m = \{a_1, \ldots, a_m, \overline{a_1}, \ldots, \overline{a_m}\}$, define the relation $\simeq$ on $\Sigma_m^{*}$ as follows: For all $u, v \in \Sigma_m^{*}$, $$u \simeq v\quad\hbox{iff}\quad \exists x, y \in \Sigma_m^{*},\quad u = x a_i\overline{a_i} y,\quad v=xy, \quad\hbox{for some $i$, $1\leq i\leq m$.}$$ \medskip Let $\simeq^{*}$ be the reflexive and transitive closure of $\simeq$, and let $D_{m}= \{w \in \Sigma_m^{*}\ |\ w \simeq^{*} \epsilon\}$. Give a context-free grammar for $D_{m}$, and justify your answer. \medskip\noindent {\it Note\/}: $D_m$ is know as the {\it Dyck set\/} on $m$ letters. \vspace {0.25cm}\noindent {\bf Problem B4 (60 pts).} A context-free grammar, $G=(V,\Sigma,P,S)$, is {\it linear\/} iff for every production $(A \rightarrow \alpha)\in P$, $$\alpha \in \Sigma^{*}N\Sigma^{*}\cup\Sigma^{*},$$ where $N = V-\Sigma$. A language, $L$, is a {\it linear context-free language\/} iff there is some linear context-free grammar, $G$, such that $L=L(G)$. \medskip (a) Recall that for any string, $w\in \Sigma^*$, the string $w^R$ denotes the reversal of the string $w$, i.e., the string $w$ written in reverse order ($\epsilon^R = \epsilon$). Prove that the language \\ $L_{0} = \{ww^{R}\mid w \in \{a,b\}^{*}\}$ is linear context-free. \medskip (b) Given any regular language, $L \subseteq \Sigma^*$, prove that $L^R = \{w^R \mid w\in L\}$ is also regular. \medskip (c) Let $G=(V,\Sigma,P,S)$ be a {\it linear context-free\/} grammar. Assume that the set of productions $P$ contains $p$ productions and that it is ordered in some fashion. Each production is named $p_{i}$, with $1 \leq i \leq p$. Let $\Delta= \{\delta_{1},\ldots,\delta_{p}\}$ be a set in one-to-one correspondence with the set of productions $P$ and assume that $\Delta$ is disjoint from $V$. The language $R \subseteq \Delta^{*}$ is defined as follows: $$\displaylignes{ R=\{\delta_{i_{1}}\cdots\delta_{i_{n}}\mid n \geq 1,\quad \hbox{there is a derivation}\ S \Longrightarrow \alpha_{1} \Longrightarrow\ \cdots\ \Longrightarrow \alpha_{n}, \hfill\cr \hfill \hbox{such that}\quad \alpha_{n} \in \Sigma^{*} \quad\hbox{and the production applied at the $k$-th step is}\ p_{i_{k}}\}. \cr }$$ Let $h_{1}, h_{2} \co \Delta^{*} \rightarrow \Sigma^{*}$ be the homomorphisms defined as follows: For any $p_{i} \in P$, \begin{enumerate} \item[(1)] If $p_{i}$ is of the form $A \rightarrow uBv$, where $A, B \in N$, and $u, v \in \Sigma^{*}$, then $h_{1}(\delta_{i})=u$ and $h_{2}(\delta_{i})=v$; \item[(2)] If $p_{i}$ is of the form $A \rightarrow w$, where $A\in N$ and $w \in \Sigma^{*}$, then choose any $u, v \in \Sigma^{*}$ such that $w=uv$, and let $h_{1}(\delta_{i})=u$ and $h_{2}(\delta_{i})=v$. \end{enumerate} Prove that for any $k$, with $1 \leq k \leq n-1$, $$\alpha_{k} = h_{1}(\delta_{i_{1}}\cdots\delta_{i_{k}})Bh_{2} (\delta_{i_{k}}\cdots\delta_{i_{1}}),$$ for some $B \in N$, and that $$\alpha_{n} = h_{1}(\delta_{i_{1}}\cdots\delta_{i_{n}})h_{2} (\delta_{i_{n}}\cdots\delta_{i_{1}}).$$ \medskip Conclude that $L(G)=\{h_{1}(w)h_{2}(w^{R})\mid w \in R\}$, for some language $R$ over $\Delta$. \medskip (d) Prove that the language $R$ defined in (c) is regular. %\medskip\noindent %{\it Hint\/}: Construct a right-linear grammar. \medskip (e) Given a linear context-free language, $L=L(G)$, from questions (c) and (d), we know that there is a regular language $R$ and two homomorphisms $h_{1}, h_{2}$, such that $$L=\{h_{1}(w)h_{2}(w^{R})\mid w \in R\}.$$ \medskip Let $\Omega=\{\omega_{1},\ldots,\omega_{p}\}$ be a set in one-to-one correspondence with $\Delta$ and such that $\Omega$ is disjoint from $V$ and $\Delta$. Let $f \co \Delta^{*} \rightarrow \Omega^{*}$ be the homomorphism determined by defining $f(\delta_{i})=\omega_{i}$, for all $i$, with $1 \leq i \leq p$. Let $S$ be the regular language \[ S = \{f(w)^{R}\mid w \in R\} = f(R)^R, \] where $R$ is the regular set of question (c). Also, let $g_{1} \co (\Delta \cup \Omega)^{*} \rightarrow \Sigma^{*}$ be the homomorphism determined by defining \[ g_{1}(\delta_{i})=h_{1}(\delta_{i}) \quad\hbox{and}\quad g_{1}(\omega_{i})=h_{2}(\delta_{i}). \] Finally, let $g_{2} \co (\Delta \cup \Omega)^{*} \rightarrow \{a,b\}^{*}$ be the homomorphism determined by defining \[ g_{2}(\delta_{i})=a^{i}b \quad\hbox{and}\quad g_{2}(\omega_{i})=ba^{i}, \] for all $i$, with $1 \leq i \leq p$. \medskip Prove that $$g_{2}^{-1}(L_{0})\cap RS = \{w_{1}w_{2}\mid w_{1} \in R,\quad w_{2} \in S,\quad\hbox{and}\quad f(w_{1})=w_{2}^{R}\},$$ where $L_{0}$ is the language of question (a), $R$ is defined in question (c), and $S$ is defined above in (e). From this, prove that $$L = g_{1}(g_{2}^{-1}(L_{0})\cap RS).$$ \medskip Hence, prove that every linear context-free language can be obtained from the language $L_{0} = \{ww^{R}\mid w \in \{a,b\}^{*}\}$ by homomorphism, inverse homomorphism, and intersection with regular languages. \medskip\noindent {\it Remark\/}: In fact, it can be shown that the family of linear context-free languages is the least class of languages with these properties. \medskip\noindent {\bf Extra Credit (50 pts).} Using (e), prove that the language $\{a^mb^ma^nb^n \mid m, n\geq 1\}$ is not linear-context-free. \medskip\noindent {\it Hint\/}. First, prove that if $\mapdef{h_i}{\Sigma}{\Delta_i}$ are homomorphisms ($i = 1, 2$), $c$ is a new symbol not in $\Sigma\cup \Delta_1\cup \Delta_2$, and $R\subseteq \Sigma^*$ is a regular language, then $\{h_1(w)ch_2(w^R) \mid w\in R\}$ is a linear context-free language. Then, use some suitable gsm mappings. This is hard! \vspace {0.25cm} \noindent {\bf Problem B5 (80 pts).} In this problem, the fundamental property of LR-parsing (due to Knuth) is established. % % \def\rmder#1{\ \hbox{\raise-5pt\hbox{$\buildrel \Longrightarrow \over {\scriptstyle rm}$}}^{#1}\ } \medskip For simplicity, let us consider context-free grammars without $\epsilon$-rules. Given a reduced context-free grammar $G = (V,\Sigma ,P,S')$ augmented with start production $S' \rightarrow S$, where $S'$ does not appear in any other productions, the set $C_{G}$ of {\it characteristic strings of G\/} is the following subset of $V^{*}$ (watch out, not $\Sigma ^{*}$): $$\displaylignes{ \hfill C_{G}=\{\alpha\beta \in V^{*}\ |\ S' \rmder{*} \alpha Bv \rmder{} \alpha\beta v,\hfill\cr \hfill\qquad\quad\alpha, \beta \in V^{*}, v \in \Sigma^{*}, B \rightarrow \beta \in P \}.\hfill\cr}$$ \medskip In words, $C_{G}$ is a certain set of prefixes of sentential forms obtained in rightmost derivations: those obtained by truncating the part of the sentential form immediately following the rightmost symbol in the righthand side of the production applied at the last step. \medskip The fundamental property of LR-parsing is that $C_{G}$ is a {\it regular language\/}. A nondeterministic automaton $N_{C_{G}}$ accepting $C_{G}$ can be constructed according to the method described in Section 1 of the handout {\sl A Survey of $LR$-Parsing Methods, etc.\/}. Please, review this construction. \medskip (i) Let $G$ be the following grammar: $$\eqaligneno{ S' &\rightarrow E\cr E &\rightarrow E + T\ |\ T\cr T &\rightarrow T * F \ |\ F\cr F &\rightarrow (E) \ |\ a\cr}$$ \indent with $\Sigma =\{+, *, (, ), a\}$. \medskip Give the automaton $N_{C_{G}}$ for the grammar $G$. \medskip (ii) Using the standard algorithms, give a deterministic finite automaton equivalent to $N_{C_{G}}$. Do not include the ``dead state''. \medskip (iii) You shall now prove that $L(N_{C_{G}}) = C_{G}$! This proves the correctness of the method that you are going to implement in the programming project! \medskip (1) Prove the following claim by induction on the length of rightmost derivations: \medskip {\it Claim\/} 1: For any nonterminal $A$, for every rightmost derivation \[A \rmder{*} \alpha Bv \rmder{} \alpha \beta v,\] where $v \in \Sigma ^{*}$, $B \in N$, and $\alpha, \beta\in V^{*}$, if we denote the first production in the above rightmost derivation as $A \rightarrow \delta$, then there is a computation on input $\alpha\beta$ from state $A \rightarrow \hbox{``.''}\delta$ to the final state $B \rightarrow \beta\hbox{``.''}$. \medskip To prove this claim, you will have to show the following (think about it in terms of parse trees). For any nonterminal $A$, every rightmost derivation from $A$ is either of the form \begin{enumerate} \item[(i)] $A \rmder{} \delta$, for some production $A \rightarrow \delta$, in which case $A = B$ and $\delta = \beta$, or of the form \item[(ii)] $A \rmder{} \lambda B_{i}\rho \rmder{*} \lambda B_{i}w \rmder{*} \lambda \alpha _{i}Bw_{i}w \rmder{} \lambda \alpha _{i}\beta w_{i}w$, with $w,w_{i} \in \Sigma ^{*}$, $A,B,B_{i} \in N$, $\lambda, \rho, \alpha _{i}, \beta\in V^{*}$, and where $$B_{i} \rmder{*} \alpha _{i}Bw_{i} \rmder{} \alpha _{i}\beta w_{i}\quad\hbox{and}\quad \rho \rmder{*} w.$$ \end{enumerate} Let $B_{i} \rightarrow \delta_{i}$ be the first production applied in the rightmost derivation from $B_{i}$. In the first case, there is a computation in $N_{C_{G}}$ from state $A \rightarrow \hbox{``.''}\delta$ to the final state $A \rightarrow \delta \hbox{``.''}$ (where again, $A \rightarrow \delta = B \rightarrow \beta$), and in the second case, there is a computation in $N_{C_{G}}$ from state $A \rightarrow \hbox{``.''}\lambda B_{i}\rho$ to $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ on input $\lambda$, and a computation from state $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ to the final state $B \rightarrow \beta\hbox{``.''}$ on input $\alpha _{i}\beta $. \medskip Conclude that $C_{G}$ is a subset of $L(N_{C_{G}})$. \medskip (2) Prove the following claim by induction on the number of $\epsilon$-transitions in a computation in $N_{C_{G}}$: \medskip {\it Claim\/} 2: For any state $A \rightarrow \hbox{``.''}\delta$, if there is a computation on input $\gamma$ to some final state $B \rightarrow \beta\hbox{``.''}$, then there is some rightmost derivation $A \rmder{*} \alpha Bv \rmder{} \alpha \beta v$, such that, the production applied in the first rightmost derivation step is $A \rightarrow \delta$, and $\gamma=\alpha\beta$. \medskip For this, prove the following: \begin{enumerate} \item[(i)] Either $\gamma = \delta$ and the computation is from state $A \rightarrow \hbox{``.''}\delta$ to state $A \rightarrow \delta \hbox{``.''}$, or \item[(ii)] $\delta$ is of the form $\lambda B_{i}\rho$, $\gamma$ is of the form $\lambda \alpha _{i}\beta$, and there is a computation on input $\alpha _{i}\beta$ from some state of the form $B_{i} \rightarrow \hbox{``.''}\delta _{i}$ to the final state $B \rightarrow \beta\hbox{``.''}$, and a rightmost derivation as in Claim 1. \end{enumerate} Conclude that $L(N_{C_{G}})$ is a subset of $C_{G}$, thus establishing that $C_{G} = L(N_{C_{G}})$. \vspace{0.5cm}\noindent {\bf TOTAL: 260 points $+$ 50 points.} \end{document}