\documentclass{article}
\input{6840-preamble}
\usepackage{qtree}
\begin{document}
\psetnum{1}
\date{2004/09/22}

\begin{pset}

  \begin{problem}
    Let DROP-OUT$(A)$ be the language consisting of all the strings
    that can be created by removing one symbol from a string in
    $A$. We show that the class of regular languages is closed under
    this operation.

    Let $L$ be a regular language and $M$ be the DFA that recognizes
    $L$. We show how to construct a NFA that recognizes DROP-OUT$(L)$.
    To do so, we construct a new machine $M'$ that contains two
    ``copies'' of $M$ (we refer to them $M_1$ and $M_2$, though they
    are actually parts of a larger machine rather than separate
    machines). The start state of $M'$ will be the start state of
    $M_1$, and the accept states will be the accept states of
    $M_2$. $M_1$ and $M_2$ will each contain internal edges: replicas
    of the edges in $M$. In addition, we add additional edges crossing
    from $M_1$ to $M_2$: suppose $a$ is a state in $M$ and $a_1$ and
    $a_2$ are the corresponding states in $M_1$ and $M_2$
    respectively, and similarly $b$, $b_1$, and $b_2$. Then if there
    is a transition from $a$ to $b$ in $M$, we add an
    $\epsilon$-transition from $a_1$ to $b_2$. Thus $M_1$ corresponds
    to the states before the removed symbol, $M_2$ corresponds to the
    states after the removed symbol, and jumping between them across
    the $\epsilon$-transition corresponds to skipping the removed
    symbol. A picture follows.
    \vfil
    More formally, let $M = (Q, \Sigma, \delta, q, F)$, and define $M'
    = (Q', \Sigma', \delta', q', F')$.
    \begin{align*}
      Q' &= \cset{r_1, r_2}{r \in Q} \\
      \Sigma' &= \Sigma \\
      q' &= q_1 \\
      F' &= \cset{f_2}{f \in F}\\
    \end{align*}
    The transition function $\delta'$ contains three classes of
    transitions:
    \begin{enumerate}
    \item from $a_1$ to $b_1$ with symbol $\sigma$ if $\delta(a,
      \sigma) = b$
    \item from $a_2$ to $b_2$ with symbol $\sigma$ if $\delta(a,
      \sigma) = b$
    \item from $a_1$ to $b_2$ with the empty string if there exists a
      $\sigma$ such that $\delta(a, \sigma) = b$.
    \end{enumerate}
  \end{problem}

  \begin{problem}
    \begin{subproblem}
      Let $N$ be a NFA with $n$ states, with $L(N) \neq \emptyset$.
      Then there is some string $w$ in $L(N)$. If $w$ has length less
      than $n$, then we are done, so assume it has length $\abs{w} \ge
      n$. Observe that the fact that $w$ is accepted by $N$ indicates
      that there is a path $P$ of at least $\abs{w}$ transitions from
      the start state of $N$ to an accept state (it may have more than
      $\abs{w}$ transitions because of $\epsilon$-transitions). So it
      passes through $\abs{w}+1$ states, including the start
      and end states. But there are only $n$ states in the machine, so
      by the pigeonhole principle, there must be a cycle in the
      graph. This cycle can be removed (repeatedly if necessary) to
      produce a path from the start state to the accept state in $n-1$
      transitions. This corresponds to a string of length at most
      $n-1$ that is accepted, and therefore in $L(N)$.
    \end{subproblem}

    \begin{subproblem}
      Consider the NFA $N$ that consists of only one state, which is
      both the start state and an accept state, and no
      transitions. This NFA accepts the empty string, since the start
      state is an accept state, but does not accept any non-empty
      strings since there are no transitions from the state.
    \end{subproblem}

    \begin{subproblem}
      Let $N$ be an NFA with $n$ states, and define $f(n) = 2^n$. We
      show that if $\overline{L(N)} \neq \emptyset$ then $w \in
      \overline{L(N)}$ for some $w$ with length less than
      $f(n)$.\footnote{This bound may not be tight, but it's an upper
        bound, and it looks like a ``reasonable function'' to me.}
      Assume $\overline{L(N)} \neq \emptyset$, i.e. that there is a
      string $x$ that is not accepted by $N$. We show that a string
      $w$ can be produced with length less than $2^n$.

      Convert $N$ to the equivalent DFA, $M$. $M$ has at most $2^n$
      states. $x$ is not accepted by $N$, so it is not accepted by $M$
      either, so there is a path of transitions in $M$ that leads to a
      reject state. As in part a, we remove cycles from this path of
      transitions in order to create a string of length less than
      $2^n$ (the number of states in $M$) that leads to the same
      reject state.
    \end{subproblem}
  \end{problem}

  \begin{problem}
    Consider the language $L = \cset{wxw}{w,x \in \bit^+}$. We show by
    contradiction that $L$ is not regular. Assume the contrary. Then
    by the pumping lemma, there exists a pumping length $p$. Consider
    the string $a = 0^{p}100^{p}1$. Note that $a \in L$. By the
    pumping lemma, $a = xyz$, $\forall i \ge 0, xy^iz \in L$, $\abs{y}
    > 0$, and $\abs{xy} \le p$. By the last condition, $y$ is located
    in the initial $p$ zeros. Therefore, $a$ can be pumped up to
    achieve the string $a' = 0^{p+k}100^{p}1$ for any $k > 0$, which
    will be in $L$. Arbitrarily, take $k=2$. So $a' = xwx$ for some
    non-empty $x$ and $w$. Since $a'$ ends with a 1, and the last
    character in $a$ must be the last character in $x$, $x$ must end
    with a 1. Therefore, $x$ must be the string $0^{p+2}1$, since this
    is the first prefix of $a'$ that ends with a 1. But then the
    remaining part of the string following this prefix is $0^{p+1}1$,
    and this must be equal to $yx$, which is a contradiction. So $L$
    is not regular.
  \end{problem}

  \begin{problem}
    Let $E = \cset{\texttt{a}^i\texttt{b}^j}{i \neq j, 2i \neq j}$.
    We show that $E$ is a context-free language.

    Note that $E$ can be expressed as the union of three languages:
    \begin{enumerate}
    \item $\cset{\texttt{a}^i\texttt{b}^j}{i < \frac{j}{2}}$
    \item $\cset{\texttt{a}^i\texttt{b}^j}{\frac{j}{2} < i < j}$
    \item $\cset{\texttt{a}^i\texttt{b}^j}{i > j}$
    \end{enumerate}

    The first and last languages are clearly context-free; the second
    is more subtle. Note that if $i = x$ and $j = y$ satisfies the
    inequality $\frac{j}{2} < i < j $, then $i = x+1$ and $j = y+2$
    also satisfies the inequality, and likewise $i = x+2$ and $j =
    y+2$. These recurrences, combined with the base cases $i = 2, j =
    3$ and $i = 3, j = 4$ completely characterize the set of integers
    that satisfies this inequality. The proof is by induction.

    So $E$ can be expressed as a context-free grammar:

    \begin{itemize}
    \item $A \leftarrow \emptystring \;|\; \mathtt{a} A$
    \item $B \leftarrow \emptystring \;|\; \mathtt{b} B$
    \item $C \leftarrow \emptystring \;|\; \mathtt{a} C \mathtt{b}$
    \item $D \leftarrow \emptystring \;|\; \mathtt{a} D \mathtt{bb}$
    \item $F \leftarrow \mathtt{aabbb} \;|\; \mathtt{aaabbbb} \;|\;
      \mathtt{a}F\mathtt{bb} \;|\; \mathtt{aa}F\mathtt{bb}$
    \item $E \leftarrow DB\mathtt{b} \;|\; F \;|\; \mathtt{a}AC$
    \end{itemize}

    $F$ corresponds to the recurrence explained above; the three terms
    of $E$ correspond to the three component languages enumerated above.
  \end{problem}

  \begin{problem}
    \begin{subproblem}
      $G$ is ambiguous. The following string has two possible parse
      trees:

      \texttt{if condition then if condition then a:=1 else a:=1}

      
      \Tree[.STMT
        [.IF-THEN-ELSE
          if-condition-then
          [.IF-THEN
            if
            condition
            then
            [.STMT
              [.ASSIGN
              a:=1 ] ] ]
          else
          [.STMT
            [.ASSIGN
              a:=1 ] ] ] ]

       \Tree
       [.STMT
         [.IF-THEN
           if-condition-then
           [.STMT
             [.IF-THEN-ELSE
               if-condition-then
               [.STMT
                 [.ASSIGN
                   a:=1 ] ]
               else
               [.STMT
                 [.ASSIGN
                   a:=1 ] ] ] ] ] ]
            
    \end{subproblem}
    \newpage

    \begin{subproblem}
      The following grammar is unambiguous and defines the same
      language:
      \begin{itemize}
      \item STMT $\rightarrow$ A $\vert$ B
      \item A $\rightarrow$ ASSIGN $\vert$ \texttt{if condition then} A
        \texttt{ else } A
      \item B $\rightarrow$ \texttt{if condition then } STMT $\vert$
        \texttt{if condition then } A \texttt{ else } B
      \item ASSIGN $\rightarrow$ \texttt{a:=1}
      \end{itemize}

      A represents subexpressions in which every \texttt{if} statement
      has an \texttt{else} clause, while B represents those where at
      least one \texttt{else} is missing.
    \end{subproblem}
  \end{problem}
\end{pset}

\end{document}
