\documentclass{article}
\input{6856-preamble}
\usepackage{clrscode}

\begin{document}
\psetnum{9}
\date{2005/04/13}
\collab{Collaborators: $\set{\mathtt{sarahl}}$}

\begin{pset}
  \begin{problem}
    We derandomize the randomized autopartition algorithm for
    constructing a binary planar partition using the method of
    conditional probabilities to deterministically obtain a partition
    of size $\bigO{n \log n}$.
    
    Let $X$ be the event that the size of the partition exceeds its
    expectation (which we express as $k n \log n$ for some constant
    $k$). Consider the stage in the algorithm where $i$ segments have
    already been chosen as partitions. Let $\pi_{1}, \ldots, \pi_i$ be
    the segments that have already been chosen. Then the next segment
    will be chosen uniformly at random from the remaining $n-i$. Let
    $P(a) = \Pr{X|a}$; then $P(a) = \frac{1}{n-i} \sum_{\alpha \in
      \set{\text{children of } a}} P(\alpha)$. which means that there
    exists some child $b$ of $a$ that has $P(b) \le P(a)$. Thus we can
    apply the method of conditional probabilities.
    
    However, we cannot easily compute $P(a)$. Instead, we give a
    pessimistic estimator $\hat{P}(a)$. Note that the size of the
    partition is $n$ plus the number of split edges. An edge $u$
    splits an edge $v$ if $v$ intersects the line $l(u)$ that is the
    extension of $u$ to the boundaries of the region, and there does
    not exist an edge $w$ such that $l(u)$ intersects $l(w)$ before it
    reaches $v$ and $w$ is selected before $v$. For our pessimistic
    estimator, we relax the latter constraint; now we only require
    that there does not exist an edge $w$ such that $l(u)$ intersects
    $w$ (rather than $l(w)$) before it reaches $v$ and $w$ is selected
    before $v$. This does not affect the analysis in Theorem 1.2 in
    \textsf{M\&R}, since it does not rely on this. Thus, $P(a) \le
    \hat{P}(a)$ because of the relaxation, but
    $\hat{P}(\mathtt{root})$ is strictly less than 1 because the
    analysis gives a non-zero probability that an autopartition of
    size less than $k \log n$ exists. Moreover, it is straightforward
    to see that $\min\set{\hat{P}(\alpha) | \alpha \in
      \set{\text{children of } a}} \le \hat{P}(a)$
    
    We simply need to show that $\hat{P}$ is computable in polynomial
    time. We can do so by analyzing the expected number of split edges
    due to the assignments forced by a node $a$. As in the book,
    define $C_{u,v}$ to be the indicator variable if $u \dashv v$,
    i.e. $l(u)$ cuts $v$. Suppose $v$ is the $j$th segment intersected
    by $l(u)$. Let $u_{1}, u_2, \ldots, u_{j-1}$ be the segments that
    $l(u)$ intersects before $v$. Consider an arbitrary $u$ and $v$.
    If $u$ has already been selected (it is in the
    $\pi_1,\ldots,\pi_i$), and it was selected before $v$ and all of
    the $u_1, \ldots u_{j-1}$, then $v$ is known to be cut by $u$, and
    so $\Pr{C_{u,v}} = 1$; similarly, if $v$ or one of the $u_1,
    \ldots u_{j-1}$ was selected in $\pi_1,\ldots,\pi_i$ before $u$
    (including the case in which $u$ has not been selected yet), then
    $v$ is known to not be cut by $u$, and $\Pr{C_{u,v}} = 0$. These
    can certainly be calculated by computing the intersections of
    segments and scanning over the lists. So the only remaining case
    is when neither $u$, $v$, or any of the $u_1, \ldots u_{j-1}$ has
    been selected. But in this case $\Pr{C_{u,v}}$ is simply
    $\frac{1}{j+1}$ since $v$ is only cut by $u$ if $u$ chosen after
    $v$ and the $u_1, \ldots u_{j-1}$. So this probability can also be
    calculated by computing the intersections of segments. Since
    $\Exp{\text{partition size}} = n + \sum_{u} \sum_{v} C_{u,v}$ we
    can compute the expected partition size and thus determine
    $\hat{P}$ in polynomial time simply by summing up the $\bigO{n^2}$
    $C_{u,v}$ terms, which we showed above can be computed in
    polynomial time. So $\hat{P}$ is a computable pessimistic
    estimator. 
  \end{problem}

  \begin{problem}
    \begin{subproblem}
      Consider some node $a$ at level $i$ of the conditional
      probability tree. At this point in the algorithm, $i$ elements
      of the set have had their value chosen; $n-i$ elements remain,
      and there are two choices for each of them, so $2^{n-i}$
      possible outcomes.
      
      Write $\hat{P}(a)$ as the sum $\sum_k P(\mathscr{E}_k | a)$.
      Recall that $P(\mathscr{E}_k | a)$ is the fraction of the
      possible outcomes that lead to the inner product of the $i$th
      row of $\mathbf{A}$ with $\vec{b}$ being greater than $4 \sqrt{n
        \ln n}$.  Suppose the current value of the inner product (with
      only the current element value assignments as specified by node
      $a$) is $\wp$.  Then there exists some integer $\alpha$
      (dependent on $\wp$) such that if less than $\alpha$ of the
      remaining elements receive value $+1$ then the value will be
      less than $4\sqrt{n \ln n}$, and similarly some integer $\beta$
      such that if more than $\beta$ of the remaining elements receive
      value $+1$ then the value will be greater than $-4\sqrt{n \ln
        n}$. So if between $\alpha$ and $\beta$ of the remaining
      elements receive value $+1$, the value exceeds these bounds (a
      ``bad outcome'' for this row).

      If $\ell$ is the number of remaining elements that receive value
      $+1$, and $j$ is the number of elements in set $k$ that
      have not received a value, then there are $\binom{j}{\ell}$ choices
      for which elements receive value 1, and $2^{n-i-\ell}$ choices
      for the values of the unassigned elements that do not appear in
      set $k$. So, summing over all possible values of $\ell$,
      \[ P(\mathscr{E}_k | a) = \sum_{\ell = \alpha}^\beta
      \frac{\binom{j}{\ell} 2^{j-\ell}}{2^{n-i}} \] and
      \[ P(\mathscr{E} | a) = \sum_k P(\mathscr{E}_k | a) = \sum_k
      \sum_{\ell = \alpha}^\beta \frac{\binom{j}{\ell}
        2^{j-\ell}}{2^{n-i}} \]
      which is a sum of binomial coefficients multiplied by powers of
        two, divided by $2^{n-i}$.
    \end{subproblem}

    \begin{subproblem}
      $\hat{P}(a)$ is a sum of binomial coefficients
      multiplied by powers of two, divided by $2^{n-i}$. We first
      consider the number of terms in the summation. There are
      $\bigO{n}$ terms in the first summation, since this is the
      number of groups, and $\bigO{n}$ terms in the second summation,
      since $\alpha$ and $\beta$ are bounded by $0$ and $n$. So there
      are $\bigO{n^2}$ terms to compute.

      Computing an individual term requires constant time, since the
      multiplication by powers of two can be achieved using bit
      shifting, and the binomial coefficients can be computed using a
      lookup table. The lookup table can be computed using a dynamic
      program based on the Pascal's triangle recurrence. The lookup
      table contains $\bigO{n^2}$ entries, since all the arguments to
      the binomial coefficient are less than $n$, and the entries can
      be computed by summing. So the total amount of time required is
      $\bigO{n^2}$.
    \end{subproblem}

    \begin{subproblem}
      Let $a$ be a node with children $b$ and $c$. $\hat{P}$ is
      defined as the sum $\sum_k P(\mathscr{E}_k | a)$. But each
      individual probability $P(\mathscr{E}_k | a)$ can be expressed
      as
      \[ P(\mathscr{E}_k | a) = \frac{1}{2}P(\mathscr{E}_k | b) +
      \frac{1}{2}P(\mathscr{E}_k | c) \]
      since $b$ and $c$ are equally likely given $a$. So
      \begin{align*}
        \hat{P}(a) &= \sum_k P(\mathscr{E}_k | a) \\
        &= \sum_k \left(\frac{1}{2}P(\mathscr{E}_k | b) +
      \frac{1}{2}P(\mathscr{E}_k | c)\right)\\
       &= \frac{1}{2} \sum_k P(\mathscr{E}_k | b) + \frac{1}{2} \sum_k
        P(\mathscr{E}_k | c) \\
        &= \frac{1}{2} \hat{P}(b) +
        \frac{1}{2} \hat{P}(c)
      \end{align*}
      We can now make the standard argument that at least one of
      $\hat{P}(b)$ and $\hat{P}(c)$ must be at most $\hat{P}(a)$,
      since otherwise $\hat{P}(a)$ could not be their average.
    \end{subproblem}

    \begin{subproblem}
      From above, computing the binomial coefficients requires time
      $\bigO{n^2}$, and computing $\hat{P}(a)$ requires $\bigO{n^2}$
      time. The deterministic algorithm must evaluate the $\hat{P}$
      values at each of $n$ levels of the tree, so the total cost is
      $\bigO{n^3}$ on the unit-cost RAM. On a log-cost RAM, note that
      we deal with binomial coefficients in $n$, which have size
      $\bigO{2^n}$, so the cost of arithmetic operations is
      $\bigO{n}$. This adds another $\bigO{n}$ factor to the runtime
      cost, giving $\bigO{n^4}$ on the log-cost RAM.
    \end{subproblem}
  \end{problem}

  \begin{problem}
    \begin{subproblem}
      This integer linear program captures the set-cover
      problem. $z_i$ is contrained to be a Boolean; it represents
      whether the element $i \in Z$. The objective function is to
      minimize the sum of $z_i$, i.e. the total number of elements
      chosen. The constraint expresses that for each set $j$, the sum
      of the $z_i$ in that set should be at least 1, i.e. that at
      least one element from that set is chosen in $Z$. This is
      precisely the set-cover problem.
    \end{subproblem}

    \begin{subproblem}
      We give a polynomial-time algorithm based on randomized rounding
      that in expectation chooses a set at least as small as the
      solution to the ILP, and covers a constant fraction of the sets.
      We begin by solving the LP relaxation of the ILP, where the
      $z_i$ are now only constrained to take (possibly non-integer)
      values between 0 and 1. Let $w$ be the value of the solution to
      the ILP, and $w'$ that of the solution to the LP relaxation.
      Clearly $w' \le w$, since it is a relaxation of the original
      problem. We now round randomly, choosing an element $i$ to be in
      the set with probability $z_i$. The expected number of elements
      in the set is the sum of these probabilities, which is $w' \le
      w$.

      Now we bound the number of sets covered in expectation. Let
      $X_i$ be the number of elements chosen from set $i$. It is the
      sum of indicator random variables indicating whether each
      element in that set is chosen; by the constraint of the LP,
      $\Exp{X_i} \ge 1$. Now consider the probability that $X_i$ is
      small:
      \[ \Pr{X_i < \frac12} \le
      \Pr{X_i < \frac12 \mu_{X_i}} < e^{-\frac{\mu_{X_i}
          \left(\frac12\right)^2}{2}} \le e^{-\frac{1}{8}}
      \]
      which is a constant less than 1. Since $X_i$ is a sum of
      indicator random variables, it takes only integral values, and
      so this is a bound on the probability that it is less than 1. So
      we have shown that each set is covered with constant
      probability, and by linearity of expectation, a constant
      fraction of the sets are covered in expectation.
    \end{subproblem}

    \begin{subproblem}
      We now give a randomized rounding algorithm that finds a set of
      size at most $\bigO{\log m}$ times the optimum $w$ that covers
      all sets $S_j$ with high probability. As before, we solve the LP
      relaxation to obtain possibly non-integer values for the $z_i$
      that give a solution at least as good as $w$. Now, however, we
      round randomly, choosing an element $i$ to be in the set $Z$
      with probability $k \log m \cdot z_i$.
      
      Now the number of elements chosen is expected to be at most $k
      \log m \cdot w$. The number of elements chosen is a sum of
      indicator random variables, so applying the Chernoff bound in
      the standard way converts this result to a high-probability
      bound of $\bigO{k \log m \cdot w}$.

      Again we consider the number of elements $X_i$ chosen from set
      $i$. Now $\Exp{X_i} \ge k \log m$. Then, applying the Chernoff
      bound and the same reasoning as before,

      \[ \Pr{X_i = 0} =
      \Pr{X_i < \frac{1}{2}} \le \Pr{X_i < \frac12 \mu_{X_i}} <
      e^{-\frac{\mu_{X_i}
          \left(\frac12\right)^2}{2}} \le e^{-\frac{1}{8} k \log m} =
      \bigO{m^{-k}}
      \]
      and by the union bound, the probability that none of the $X_i$
      are zero, i.e. that all sets are covered, is $1 -
      \frac{1}{m^{k-1}}$. So all sets are covered with high
      probability.
    \end{subproblem}
  \end{problem}
\end{pset}
\end{document}