%\documentclass[notes=onlyslideswithnotes]{beamer}
\documentclass[notes=hide]{beamer}
\usepackage{beamerthemesplit}
\usepackage{clrscode}
\title[PersiFS$_2$]{PersiFS$_2$: Structures for Efficient
  File~System-Scale Partial~Persistence}
\author{Austin Clements \and Dan Ports}
\date{Thursday, May 12, 2005}

\begin{document}

\frame{\titlepage}

\section[Outline]{}
\frame{\tableofcontents}

\note[item]{Dan}

\section{Introduction}
\subsection{Overview}
\begin{frame}
  \frametitle{What is PersiFS?}

  \begin{itemize}
  \item A persistent file system \only<2->{(in the systems sense)}
    \begin{itemize}
    \item<2-> We'll call this durable
    \end{itemize}
  \item A persistent file system \only<3->{(in the data structures sense)}
  \end{itemize}
\end{frame}

\subsection{Background}
\begin{frame}
  \frametitle{Persistence (in the data structures sense)}

  \begin{definition}
    \emph{Partially persistent} data structures allow queries on any
    previous version, but only allow modifications to the current
    version.  Each modification produces a new version.
    \vspace{1em}

    \emph{Fully persistent} data structures allow modifications to
    previous versions.  The history of the structure forms a tree.
  \end{definition}
\end{frame}

\begin{frame}
  \frametitle{Persistent File Systems}

  \begin{definition}
    A \emph{persistent file system} allows access to past versions of
    the file system.
  \end{definition}
  \pause
  \begin{examples}
    \begin{itemize}
    \item Version control systems like CVS, Subversion, etc.
      \note[item]<2>{CVS is fully persistent because of branching}
    \item Snapshot and backup systems like AFS's \texttt{OldFiles}
      \note[item]<2>{Partially persistent}
    \end{itemize}
  \end{examples}
\end{frame}

\begin{frame}
  \frametitle{PersiFS}

  PersiFS goes a few steps further
  \pause
  \begin{itemize}
  \item Continuously versioned so every modification is saved
  \item Real file system interface
  \end{itemize}
  \pause
  \note<3>{The question is...}
  How do we do this efficiently, both time and space-wise?
\end{frame}

\begin{frame}
  \frametitle{A File System Data Structure}
  \begin{itemize}
  \item Needs to support:
    \begin{itemize}
    \item $\proc{Read}(\id{file}, \id{timestamp}, \id{offset}) \to \id{substring}$
    \item $\proc{Modify}(\id{file}, \id{offset}, \id{new-substring})$
    \end{itemize}
    
  \item Very large data sets --- must be space-efficient
  \item Need fast access to both current and past revisions
  \end{itemize}
\end{frame}

\section{Original PersiFS}
\subsection{Overview}
\begin{frame}
  \note[item]{Austin}
  \frametitle{What was PersiFS$_1$?}

  An implementation of PersiFS using silly, simple data structures
  from the systems world.

  \note[item]{First, we'll sketch out how PersiFS$_1$ worked because
    the overall design of the system remains unchanged}
\end{frame}

\subsection{Structures}
\begin{frame}
  \frametitle{File System Structures}

  \begin{itemize}
  \item<+-> Chunking
    \begin{itemize}
    \item Divides data into content-sensitive chunks for efficient
      storage of modifications
    \end{itemize}
  \item<+-> Superblob
    \begin{itemize}
    \item Stores chunks in a big append-only vector
    \end{itemize}
  \item<+-> Metadata log
    \begin{itemize}
    \item Stores sequence of file metadata changes over time
      (including pointers to file contents)
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Content-sensitive Chunking}

  \note<1>[item]{We'll briefly digress on a cool algorithm.  This
    algorithm is not a data structure.  Furthermore, this algorithm
    comes from the 9th floor.  But it's cool.}
  \note<1>[item]{This will produce chunks of about 8K on average.}

  \begin{itemize}
  \item Use a sliding Rabin fingerprint, $f(A)$
  \item When $f(A) \equiv 42 \pmod{2^{13}}$, draw a chunk boundary

    \vspace{1em}
    \includegraphics{chunking}
  \item Modifications (even insertions) have only local effects on
    chunk contents
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Metadata Log}

  \note<1>[item]{Here's an example of what a piece of the metadata log
    may look like.  In this, the contents of file 908 change twice;
    once at 11:56 and once at 12:00, and only the third chunk in the
    file needs to be changed.}
  \note<1>[item]{Of course, to figure out the state of the file system
    at some point takes linear time.  But this is a systems data
    structure, so it doesn't have to be smart.}
  \note<1>[item]{To cut down replay time, the log periodically stores
    a large snapshot with all of the file system information in it.}
  \note<1>[item]{Clearly this is silly.}

  \begin{center}
    \begin{tabular}{ccl}
      Time & File & Modification \\
      11:56 & 908 & Chunks are now 56, 57, 94, 59 \\
      11:57 & 539 & Chunks are now 80, 95 \\
      12:00 & 908 & Chunks are now 56, 57, 96, 59
    \end{tabular}
  \end{center}

  \begin{itemize}
  \item $O(n)$ replay (and thus read) time
  \item Must periodically store large snapshots for reasonable replay
  \item $O(1)$ write time and space
  \end{itemize}
\end{frame}

\section{PersiFS$_2$}
\subsection{Overview}
\begin{frame}
  \note[item]{Dan}

  \frametitle{What is PersiFS$_2$?}

  \begin{itemize}
  \item The superblob can be improved
  \item The metadata log can be replaced
  \end{itemize}
\end{frame}

\newcommand{\bptree}{B\textsuperscript{+}-tree}

\subsection{Structures}
\begin{frame}
  \frametitle{Model}

  \begin{itemize}
  \item External memory model
  \item Need partial persistence
  \item<+-> Start with a \bptree

    \note<.>[item]{We'll start with everbody's favorite external
      memory structure, the B-tree.  As a reminder, a B-tree is simply
      an $T$-way tree that stores $T$ elements in each node, subject
      to some restrictions.  PersiFS uses B+-trees, which are B-trees
      that store values at the leaves, thus achieving higher branching
      factor with the same block size.}

    \begin{center}
      \includegraphics{bptree}
    \end{center}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Chunk Fusion}

  \note<1>[item]{As a test of plain ole' \bptree{}s, we added chunk
    fusion}
  \note<1>[item]{Optimizes typical file access patterns}

  \begin{itemize}
  \item Utilizes regular \bptree\ to store fingerprint-to-address
    mapping
  \item Chunks with identical content can be fused and only stored
    once in the super blob
  \item<+-> $O(\log_{B+1} n)$ memory transfers for write
  \item $O(1)$ for read (unaffected by fusion)
  \item Potentially massive space savings at very little potential
    space cost
  \end{itemize}
\end{frame}

\subsection{Persistent B+-tree}
\begin{frame}
  \frametitle{A Persistent \bptree{}}
  \begin{itemize}
  \item $\proc{Insert}(\id{key}, \id{value})$
  \item $\proc{Search}(\id{key}, \id{timestamp}) \to \langle \id{key},
  \id{value} \rangle$
  \begin{itemize}
  \item Exact key match or predecessor query
  \end{itemize}
  \item $\proc{Delete}(\id{key})$
  \item $\proc{Commit}() \to \id{timestamp}$
    \begin{itemize}
    \item Allows multiple modifications grouped under a single
      timestamp
    \item Grouping conceals \emph{unnecessary} states (for efficiency),
      and \emph{inconsistent} states (for correctness)
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Persistifying a \bptree}
  \note[item]{Austin}

  \note[item]{While many nodes in the tree may change during an
    insertion, this only creates a single new version of the tree}
  \includegraphics<1>{pbptree}
  \includegraphics<2>{pbptree2}
  \includegraphics<3->{pbptree3}
  \begin{overprint}
    \onslide<1-3>
    \begin{itemize}
    \item Similar to ``modification box'' approach by Sleator and Tarjan
    \item Nodes may store a second, modified copy with some version
    \item If mod box is full, create a new node and fix parent's link
    \end{itemize}
    \onslide<4->
    \begin{itemize}
    \item $O(\log_{B+1} n)$ memory transfers for read and write
    \item $O(1)$ additional space per modification
    \end{itemize}
    \note<4>[item]{Where $n$ is the number of nodes at that time}
  \end{overprint}
\end{frame}

\begin{frame}
  \frametitle{Replacing the Metadata Log}

  \begin{center}
    \includegraphics{ail}
  \end{center}
\end{frame}


\section{Conclusion}

\begin{frame}
  \frametitle{Results}
  \begin{itemize}
  \item Chunk fusion is a clear win
    \begin{itemize}
    \item Potentially large space savings with minimal cost
    \end{itemize}
  \item Metadata log vs. arborescent metadata map: less clear
    \begin{itemize}
    \item Depends on filesystem usage patterns
    \item e.g. metadata log snapshot frequency vs. usage
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Questions?}
\end{frame}
% \begin{frame}
%   \frametitle{Related Work}
% \end{frame}

\end{document}

% LocalWords:  LocalWords metadata
