\documentclass[landscape,a0b,final]{a0poster}
%\documentclass[landscape,a0b,draft]{a0poster}
\usepackage{pstricks,pst-grad,graphicx}
\usepackage{amsmath,amsthm}

\newtheorem*{theorem}{Theorem}
\newtheorem*{corollary}{Corollary}
\newtheorem*{lemma}{Lemma}
\newtheorem*{observation}{Observation}
\newtheorem*{proposition}{Proposition}
\newtheorem*{definition}{Definition}
\newtheorem*{claim}{Claim}
\newtheorem*{fact}{Fact}
\newtheorem*{assumption}{Assumption}
\newcommand{\bigO}[1]{\ensuremath{O\left(#1\right)}}
\newcommand{\bigOt}[1]{\ensuremath{\tilde{O}\left(#1\right)}}
\newcommand{\littleO}[1]{\ensuremath{o\left(#1\right)}}
\newcommand{\bigTheta}[1]{\ensuremath{\Theta\left(#1\right)}}
\newcommand{\bigOmega}[1]{\ensuremath{\Omega\left(#1\right)}}
\newcommand{\littleOmega}[1]{\ensuremath{\omega\left(#1\right)}}

\newcommand{\bptree}{B\textsuperscript{+}-tree}
\newcommand{\bptrees}{B\textsuperscript{+}-trees}
\newcommand{\PersiFS}{PersiFS}


%%% Section macros

\newrgbcolor{white}{1 1 1}
\newrgbcolor{blue}{.5 .7 .8}
\newrgbcolor{darkblue}{.7 .8 .9}
\newrgbcolor{green}{.4 .8 .6}
\newrgbcolor{darkgreen}{.6 .9 .7}
\newcmykcolor{csailorange}{0 .60 1 .20}
\newcmykcolor{csailgray}{.20 .20 .20 .60}

\ifdraft
  \newpsstyle{sectionbox}{framearc=0.2,linewidth=1mm}
\else
  \newpsstyle{sectionbox}{framearc=0.2,linewidth=1mm,
    fillstyle=gradient,gradangle=10,gradbegin=white,gradmidpoint=1.0}
\fi
\newcommand{\sectionbox}[3]{%
  \begin{center}
    \psframebox[style=sectionbox,gradend=#1,linecolor=dark#1]{%
      \parbox[t][#2][c]{0.9\columnwidth}{\centering{#3}}
    }
  \end{center}
}

\renewcommand{\section}[1]{%
  \def\tmpa{#1}\def\tmpb{*}
  \ifx\tmpa\tmpb
    \expandafter\section
  \else
    \sectionbox{blue}{3em}{\huge #1}
  \fi}
\renewcommand{\subsection}[1]{\sectionbox{green}{2em}{\Large #1}}
\makeatother

% Disable indentation
\setlength{\parindent}{0in}

% Extra paragraph spacing
\setlength{\parskip}{0.4em}

% Scruntch lists and bibliography (but not completely)
\let \olditemize = \itemize
\renewcommand{\itemize}{\vspace{-.2\parskip}%
  \olditemize{}\itemsep=-.2\parskip}
\let \oldenumerate = \enumerate
\renewcommand{\enumerate}{\vspace{-.2\parskip}%
  \oldenumerate{}\itemsep=-.2\parskip}
\let \oldlist = \list
\renewcommand{\list}[2]{\vspace{-.2\parskip}%
  \oldlist{#1}{#2}\itemsep=-.2\parskip}

\setlength{\columnsep}{1.25in}
\setlength{\columnseprule}{1in}
% For debugging columns (ugh)
%\setcounter{tracingmulticols}{2}

%%% Poster

\begin{document}

%%% Title
\sectionbox{blue}{11em}{%
  \begin{minipage}[c][9cm][c]{0.1\textwidth}
    \begin{center}
      \includegraphics[width=7cm,angle=0]{csail-logo.eps}
    \end{center}
  \end{minipage}
  \begin{minipage}{0.7\linewidth}
    \centering
    {\fontsize{96}{115}\selectfont PersiFS} \\[1em]
    {\huge A Versioned File System with an Efficient Representation}
    \\[.5em]
    {\large Dan R. K. Ports, Austin T. Clements, and Erik D. Demaine}\\
    {MIT Computer Science and Artificial Intelligence Laboratory}\\
    \texttt{\{drkp,aclements,edemaine\}@mit.edu}  
  \end{minipage}
  \begin{minipage}[c][9cm][c]{0.1\textwidth}
    \begin{center}
      %\includegraphics[width=7cm,angle=0]{csail-logo.eps}
    \end{center}
  \end{minipage}
}

\newlength{\colwidth}
\newenvironment{column}{%
  \begin{minipage}[t]{0.333\linewidth}
  \begin{center}
  \setlength{\colwidth}{\linewidth}
  \advance\colwidth by -\columnsep
  \vspace{.001em}
  \begin{minipage}[t]{\colwidth}%
  \setlength{\parskip}{0.4em}
}{%
  \end{minipage}
  \end{center}
  \end{minipage}%
}

\begin{tabular}{c|c|c}
  \begin{column}
    \section{Introduction}
    \vspace{.5em}
  \end{column} &
  \multicolumn{2}{c}{%
  \begin{minipage}[t]{0.666\linewidth}
    \section{PersiFS Structures}
  \end{minipage}
  }
  \\
  \begin{column}
  %%% Column 1

  % XXX ``Ephemeral file systems''
  % XXX Consistent capitalization on bulletted lists
  % XXX Our subsubtitle
  
  %\section{Introduction}
  Most file systems are \emph{ephemeral}, meaning that once a change
  has been made, there is no way to recall the previous contents of
  the file system.  \textbf{PersiFS} is a \emph{continuously
    versioned} file system. It stores every modification to the file
  system, allowing access to \emph{any} past version of any
  file.
%   A convenient interface provides access to the file system as
%   it appeared at any previous point in time, directly through the file
%   system.
  \PersiFS\ achieves this without sacrificing access time to either
  current versions or past versions, using inordinate amounts of disk
  space, or requiring modification to existing applications.

  In order to make continuous versioning feasible, PersiFS tackles the
  problems of time and space efficiency with a number of efficient
  data structures for indexing and retaining files.

  \section{Advantages of PersiFS}

  \begin{itemize}
  \item \textbf{Continuous versioning} --- PersiFS retains
    \emph{every} modification to the file system, allowing users to
    examine any file as it was at any point in time.
  \item \textbf{Efficient access to past revisions} --- Operations on
    the current revision are only slightly slower than operations in
    normal \bptree-based file systems.  PersiFS, however, also
    provides the \emph{same time guarantees} for accessing \emph{past}
    revisions.  Reading from any revision or modifying the current
    revision requires disk accesses only logarithmic in the size of
    that revision, and committing the current revision does not
    require any disk accesses.
  \item \textbf{Efficient storage of past revisions} --- PersiFS
    combines content addressable storage with content-sensitive
    chunking to efficiently coalesce portions of files on disk.  This is
    particularly effective for sharing data between incremental
    revisions of the same file, though the same mechanism can share
    data between any revisions of any files.
  \item \textbf{Direct access to past revisions} --- PersiFS directly
    exposes access to past file revisions through the file system
    interface, allowing convenient access with standard file system
    tools. The current version of the file system tree is available
    read-write at \texttt{\small /persifs/now}, and read-only views of
    previous versions are automatically mounted simply by specifying a
    timestamp instead of ``\texttt{\small now}''.  No special tools or
    per-file version numbers are required, though convenience tools
    exist, e.g. for listing all revisions of a particular file.
  \item \textbf{Homogeneous representation} --- Both the current
    revision and past revisions are stored in the \emph{same data
      structures}.  Unlike logging-based systems and some
    snapshot-based systems, there is no need to maintain a separate
    copy of the current version of the file system.  The same
    mechanisms provide efficient access to both the current revision
    and past revisions.
  \end{itemize}
  
  \section{Related Work}
  \subsection{Snapshotting}

  % Comparisons
  % * Snapshotting systems (WAFL, AFS, Plan 9)
  % ** No periodic down/grind-time/performance degradation
  % ** No Planck length on mistakes that can be undone
  % * Log-based systems
  % ** No need for space-consuming log snapshots
  % ** No need for slow log replays
  % * All other systems?
  % ** Efficient contents fusion
  % *** Space saving, but higher fragmentation
  % *** Similar to Venti, but LBFS chunking adds a whole new dimension

  Previous states of the file system are commonly stored by a backup
  system that periodically archives the state of the filesystem.
  \emph{Snapshot}-based version-controlled file systems are the
  natural extension of this idea: they periodically take a snapshot of
  the state of the entire filesystem, and make it readily available.

  File systems that use snapshot-based versioning include \textbf{Plan
    9}, which stores daily snapshots of the file system and stores
  them on a content-addressable Venti archival
  server~\cite{pike95plan}, and \textbf{WAFL}, which uses
  copy-on-write disk blocks to save space~\cite{hitz94file}.
  \textbf{AFS} also uses copy-on-write files to provide access to the
  most recent snapshot through the {\small \texttt{OldFiles}} directory

  While snapshotting systems are typically space efficient, they are
  incapable of tracking changes that occur between snapshots.
  Furthermore, they have inflexible policy because snapshots are taken
  of the entire file system at once. PersiFS uses continuous
  versioning, which does not suffer from these problems.
  
  \subsection{Logging}

  Continuously versioned file systems are usually implemented using an
  undo or redo log. This allows any version to be recovered, but
  requires replaying the log, a potentially expensive operation. The
  disk access cost is asymptotically linear in the size of the log, so
  typically some form of periodic log checkpointing is necessary to
  circumvent the need to replay the log from the beginning.

  Because replaying a log is inefficient, the log is generally
  combined with a separate copy of the current revision in a structure
  such as a \bptree. This optimization is unnecessary in PersiFS,
  since its representation is approximately as fast as unversioned
  \bptree-based file systems when operating on the current
  revision. Moreover, it can retrieve previous revisions with the same
  efficiency.
  
  File systems that use logging to implement continuous versioning
  include \textbf{VersionFS}~\cite{muniswamyreddy03-versionfs} and
  \textbf{Wayback}~\cite{cornell04:wayback}.  \textbf{CVFS} combines
  logging with a standard B-tree containing multiple
  versions~\cite{soules03metadata}, an approach that does not match
  the asymptotic guarantees of PersiFS.
  
%   \subsection{Interface}

%   Plan 9

  \end{column} &
  %%% Column 2
  \begin{column}
  %\section{PersiFS Structures}

  \hfil
  \begin{minipage}[c]{0.4\columnwidth}
    PersiFS stores its data directly on disk using two main data
    structures:

    \begin{itemize}
    \item The \textbf{persistent metadata tree} contains the
      history of the file system. It can efficiently obtain either past
      or present metadata for a specific file, regardless of the number
      of revisions in the system.
    \item The \textbf{superblob} stores file content. It reduces
      storage costs by exploiting similarity between file revision.
    \end{itemize} 
  \end{minipage}
  \hfil
  \begin{minipage}[c]{0.4\columnwidth}
    \begin{center}
      \includegraphics[scale=1.75]{architecture}
    \end{center}    
  \end{minipage}
  \hfil
  \vspace{1em}
  \subsection{Persistent Metadata Tree}

  A versioned file system must be able to recover the previous state
  of any file, without sacrificing the speed of accessing or modifying
  the current revision. Most other continuously-versioned file systems
  require an expensive log replay to access previous revisions, and
  keep a separate copy of the current revision in a more efficient
  structure.  Instead, the \textbf{persistent metadata tree} at the
  core of PersiFS makes accessing past revisions as fast as accessing
  the current one.

  To achieve this, PersiFS uses \textbf{\emph{partially persistent}
    data structures}, which are data structures that can answer
  queries about any of their previous states. Both past and present
  metadata are stored in a \textbf{partially persistent \bptree}. This
  data structure provides the following asymptotic property:

  \begin{theorem}
    Queries or modifications on a partially persistent \bptree\ can be
    performed using $\bigO{\log_{B+1} N}$ disk accesses in the worst
    case, where $N$ is the number of files in the affected revision,
    and $B$ the block size.
  \end{theorem}

  This logarithmic cost is considerably faster than the linear cost
  required to scan a log. It is also a considerable improvement over a
  standard tree: the persistent \bptree{}'s access cost is logarithmic
  in the number of files \emph{in the affected revision}, while a
  standard tree would incur costs logarithmic in the size of \emph{the
    complete history of the file system}.

  \vspace{2em}
  
  \hfil
  \begin{minipage}[b]{0.45\columnwidth}
    To implement our partially-persistent \bptree, we use a variant on
    a well-known technique~\cite{driscoll89peristent}. We add a
    \emph{modification box} to each internal node of the \bptree. This
    consists of a second copy of the contents of the node and a
    timestamp, allowing a modification to be recorded along with the
    time it took effect.

    When a node in the tree needs to be modified, the changed version
    is stored in the modification box if it is free. If it is full, a new
    copy of the node is made, and the parent node is modified to point
    to the new node. Modifying the parent may require a cascading
    chain of copies, but an argument based on amortized analysis gives
    the desired logarithmic bound. Thus, the cost due to persistence
    is only a small constant factor greater than a single-version tree.
    
    Performing a search simply requires checking the time stamp at
    each node traversed in order to decide whether to use or ignore
    the change stored in the modification box, which adds little
    additional cost.

    % This can't seriously be the best way to top-align things?
    \vspace*{0.8in}
  \end{minipage}
  \hfil
  \begin{minipage}[b]{0.47\columnwidth} % .47 to fix some dangling words
    \begin{center}
    \textbf{(a)}~\includegraphics[scale=1.4]{pbptree1}\vspace{1em}
    \textbf{(b)}~\includegraphics[scale=1.4]{pbptree2}\vspace{1em}
    \textbf{(c)}~\includegraphics[scale=1.4]{pbptree3}\vspace{1em}
    \textbf{(d)}~\includegraphics[scale=1.4]{pbptree4}\vspace{1em}
    \end{center}
    \small
    Four modifications to a persistent B-tree.  Data not used in the
    latest revision is shown in gray; changes made by the latest
    modification are shown in green.
    
    \textbf{(a)} The initial tree at revision 0.\\
    \textbf{(b)} Inserting b changes a modification box and creates revision
    1.\\
    \textbf{(c)} Inserting q requires a split and creates revision 2.\\
    \textbf{(d)} Inserting d requires creating a new node for revision 3
    because the modification box is already full.
  \end{minipage}
  \hfil
  
  \end{column} &
  %%% Column 3
  \begin{column}
  \subsection{Superblob}

  File data is not stored directly in the persistent metadata tree,
  but rather in the \textbf{superblob}. The goal of this indirection
  layer is to minimize the amount of storage space required. There is
  a great deal of similarity between files in the file system. Many
  changes affect only a small part of a file, creating a new revision
  largely similar to the previous one; moreover, different files may
  have similar content, such as when a file is copied. The superblob
  attempts to store identical content on disk only once, regardless of
  how many files or revisions it appears in.

  To achieve this goal, files are divided into \textbf{chunks}, which
  are stored in the superblob. Rather than place chunk boundaries at
  regular intervals, we use \textbf{content-sensitive chunking}.  This
  technique places chunk boundaries based on file contents such that
  local modifications to file contents, including insertions and
  deletions, only affect chunks in that region of the file.  We use
  the same Rabin fingerprint-based algorithm as
  LBFS~\cite{muthitacharoen01lowbandwidth}.

  Chunks are identified by SHA-1 hashes of their contents. Thus, the
  superblob is a \textbf{content-addressable} storage system similar
  to Venti~\cite{quinlan02venti}.  The file metadata contains a list
  of the chunks that comprise the file.  The superblob is indexed by a
  \bptree, which makes it possible to determine whether a particular
  chunk has already been stored --- if so, it can be reused. As an
  optimization, the list of chunks in the file metadata can be
  represented as disk addresses, so the superblob index \bptree\ needs
  only be consulted on writes.  \vspace{1.5em}
  
  \begin{center}
    \begin{minipage}[c]{0.85\columnwidth}
      \begin{center}
        \includegraphics[scale=2]{superblob}
      \end{center}
      
      \small{Two file versions sharing identical chunks in the
      superblob. The new version adds a third chunk between the
      existing two. The superblob index \bptree\ maps the chunk
      hashes for the existing chunks to their data locations on
      disk.}
    \end{minipage}
  \end{center}
  
  \vspace{1em}
  
  \section{Future Work}

  \begin{itemize}
  \item \textbf{Retention policy} --- Versioned file systems
    necessarily require large volumes of storage, but by allowing
    users and administrators to set policies on what is retained, this
    space requirement can be significantly reduced.
    \begin{itemize}
    \item \textbf{Expiration policy} --- Support for flexible policies
      that allow the administrator to specify when old revisions are
      removed, in order to reduce storage usage. In addition to expiring
      old revisions, intermediate revisions could be removed, causing
      the system to gracefully shift from continuous versioning to
      snapshotting for times in the distant past.
    \item \textbf{Exclusion policy} --- Support for policy regarding
      what types of files to exclude from continuous versioning.  For
      some files, such as temporary files and intermediate files,
      versioning is of questionable utility.  Users should be able to
      express this in order to save space.
    \end{itemize}
  \item \textbf{Locality optimizations} --- Rearranging file chunks on
    disk for speed. Storing data only once in the superblob is a
    trade-off between clustering of a file's chunks and storage space
    savings, but relocating or replicating some chunks can increase
    locality for common cases.
  \end{itemize}

  \vspace{1em}
  \centering{\Large PersiFS: Bringing the Past to the Present for a Better
    Future}
  \vspace{1em}

  \bibliographystyle{amsalpha} % XXX use something more appropriate
  \footnotesize
  \bibliography{persifs}
  \end{column}
\end{tabular}

\end{document}
