%Introduction
%  Motivation
%
%  Problem statement
%    Reduce inter-ISP traffic

%  Possible Solution
%    Centralized Proxy server
%      expensive, needs maintenance
%    Distributed File Server
%      provides strict consistency, not needed for read-only
%      no security
%      might make people store data they don't need
%    BitTorrent
%      only designed for large files
%

%
% Cool Beamer Commands:
%
%  \begin{block}{Block title}
%  This is a block in blue
%  \end{block}
%  \begin{tabular}{lcc}
%  Class & A & B \\\hline
%  X     & 1 & 2 \pause\\
%  Y     & 3 & 4 \pause\\
%  Z     &5&6
% \end{tabular}
%

% Beamer slide deck for the InHome presentation.
\documentclass{beamer}

% Title-page metadata. The optional argument of \title is the short title.
% \small is a font-size declaration, not a one-argument command, so it is
% written without braces: it stays in effect up to the end of the \title
% argument, which here means the subtitle line only.
\title[InHome: Peer-to-Peer Local Area Caching]{%
  How to save 150,000 dollars a year
  \\\small InHome: Peer-to-Peer Local Area Caching }
\author{Mihir Kedia, Raluca Ada Popa, Irene Zhang}
\date{\today}

% Theme selection through the package-style interface.
% NOTE(review): recent Beamer releases appear to ship this file only as a
% compatibility theme -- check the Beamer user guide for the recommended
% \usetheme replacement before changing it.
\usepackage{beamerthemeshadow}

% \toc: emits one frame holding the table of contents with the current
% section highlighted and the subsections of other sections hidden.
% The trailing % signs keep end-of-line spaces out of the macro body.
\newcommand{\toc}{%
  \frame{\tableofcontents[currentsection,hideothersubsections]}%
}

\begin{document}

% Title slide.
\begin{frame}
  \titlepage
\end{frame}

\section{Introduction}

\subsection{Motivation}
\begin{frame}
  % Speaker notes:
  % People are gradually accessing more Internet content with the advent of
  % applications such as YouTube for video downloading, BitTorrent for large
  % files, or iTunes for music.
  % The Internet is not growing fast enough to keep up with the growth of
  % such bandwidth-hungry applications. Because of this, wide-area bandwidth
  % is becoming an increasingly scarce resource, leading to large sums being
  % paid by organizations to ISPs, or to congestion.
  % On the other hand, local-area bandwidth is cheaper: usually you do not
  % need to pay anybody to use it because you are within an organization, and
  % running more cables across the campus is much cheaper than running more
  % wires across the country.

  % So how can we trade cheaper local-area bandwidth for more expensive
  % wide-area bandwidth?
  % We observed that much of the traffic traveling over the Internet is
  % redundant. When you go to cnn.com, someone else at MIT has probably
  % also recently gone to cnn.com.
  % Actually, this is a feasible observation because ... (note left unfinished)

  \frametitle{Motivation}
  \begin{itemize}
    \item Wide-area bandwidth is becoming increasingly scarce
      \begin{itemize}
        \item Bandwidth-hungry applications like YouTube are outpacing infrastructure upgrades
      \end{itemize}
    \item Local-area bandwidth is cheap and often unused
    \item Much of the data traversing outgoing links is redundant
      \begin{itemize}
        % En-dash (--) is the conventional mark for a numeric range.
        \item 25--40\% of web requests made within a given organization are
          duplicates of previous requests.
      \end{itemize}
  \end{itemize}
%  \vspace{0.2in}
%  \large{For example,}
%  \begin{itemize}
%  \item Large organizations/campus can save money by conserving
%    external bandwidth
%  \item Conference attendees can improve connection speeds by reducing
%    outgoing link contention
%  \end{itemize}
\end{frame}

\subsection{Objective}
% Formally, our objective is to:
\begin{frame}
  \frametitle{Objective}
  % \large is a font-size declaration, not a one-argument command, so it is
  % confined to a group here; otherwise it would also enlarge the
  % requirements list below. The \par inside the group ends the paragraph
  % while \large is still active, so the line spacing matches the larger font.
  {\large\textbf{Reduce external bandwidth usage by
    sharing data among peers inside an organization.}\par}
  \vspace{0.3in}
% Speaker notes:
% An ideal solution would have the following properties
% latency - may tradeoff, not too bad cause they won't use it
% incentive to store data from others - not sure about the content
% compromising, virus, insecure,  ?
% hardware: organizations proxy - centralized point of failure, maintenance, easy deploy high adoption, expensive, --
% conference case - like the proxy solution that they use - adoption
% rate much higher because of effort and expenses
% small organizations maybe conference case

  System requirements:
  \begin{itemize}
    \item Clients should not see a significant increase in latency
    \item Clients should not store data they are not interested in
    \item System should be customizable for organization sizes
    \item System should not require new hardware or maintenance
  \end{itemize}

\end{frame}

\section{System Description}
% Speaker note: we are presenting a different solution from the centralized server.
\subsection{System Overview}
% The frame is declared fragile because it contains a verbatim environment.
\begin{frame}[fragile]
  \frametitle{System Overview}
  \begin{itemize}
    \item InHome is implemented as a peer-to-peer network that operates like a distributed cache
    \item Clients run a background daemon that automatically syncs metadata from InHome-aware applications
      \begin{itemize}
        \item Application-specific plugins query the InHome network for data before falling back to the origin server
      \end{itemize}

    % Speaker notes:
    % We would like the lookups for the system to be ... (note left unfinished).
    % Mention the different consistent hashing algorithms for different system
    % sizes, which we will discuss later.
    %
    % We provide a simple interface: put binds a name to data.
    % The name can be a URL, a video name, or, in the case of BitTorrent, a
    % self-certifying hash of the content.
    \item Interface:
\begin{verbatim}
      put(name, data)
      data = get(name)
\end{verbatim}
    \item Consistent hashing is used for fast object lookup
  \end{itemize}
\end{frame}

\subsection{Example Usage}
% Speaker note: to make our system clear, here is a usage example of a web
% caching application built on top of InHome.
\begin{frame}
  \frametitle{Example Usage: Web Caching}
  For each HTTP request:
  % Steps of one lookup, in order: query, hash, search, then hit or miss handling.
  \begin{enumerate}
    \item Mozilla plugin queries the InHome client for the URL
    \item InHome client hashes the URL into a 160-bit object ID
    \item InHome client searches the InHome peers for the object ID
    \item If the search succeeds, Mozilla plugin will return cached data after checking the TTL
    \item If the search fails or times out, the Mozilla plugin tells Mozilla to fetch the page from the origin server in the normal way
    \item Mozilla plugin registers the new data with the InHome client by inserting the data with the URL
  \end{enumerate}
\end{frame}

\subsection{Search Algorithms}
% Speaker notes:
% We use consistent hashing to search for data.
% It is an ideal solution because it doesn't require a centralized tracker
% but is still fast, which is important because you want to quickly
% determine if a piece of data has been cached.
%
% Reiterate what consistent hashing is.
% Introduce consistent hashing as the straightforward, purely
% distributed way to figure out who has what data,
% but it does not scale well: nodes keep metadata for data other than
% what they are caching, and when responsible nodes go down people can't
% find the cached data.

\begin{frame}
  \frametitle{Search Algorithms}
  % Side-by-side comparison of the two lookup schemes. Beamer's columns
  % environment is used so that the two halves share the text width without
  % the extra inter-column padding a tabular would add.
  \begin{columns}
    \begin{column}{0.48\textwidth}
      \textbf{Basic Consistent Hashing}
      \begin{itemize}
        \item Full membership
        \item One-hop lookup
        \item Metadata maintenance
        \item No fate sharing
      \end{itemize}
    \end{column}
    \begin{column}{0.48\textwidth}
      \textbf{Data-Oriented Chord}
      \begin{itemize}
        \item Partial membership
        \item $\log(n)$-hop lookup
        \item No metadata
        \item Fate sharing
      \end{itemize}
    \end{column}
  \end{columns}
\end{frame}
 
\subsection{Data-Oriented Chord}
% Speaker notes:
% Each piece of data is a virtual node in the system.
%
% Already you can see how every virtual node can use the other virtual
% nodes that it shares a physical node with, and their successors, as fingers.
%
% Unfortunately the other virtual nodes on the same physical node do
% not have any more information, so without fingers lookups are still
% linear, because the property of Chord that nodes know more about
% nodes around them is not preserved. So we still need the original
% Chord algorithm and still have fingers, but also share fingers with the
% other virtual nodes on your physical node. Second is that maybe you
% could just keep ... (note left unfinished)

% First figure slide: figures/successors.pdf.
\begin{frame}
  \frametitle{Data-Oriented Chord}
  \includegraphics[scale=0.34]{figures/successors.pdf}
\end{frame}

% Second figure slide: figures/fingers.pdf.
\begin{frame}
  \frametitle{Data-Oriented Chord}
  \includegraphics[scale=0.34]{figures/fingers.pdf}
\end{frame}


% Irene fill in :)
%  want figure!

% \subsection{Security}
% %point out per application filtering, give plug-in example of filtering out pages with password fields
% \begin{frame}
%   \frametitle{Security}
%   \begin{itemize}
%   \item In general, security is application's responsibility
%   \item For larger files, use self-certifying ids.
%   \end{itemize}
% \end{frame}

\section{Evaluation}

\subsection{Search Algorithm Comparison}
% Figure slide: figures/hops.pdf. The negative \vspace pulls the figure
% upwards on the slide.
\begin{frame}
  \vspace{-0.27in}
  \frametitle{Performance Comparison}
  \includegraphics[scale=0.37]{figures/hops.pdf}
\end{frame}
% Speaker note: as we can see, consistent hashing performs much better
% because you have global knowledge, but when we look at bandwidth usage,
% Chord scales much better.

% Figure slide: figures/messages.pdf, with the same upward shift.
\begin{frame}
  \vspace{-0.27in}
  \frametitle{Bandwidth Comparison}
  \includegraphics[scale=0.36]{figures/messages.pdf}
\end{frame}

\subsection{Bandwidth Savings}
% Speaker notes:
% Our system is dependent on how much users' network usage actually has in common.
% So we did this study of ... (note left unfinished)
\begin{frame}
  \frametitle{Bandwidth Savings}
  % One block per trace set; each lists duration, hit rate and bandwidth savings.
  \begin{block}{UC Berkeley Traces -- 11/96}
    \begin{itemize}
      \item Duration: 4 hours
      \item Hit rate: 24.3\%
      \item Bandwidth Savings: 27.6\%
    \end{itemize}
  \end{block}

  \begin{block}{IRCache Traces -- 1/10/07}
    \begin{itemize}
      \item Duration: 1 day
      \item Hit rate: 37.6\%
      \item Bandwidth Savings: 41.5\%
    \end{itemize}
  \end{block}
\end{frame}

% Continuation slide: Zipf-distribution results plus the cost estimate.
\begin{frame}
  \frametitle{Bandwidth Savings, cont.}
  \begin{block}{Zipf Distribution}
    \begin{itemize}
      \item Hit rate: 43.2\%
      \item Bandwidth Savings: 45.7\%
    \end{itemize}
  \end{block}

  \vspace{1ex}
  % NOTE(review): the talk title quotes 150,000 dollars a year while this
  % slide quotes $210,000/year -- confirm which figure is intended.
  \begin{itemize}
    \item For an institution like MIT, a 35\% reduction in web traffic could save \$210,000/year.
  \end{itemize}
\end{frame}

\section{Conclusion}

\subsection{Related Work}
% Positions InHome against centralized proxies and locality-aware BitTorrent work.
\begin{frame}
  \frametitle{Related Work}
  \begin{itemize}
    \item First distributed caching solution (most companies use a centralized proxy server)
    \item There has been research into local BitTorrent -- selecting local peers first
      \begin{itemize}
        \item Ono project
        \item Stanford analysis
      \end{itemize}
  \end{itemize}
\end{frame}

\subsection{Conclusion}

% Closing slide: three take-aways followed by a centered prompt for questions.
\begin{frame}
  % Titled like the other content frames in the deck.
  \frametitle{Conclusion}
  \begin{itemize}
    \item InHome can save wide-area bandwidth by fetching data from local peers
    \item InHome does not worsen user experience
    \item InHome realizes substantial savings in external bandwidth
  \end{itemize}
  \vspace{0.5in}
  \begin{center}
    \textbf{Questions?}
  \end{center}
\end{frame}
\end{document}
