from __future__ import division

# Load the whole binary trace into memory as `data`.
# The `with` block closes the file even if read() raises, and the handle no
# longer shadows the builtin `input`.
with open("c:\\UCB-home-IP-848278026-848292426.tr", 'rb') as trace_file:
  data = trace_file.read()

import struct

# Walk the trace held in `data` one record at a time and tally repeat requests.
#
# Layout as read by this loop: a fixed 60-byte header followed by the URL
# bytes.  Within the header the client IP is a big-endian unsigned 32-bit
# value at offset 24 and the URL length is a big-endian unsigned 16-bit value
# at offset 58.
# NOTE(review): this matches what I recall of the UCB Home-IP trace record
# layout -- confirm against the trace format description.
HEADER_SIZE = 60
CLIENT_IP_OFFSET = 24
URL_LENGTH_OFFSET = 58

start = 0          # byte offset of the record currently being parsed
records = set()    # every (client_ip, url) pair seen so far
requests = set()   # every url seen so far, from any client
revisit = 0        # requests whose (client_ip, url) pair had been seen before
upper_bound = 0    # requests for a url previously seen only from other clients
total = 0          # number of complete records parsed

data_length = len(data)

# Require a complete header before unpacking.  The previous guard
# (start < len(data) - 2) let the loop run with only a few trailing bytes
# left, which made struct.unpack raise struct.error on the short slice.
while start + HEADER_SIZE <= data_length:
  client_ip = struct.unpack_from(">L", data, start + CLIENT_IP_OFFSET)[0]
  url_length = struct.unpack_from(">H", data, start + URL_LENGTH_OFFSET)[0]
  next_start = start + HEADER_SIZE + url_length
  if next_start > data_length:
    # The URL runs past the end of the data: the final record is truncated.
    # Stop rather than count a shortened URL as if it were a real request.
    break
  url = data[start + HEADER_SIZE:next_start]
  visit = (client_ip, url)
  if visit in records:
    revisit += 1
  elif url in requests:
    # New for this client, but some other client already asked for it.
    upper_bound += 1
  records.add(visit)
  requests.add(url)
  start = next_start
  total += 1
