import sys, os, cPickle, struct, traceback, logging, random, codecs
from twisted.internet import protocol, reactor, defer, task
from twisted.protocols import basic
from twisted.web import xmlrpc
# Prefer the sqlite3 module bundled with Python 2.5+; fall back to the
# external pysqlite2 package only when the bundled module is unavailable.
# (The module name was previously misspelled, so the fallback was always
# taken and pysqlite2 was effectively a hard requirement.)
try:
    import sqlite3                      # Standard library since Python 2.5
except ImportError:
    from pysqlite2 import dbapi2 as sqlite3

# --- Tunables ---------------------------------------------------------------
# sqlite database file holding the bookkeeping tables.
DBNAME = "indexer.db"
# Interval handed to the LoopingCalls that poll for queries / query results.
CHECK_INTERVAL = 1
# Interval handed to the LoopingCall that drains the browse queue.
BROWSE_INTERVAL = 1
# Upper bound on hosts browsed per browse tick.
NUMBER_TO_BROWSE = 5
# Flat files that observed queries / query results are appended to.
QUERYNAME = "queries.txt"
RESULTNAME = "results.txt"
# Size above which the in-memory "already browsed" collection is reset.
MAX_ALREADY_BROWSED = 50000

# isolation_level=None puts the connection in autocommit mode.
db = sqlite3.connect(DBNAME, isolation_level=None)

# Module logger; it lets everything through and leaves filtering to the
# handlers attached to the root logger below.
log = logging.getLogger("Indexer")
log.setLevel(logging.DEBUG)

# One record layout shared by every destination.
_LOG_FORMAT = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
_LOG_DATEFMT = "%m-%d %H:%M:%S"

# Full DEBUG trace, appended to indexerdebug.log.
logging.basicConfig(filename="indexerdebug.log",
                    filemode="a",
                    level=logging.DEBUG, # XXX
                    format=_LOG_FORMAT,
                    datefmt=_LOG_DATEFMT)

# INFO and above additionally go to indexer.log and to stderr.
fileLogHandler = logging.FileHandler("indexer.log")
stderrHandler = logging.StreamHandler()
for _handler in (fileLogHandler, stderrHandler):
    _handler.setLevel(logging.INFO)
    _handler.setFormatter(logging.Formatter(_LOG_FORMAT, _LOG_DATEFMT))
    logging.getLogger('').addHandler(_handler)

def logErr(x):
    """Record str(x) at ERROR level on the module logger.

    Attached as the errback of the XML-RPC Deferreds in this file.  It
    returns None, so the error is logged and not propagated further.
    """
    message = str(x)
    log.error(message)

# 256-entry byte translation table: ASCII letters and every byte >= 0x80
# map to themselves, everything else maps to a space.
xchars = [" "] * 256
for lo, hi in ((ord("A"), ord("Z")), (ord("a"), ord("z")), (0x80, 0xFF)):
    for code in range(lo, hi + 1):
        xchars[code] = chr(code)
xchars = "".join(xchars)

# Characters that translatexchars removes entirely instead of blanking.
rchars = "'`"

class translatexchars:
    """Lookup table for unicode.translate(), indexed by code point.

    Characters listed in rchars are deleted, ASCII letters and every code
    point >= 0x80 are kept unchanged, and anything else becomes a space.
    """
    def __getitem__(self, c):
        # Returning None tells translate() to drop the character.
        if c < 256 and chr(c) in rchars:
            return None

        asciiLetter = (ord("A") <= c <= ord("Z")) or \
                      (ord("a") <= c <= ord("z"))
        if not (asciiLetter or c >= 0x80):
            return u" "
        return c

# Shared instance handed to translate() when queries are normalised.
translator = translatexchars()


def getRandomLineFromFile(filename):
    """Return a pseudo-randomly chosen line of *filename* as unicode.

    The file is read as UTF-8.  For files of at least 1000 bytes the
    function seeks to a random offset (leaving a 1000-byte margin before
    the end), discards between 1 and 25 lines -- the first of which is
    usually a partial line -- and returns the next one.  Smaller files are
    read completely and one of their lines is picked uniformly; previously
    such files raised ValueError from random.randint().

    The returned line keeps its trailing newline.  An empty string is
    returned for an empty file, or when skipping ran past the end of file.

    Raises OSError/IOError if the file cannot be stat'ed or opened, and
    UnicodeDecodeError if the selected line is not valid UTF-8.
    """
    margin = 1000
    fsize = os.stat(filename).st_size
    # Binary mode keeps byte offsets exact for seek() and yields bytes that
    # are decoded once, at the end.
    f = open(filename, "rb")
    try:
        if fsize < margin:
            lines = f.readlines()
            if not lines:
                return u""
            s = random.choice(lines)
        else:
            f.seek(random.randint(0, fsize - margin))
            # Skip at least one line so that reading resumes on a line
            # boundary rather than in the middle of a line.
            for i in range(random.randint(1, 25)):
                f.readline()
            s = f.readline()
    finally:
        # try/finally instead of "with" keeps this usable on old Pythons.
        f.close()
    return s.decode("utf-8")
    
def initDB(conn=None):
    """Create the bookkeeping tables if they do not exist yet.

    conn -- DB-API connection to initialise; defaults to the module-level
            ``db`` connection.

    Tables created: ``queries(query text)``, ``browsed(host text, port int)``
    and ``query_results(filename text)``.  ``IF NOT EXISTS`` makes the call
    idempotent, so running it against an already initialised database no
    longer raises.
    """
    if conn is None:
        conn = db
    c = conn.cursor()
    for ddl in (
            "CREATE TABLE IF NOT EXISTS queries (query text)",
            "CREATE TABLE IF NOT EXISTS browsed (host text, port int)",
            "CREATE TABLE IF NOT EXISTS query_results (filename text)"):
        c.execute(ddl)

class LimeWire:
    """Polling client for a LimeWire XML-RPC endpoint ("gnutella.*" methods).

    Constructing an instance starts two Twisted LoopingCalls, each run
    every CHECK_INTERVAL: one fetches the queries seen by the remote side,
    the other fetches query results.  Subclasses override gotQuery() and
    gotQueryResult() to consume the individual items; the base versions do
    nothing.
    """
    def __init__(self, url):
        """url -- address of the XML-RPC endpoint to talk to."""
        self.url = url
        self.lw = xmlrpc.Proxy(url)
        self.getQueriesTask = task.LoopingCall(self.checkForQueries)
        self.getQueriesTask.start(CHECK_INTERVAL)
        self.getResultsTask = task.LoopingCall(self.checkForQueryResults)
        self.getResultsTask.start(CHECK_INTERVAL)

    def query(self, q):
        """Normalise *q* and submit it through "gnutella.query".

        q is run through the module-level translator and lower-cased
        (callers in this file pass unicode strings).  A normalised query of
        30 or more characters is replaced by one randomly chosen word of
        it; if that word is still 30+ characters the query is dropped.

        Returns the Deferred of the remote call, or None when nothing was
        sent.
        """
        q = q.translate(translator).lower()
        words = q.split()
        if not words:
            # Nothing searchable is left after normalisation.  Besides
            # being pointless to send, random.choice() on the empty word
            # list would raise IndexError for long inputs.
            log.debug("QUERY skipped: nothing left after normalisation")
            return
        if len(q) >= 30:
            q = random.choice(words)
            if len(q) >= 30:
                return
        log.debug("QUERY: " + q)
        return self.lw.callRemote("gnutella.query", q).addErrback(logErr)

    def browse(self, h):
        """Submit *h* through "gnutella.browse"; returns the call's Deferred.

        h must provide "host" and "port" entries (used for the log line)
        and is passed to the remote method unchanged.
        """
        log.debug("BROWSE: " + h["host"] + ":" + str(h["port"]))
        return self.lw.callRemote("gnutella.browse", h).addErrback(logErr)

    def checkForQueries(self):
        """Fetch (and clear) the remote query list; feeds gotQueries()."""
        log.debug("Checking for queries")
        self.lw.callRemote("gnutella.getAndClearQueries").addCallback(
            self.gotQueries).addErrback(logErr)

    def checkForQueryResults(self):
        """Fetch (and clear) the remote result list; feeds gotQueryResults()."""
        log.debug("Checking for query results")
        self.lw.callRemote("gnutella.getAndClearQueryResults").addCallback(
            self.gotQueryResults).addErrback(logErr)

    def gotQueries(self, queries):
        """Dispatch each fetched query string to gotQuery().

        Queries starting with a backslash are ignored.
        """
        log.debug("Got queries: " + str(len(queries)))
        for x in queries:
            if not x.startswith("\\"):
                self.gotQuery(x)
        log.debug("Done processing queries")

    def gotQueryResults(self, queryResults):
        """Dispatch each fetched result to gotQueryResult()."""
        log.debug("Got query results: " + str(len(queryResults)))
        for x in queryResults:
            self.gotQueryResult(x)
        log.debug("Done processing results")

    def gotQuery(self, q):
        """Hook invoked once per received query; no-op in the base class."""
        pass

    def gotQueryResult(self, r):
        """Hook invoked once per received result; no-op in the base class."""
        pass

class LimeWireIndexer(LimeWire):
    """LimeWire client that records traffic and keeps the crawl going.

    Received queries are appended to QUERYNAME and received results to
    RESULTNAME (UTF-8, one record per line).  Every queryInterval a random
    line from each file is re-issued as a new query, and every
    BROWSE_INTERVAL up to NUMBER_TO_BROWSE hosts that allow browsing are
    browsed.  Browsed hosts are remembered both in memory (alreadyBrowsed)
    and in the ``browsed`` table of the supplied database.

    An earlier revision stored queries and results in the ``queries`` and
    ``query_results`` tables; they now go to the flat files instead.
    """
    def __init__(self, url, db, queryInterval):
        """
        url           -- XML-RPC endpoint, passed on to LimeWire.__init__
        db            -- DB-API connection containing the ``browsed`` table
        queryInterval -- interval for the random re-query LoopingCall
        """
        # Set up all state and open (thereby creating) the output files
        # BEFORE starting any LoopingCall: start() performs the first call
        # immediately, and sendRandomQueries() reads these very files.
        self.db = db
        self.dbc = db.cursor()
        self.queryInterval = queryInterval
        self.alreadyBrowsed = set()      # (host, port) pairs already handled
        self.toBrowse = []               # FIFO of result dicts to browse
        self._pendingBrowse = set()      # (host, port) pairs now in toBrowse
        self.queryFile = codecs.open(QUERYNAME, mode="a", encoding="utf-8")
        self.resultFile = codecs.open(RESULTNAME, mode="a", encoding="utf-8")

        LimeWire.__init__(self, url)
        self.sendQueryTask = task.LoopingCall(self.sendRandomQueries)
        self.sendQueryTask.start(queryInterval)
        self.browseTask = task.LoopingCall(self.browseOne)
        self.browseTask.start(BROWSE_INTERVAL)

    def gotQuery(self, q):
        """Append the received query to the query file, one per line."""
        self.queryFile.write(q)
        self.queryFile.write("\n")

    def gotQueryResult(self, r):
        """Record one result and queue its host for browsing if allowed.

        r is a mapping with the entries "name", "host", "port", "size" and
        "browse-host-enabled".  A tab-separated line
        ``name, host, port, size`` is appended to the result file.
        """
        self.resultFile.write(r["name"])
        self.resultFile.write("\t")
        self.resultFile.write(r["host"])
        self.resultFile.write("\t")
        self.resultFile.write(str(r["port"]))
        self.resultFile.write("\t")
        self.resultFile.write(str(r["size"]))
        self.resultFile.write("\n")
        if r["browse-host-enabled"]:
            key = (r["host"], r["port"])
            # toBrowse holds result dicts, so a (host, port) tuple can never
            # be found in it; queued hosts are tracked in a separate set to
            # actually prevent duplicates.
            if key not in self.alreadyBrowsed and \
               key not in self._pendingBrowse:
                self._pendingBrowse.add(key)
                self.toBrowse.append(r)

    def sendRandomQueries(self):
        """Re-issue one random stored query and one random stored result name.

        Errors while picking or sending a query are logged rather than
        propagated: an exception escaping a LoopingCall callback stops the
        loop for good, and failures are expected while the files are still
        empty or very small.
        """
        for send in (self.sendRandomQueryFromQueries,
                     self.sendRandomQueryFromResults):
            try:
                send()
            except (EnvironmentError, ValueError, IndexError):
                log.exception("Could not send random query")

    def sendRandomQueryFromQueries(self):
        """Query for a random line of the query file."""
        log.debug("Sending random query from queries")
        q = getRandomLineFromFile(QUERYNAME)
        self.query(q)

    def sendRandomQueryFromResults(self):
        """Query for the file name (first tab-separated field) of a random
        line of the result file."""
        log.debug("Sending random query from results")
        q = getRandomLineFromFile(RESULTNAME)
        self.query(q.split("\t")[0])

    def browseOne(self):
        """Browse up to NUMBER_TO_BROWSE queued hosts.

        When alreadyBrowsed has grown beyond MAX_ALREADY_BROWSED it is
        reset and this tick does nothing else; the ``browsed`` table check
        below still prevents browsing the same host twice.
        """
        log.debug("toBrowse: " + str(len(self.toBrowse)) + ";  alreadyBrowsed: " +
                 str(len(self.alreadyBrowsed)))
        if len(self.alreadyBrowsed) > MAX_ALREADY_BROWSED:
            log.warning("Clearing alreadyBrowsed")
            # Must remain a set: .add() is called on it below.
            self.alreadyBrowsed = set()
            return

        browsed = 0
        while browsed < NUMBER_TO_BROWSE and self.toBrowse:
            r = self.toBrowse.pop(0)
            key = (r["host"], r["port"])
            self._pendingBrowse.discard(key)
            self.alreadyBrowsed.add(key)
            self.dbc.execute(
                "SELECT host,port FROM browsed WHERE host=? AND port=?",
                key)
            # Only hosts not yet recorded in the database are browsed and
            # counted against this tick's budget.
            if self.dbc.fetchone() is None:
                self.dbc.execute(
                    "INSERT INTO browsed (host, port) VALUES (?, ?)",
                    key)
                self.browse(r)
                browsed += 1

def goForthAndIndex(url="http://127.0.0.1:44266/RPC2", queryInterval=3):
    """Create a LimeWireIndexer and run the Twisted reactor.

    url           -- XML-RPC endpoint of the LimeWire instance
    queryInterval -- interval passed to the indexer for random re-queries

    The indexer uses the module-level ``db`` connection.  reactor.run()
    blocks, so this call only returns once the reactor has been stopped.
    """
    # Constructing the indexer starts its periodic tasks; keep a reference
    # to it for as long as the reactor is running.
    indexer = LimeWireIndexer(url, db, queryInterval)
    reactor.run()

# initDB() creates the sqlite tables.  It is disabled here, so the tables
# (notably `browsed`, which browseOne() reads and writes) must already
# exist in the database file.
#initDB()
goForthAndIndex()
