import sys, os, time, re, sha, urlparse
from twisted.python import util
from twisted.internet import reactor, defer, task

from scrapercore import *

class IsoHuntLatestScraper(Scraper):
    """Scrape isoHunt listing rows that carry a direct [DL] torrent link.

    REGEX matches rows whose onClick handler is ``window.location=...`` and
    captures age, category, torrenturl, torrentname, size, files, seeders,
    leechers and downloads.
    """

    # Listing page handed to the base Scraper.
    URL = "http://isohunt.com/latest.php?mode=bt"
    # One match per listing row; the named groups become the scraped fields.
    REGEX = r"""^<tr class="hlRow" onClick="window.location=.*".*?<td class=row1>(?P<age>.*?)</td><td class=row3>(?P<category>.*?)</td><td class=row3 id=name.*?>\[DL\] <a href="(?P<torrenturl>.*?)" .*?>(?P<torrentname>.*?)</a><br>.*</td><td class=row3>(?P<size>.*?)</td><td class=row3>(?P<files>.*?)</td><td class=row3>(?P<seeders>.*?)</td><td class=row3>(?P<leechers>.*?)</td><td class=row3>(?P<downloads>.*?)</td>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = IsoHuntLatestScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "isohunt-latest"

        self.taggers.append(markSource)
        # transformRelativeURL comes from the base class; presumably it
        # resolves the captured href against the page URL (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL

class IsoHuntIndexedScraper(Scraper):
    """Scrape isoHunt listing rows that only link to a per-torrent page.

    The listing URL is the same one IsoHuntLatestScraper uses, but REGEX
    matches rows whose onClick handler is ``servOC(...)`` and captures
    torrentindurl, age, category, torrentname and size.  The actual torrent
    URL is then pulled from the page behind torrentindurl by a Subscraper.
    """

    # Listing page handed to the base Scraper.
    URL = "http://isohunt.com/latest.php?mode=bt"
    # One match per listing row; torrentindurl is the detail-page link.
    REGEX = r"""^<tr class="hlRow" onClick="servOC\([0-9]*?,'(?P<torrentindurl>.*?)','.*'\).*".*?<td class=row1>(?P<age>.*?)</td><td class=row3>(?P<category>.*?)</td><td class=row3 id=name[0-9]*>(?:.*<br>)?(?P<torrentname>.*?)</a></td><td class=row3>(?P<size>.*?)</td>.*$"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = IsoHuntIndexedScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "isohunt-indexed"

        def followIndUrl(ti, k, v):
            # Store the raw detail-page link, then scrape that page for the
            # download link and return whatever subscrape() returns.
            detailRegex = r"""^.*<a href="(?P<torrenturl>.*?)" rel="nofollow" title="You need BitTorrent software for this P2P download link to work" style=.*$"""
            ti[k] = v
            sub = Subscraper(self.log, self.throttler, ti, detailRegex)
            sub.transforms["torrenturl"] = sub.transformRelativeURL
            # self.url is assumed to be set by Scraper.__init__ (confirm).
            detailUrl = urlparse.urljoin(self.url, v)
            return sub.subscrape(detailUrl)

        self.taggers.append(markSource)
        self.transforms["torrentindurl"] = followIndUrl

class BitTorrentScraper(Scraper):
    """Scrape the ranked torrent list on the bittorrent.com front page.

    REGEX captures only torrentindurl and torrentname from each list item;
    category, numfiles, size, filenames and torrenturl are pulled from the
    page behind torrentindurl by a Subscraper.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.bittorrent.com/"
    # One match per ranked list item.
    REGEX = r"""^<li><span class="rankitem hentry"><a class="entry-title entry-summary".*?href="(?P<torrentindurl>\S*?)".*?>(?P<torrentname>.*?)</a></span></li>$"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = BitTorrentScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "bittorrent-latest"

        def splitFileNames(ti, k, v):
            # The file list arrives as one string separated by <br /> tags.
            ti[k] = v.split("<br />")

        def followIndUrl(ti, k, v):
            # Store the raw detail-page link, then scrape that page for the
            # remaining fields and return whatever subscrape() returns.
            detailRegex = r"""<span class="descLabel">Media Type: </span>(?P<category>.*?)<br />\n[\s\S]*<span class="descLabel">Torrent Contains: </span>\n(?P<numfiles>[0-9]*) file.\n(?P<size>.*?)\n[\s\S]*<div id="files1"\nclass="showing">\n(?P<filenames>.*?) </div>\n[\s\S]*<a href="(?P<torrenturl>\S*?)"><img src="/img/global/btn_download.png" width.*?/>"""
            ti[k] = v
            sub = Subscraper(self.log, self.throttler, ti, detailRegex)
            sub.transforms["torrenturl"] = sub.transformRelativeURL
            sub.transforms["filenames"] = splitFileNames
            # self.url is assumed to be set by Scraper.__init__ (confirm).
            detailUrl = urlparse.urljoin(self.url, v)
            return sub.subscrape(detailUrl)

        self.taggers.append(markSource)
        self.transforms["torrentindurl"] = followIndUrl

class MininovaScraper(Scraper):
    """Scrape the mininova "today" listing.

    REGEX captures added, torrenturl, indurl, torrentname and size from each
    row; magnet, category, seeders, leechers and (optionally) downloads are
    pulled from the page behind indurl by a Subscraper.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.mininova.org/today"
    # One match per table row.
    REGEX = r"""<tr.*?><td>(?P<added>\S*?)</td><td><a href="(?P<torrenturl>.*?)"><img src="/images/down.gif" alt="\[D\]"/></a><a href="(?P<indurl>.*?)">(?P<torrentname>.*?)</a></td><td align="right">(?P<size>.*?)</td>.*?</tr>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = MininovaScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "mininova-today"

        def followIndUrl(ti, k, v):
            # Store the raw detail-page link, then scrape that page for the
            # remaining fields and return whatever subscrape() returns.
            detailRegex = r"""\(<a href="(?P<magnet>.*?)">Azureus magnet link</a>\)[\s\S]*<tr><td>Category:</td><td.*?>(?P<category>.*?)</td></tr>\n<tr><td>Seeds:</td><td>(?:<div.*?>)?(?P<seeders>.*?)(?:</div>)?</td></tr>\n<tr><td>Leechers:</td><td>(?:<div.*?>)?(?P<leechers>.*?)(?:</div>)?</td></tr>\n.*?\n(?:<tr><td>Downloaded:</td><td>(?P<downloads>.*?)</td></tr>)?"""
            ti[k] = v
            sub = Subscraper(self.log, self.throttler, ti, detailRegex)
            sub.transforms["category"] = sub.transformStripHTML
            # self.url is assumed to be set by Scraper.__init__ (confirm).
            detailUrl = urlparse.urljoin(self.url, v)
            return sub.subscrape(detailUrl)

        self.taggers.append(markSource)

        # Both helpers below come from the base class; going by their names
        # they resolve relative links and strip &nbsp; entities (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["size"] = self.transformStripNBSP
        self.transforms["indurl"] = followIndUrl

class PirateBayLatestScraper(Scraper):
    """Scrape The Pirate Bay "recent" listing.

    REGEX spans several lines of each table row and captures category,
    indurl, torrentname, age, torrenturl, size, seeders and leechers.
    """

    # Listing page handed to the base Scraper.
    URL = "http://thepiratebay.org/recent"
    # One match per table row (cells are separated by newlines).
    REGEX = r"""<td class="vertTh"><a href=".*?" title="More from this category">(?P<category>.*?)</a></td>\n<td><a href="(?P<indurl>.*?)" class="detLink" title="Details for.*?">(?P<torrentname>.*?)</a></td>\n<td>(?:<b>)?(?P<age>.*?)(?:</b>)?</td>\n<td><a href="(?P<torrenturl>.*?)" title="Download.*?"><img .*?/></a></td>\n<td align="right">(?P<size>.*?)</td>\n<td align="right">(?P<seeders>.*?)</td>\n<td align="right">(?P<leechers>.*?)</td>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = PirateBayLatestScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "piratebay-latest"

        self.taggers.append(markSource)

        # Helpers come from the base class; going by their names they
        # resolve relative links and strip &nbsp; entities (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["age"] = self.transformStripNBSP
        self.transforms["size"] = self.transformStripNBSP

class PirateBayTopScraper(Scraper):
    """Scrape The Pirate Bay "top/all" listing.

    REGEX spans several lines of each table row and captures category,
    indurl, torrentname, age, torrenturl, size, seeders and leechers.
    """

    # Listing page handed to the base Scraper.
    URL = "http://thepiratebay.org/top/all"
    # One match per table row (cells are separated by newlines).
    REGEX = r"""<td class="vertTh"><a href=".*?" title="More from this category">(?P<category>.*?)</a></td>\n<td><a href="(?P<indurl>.*?)" class="detLink" title="Details for.*?">(?P<torrentname>.*?)</a></td>\n<td>(?:<b>)?(?P<age>.*?)(?:</b>)?</td>\n<td><a href="(?P<torrenturl>.*?)" title="Download.*?"><img .*?/></a></td>\n<td align="right">(?P<size>.*?)</td>\n<td align="right">(?P<seeders>.*?)</td>\n<td align="right">(?P<leechers>.*?)</td>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = PirateBayTopScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "piratebay-top"

        self.taggers.append(markSource)

        # Helpers come from the base class; going by their names they
        # resolve relative links and strip &nbsp; entities (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["age"] = self.transformStripNBSP
        self.transforms["size"] = self.transformStripNBSP

class BitenovaScraper(Scraper):
    """Scrape the bitenova.nl front-page listing.

    REGEX spans several lines of each table row and captures category,
    indurl, torrentname, size, seeders and leechers; torrenturl is pulled
    from the page behind indurl by a Subscraper.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.bitenova.nl/"
    # One match per table row (cells are on separate, indented lines).
    REGEX = r"""[ \t]*<tr .*?>\n[ \t]*<td><a href=".*?">(?P<category>.*?)</a></td>\n[ \t]*<td> <a href="(?P<indurl>.*?)">(?P<torrentname>.*?)</a></td>\n[ \t]*<td.*?>(?P<size>.*?)</td>\n[ \t]*<td><div.*?>(?P<seeders>.*?)</div></td>\n[ \t]*<td><div.*?>(?P<leechers>.*?)</div></td>\n"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = BitenovaScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "bitenova-latest"

        def followIndUrl(ti, k, v):
            # Store the raw detail-page link, then scrape that page for the
            # download link and return whatever subscrape() returns.
            detailRegex = r"""[ \t]*<td.*?>Download</td>\n[ \t]*<td.*?><a href="(?P<torrenturl>.*?)">.*?</a></td>"""
            ti[k] = v
            sub = Subscraper(self.log, self.throttler, ti, detailRegex)
            sub.transforms["torrenturl"] = sub.transformRelativeURL
            # self.url is assumed to be set by Scraper.__init__ (confirm).
            detailUrl = urlparse.urljoin(self.url, v)
            return sub.subscrape(detailUrl)

        self.taggers.append(markSource)

        # NOTE(review): the listing REGEX has no torrenturl group, so this
        # registration looks unused at this level -- confirm against Scraper.
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["indurl"] = followIndUrl

class TorrentSpyScraper(Scraper):
    """Scrape the torrentspy.com "latest" listing.

    REGEX captures torrenturl, torrentname, category, size, numfiles,
    seeders and leechers from each table row.
    """

    # Listing page handed to the base Scraper.
    URL = "http://torrentspy.com/latest.asp"
    # One match per table row.
    REGEX = r"""<tr.*?><td><a href="(?P<torrenturl>.*?)".*?><img.*?></a> <a.*? title="(?P<torrentname>.*?)".*?><font.*?><b>.*?</b></font></a></td><td.*?>(?P<category>.*?)</td><td.*?>(?P<size>.*?)</td><td.*?>(?P<numfiles>.*?)</td><td.*?>(?P<seeders>.*?)</td><td.*?>(?P<leechers>.*?)</td><td.*?>.*?</td></tr>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = TorrentSpyScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "torrentspy-latest"

        self.taggers.append(markSource)

        # Helpers come from the base class; going by their names they
        # resolve relative links and strip HTML markup (confirm).
        self.transforms["category"] = self.transformStripHTML
        self.transforms["torrenturl"] = self.transformRelativeURL

class TorrentzScraper(Scraper):
    """Scrape the torrentz.com torrent listing.

    REGEX captures indurl, torrentname, category, added, size, seeders and
    leechers from each entry; torrenturlmaybeind and infohash are pulled
    from the page behind indurl by a Subscraper.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.torrentz.com/torrents"
    # One match per <div class="torrent"> entry.
    REGEX = r"""<div class="torrent"><span class="name"><a href="(?P<indurl>.*?)">(?P<torrentname>.*?)</a> - (?P<category>.*?)</span><span class="added">(?P<added>.*?)</span><span class="size"> ?(?P<size>.*?)</span><span class="ul"> (?P<seeders>.*?)</span><span class="dl"> (?P<leechers>.*?)</span></div>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = TorrentzScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "torrentz-latest"

        def followIndUrl(ti, k, v):
            # Store the raw detail-page link, then scrape that page for the
            # first download location and the info hash; returns whatever
            # subscrape() returns.
            detailRegex = r"""<div class="locations" title="Download Locations"><div class="location"><a href="(?P<torrenturlmaybeind>.*?)".*?><span.*?>.*?</span>[\s\S]*Hash: <a href=".*?">(?P<infohash>.*?)</a>"""
            ti[k] = v
            sub = Subscraper(self.log, self.throttler, ti, detailRegex)
            # NOTE(review): the field name suggests the captured link may
            # itself be another indirect page rather than a .torrent.
            sub.transforms["torrenturlmaybeind"] = sub.transformRelativeURL
            # self.url is assumed to be set by Scraper.__init__ (confirm).
            detailUrl = urlparse.urljoin(self.url, v)
            return sub.subscrape(detailUrl)

        self.taggers.append(markSource)
        self.transforms["indurl"] = followIndUrl

class BTJunkieLatestScraper(Scraper):
    """Scrape the btjunkie.org "latest" listing.

    REGEX captures torrentname, category, size, added, seeders and leechers
    from each row.  torrenturl is captured only when the row shows the
    down.gif download icon; rows with locked.gif match without it.
    """

    # Listing page handed to the base Scraper.
    URL = "http://btjunkie.org/?do=latest"
    # One match per table row; the first alternation handles locked rows.
    REGEX = r"""<tr.*?><th.*>\s*?(?:<a href="(?P<torrenturl>.*?)"><img src="/images/down.gif".*></a>|<a.*?><img src="/images/locked.gif".*?></a>)\s*?<a.*?>\s*?<img.*?></a>.*?\s*?<a.*><b>(?P<torrentname>.*?)</b></a></th>\s*?<th.*?><a.*?><b>(?P<category>.*?)</b></a></th>\s*?<th.*?><font.*?>(?P<size>.*?)</font></th>\s*?<th.*?><font.*?>(?P<added>.*?)</font></th>\s*?<th.*?><font.*?>(?P<seeders>.*?)</font></th>\s*?<th.*?><font.*?>(?P<leechers>.*?)</font></th>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = BTJunkieLatestScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "btjunkie-latest"

        self.taggers.append(markSource)
        # transformRelativeURL comes from the base class; presumably it
        # resolves the captured href against the page URL (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL

class BTJunkieTopScraper(Scraper):
    """Scrape the btjunkie.org front-page listing.

    REGEX captures torrentname, category, size, added, seeders and leechers
    from each row.  torrenturl is captured only when the row shows the
    down.gif download icon; rows with locked.gif match without it.
    """

    # Listing page handed to the base Scraper.
    URL = "http://btjunkie.org/"
    # One match per table row; the first alternation handles locked rows.
    REGEX = r"""<tr.*?><th.*>\s*?(?:<a href="(?P<torrenturl>.*?)"><img src="/images/down.gif".*></a>|<a.*?><img src="/images/locked.gif".*?></a>)\s*?<a.*?>\s*?<img.*?></a>.*?\s*?<a.*><b>(?P<torrentname>.*?)</b></a></th>\s*?<th.*?><a.*?><b>(?P<category>.*?)</b></a></th>\s*?<th.*?><font.*?>(?P<size>.*?)</font></th>\s*?<th.*?><font.*?>(?P<added>.*?)</font></th>\s*?<th.*?><font.*?>(?P<seeders>.*?)</font></th>\s*?<th.*?><font.*?>(?P<leechers>.*?)</font></th>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = BTJunkieTopScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "btjunkie-top"

        self.taggers.append(markSource)
        # transformRelativeURL comes from the base class; presumably it
        # resolves the captured href against the page URL (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL

class TorrentReactorLatestScraper(Scraper):
    """Scrape the torrentreactor.net index0.php listing.

    REGEX captures added, torrenturl, torrentname, size, seeders, leechers,
    downloads and category from each table row.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.torrentreactor.net/index0.php"
    # One match per table row (cells may be separated by whitespace).
    REGEX = r"""<TR><td.*?><p.*?>(?P<added>.*?)</td>\s*?<td.*?><p.*?><A HREF="(?P<torrenturl>.*?)"><img.*?></a> <A HREF=".*?" title="(?P<torrentname>.*?)">.*?</A></TD>\s*?<td.*?><p.*?>(?P<size>.*?)</TD>\s*?<td.*?><p.*?>(?P<seeders>.*?)</TD>\s*?<td.*?><p.*?>(?P<leechers>.*?)</TD>\s*?<td.*?><p.*?>(?P<downloads>.*?)</TD>\s*?<td.*?><p.*?><A.*?>(?P<category>.*?)</A></TD>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = TorrentReactorLatestScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "torrentreactor-latest"

        self.taggers.append(markSource)

        # Helpers come from the base class; going by their names they
        # resolve relative links and strip &nbsp; entities (confirm).
        self.transforms["size"] = self.transformStripNBSP
        self.transforms["torrenturl"] = self.transformRelativeURL

class TorrentReactorTopScraper(Scraper):
    """Scrape the torrentreactor.net front-page listing.

    REGEX captures added, torrenturl, torrentname, size, seeders, leechers,
    downloads and category from each table row.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.torrentreactor.net/"
    # One match per table row (cells may be separated by whitespace).
    REGEX = r"""<TR><td.*?><p.*?>(?P<added>.*?)</td>\s*?<td.*?><p.*?><A HREF="(?P<torrenturl>.*?)"><img.*?></a> <A HREF=".*?" title="(?P<torrentname>.*?)">.*?</A></TD>\s*?<td.*?><p.*?>(?P<size>.*?)</TD>\s*?<td.*?><p.*?>(?P<seeders>.*?)</TD>\s*?<td.*?><p.*?>(?P<leechers>.*?)</TD>\s*?<td.*?><p.*?>(?P<downloads>.*?)</TD>\s*?<td.*?><p.*?><A.*?>(?P<category>.*?)</A></TD>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        # Pass this class's own URL/REGEX.  Handing over the "latest"
        # scraper's constants would fetch index0.php while still tagging the
        # results as "torrentreactor-top", leaving URL above unused.
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentReactorTopScraper.URL,
                         TorrentReactorTopScraper.REGEX)

        # Helpers come from the base class; going by their names they
        # resolve relative links and strip &nbsp; entities (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["size"] = self.transformStripNBSP

        def tagSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "torrentreactor-top"
        self.taggers.append(tagSource)

class MeganovaLatestScraper(Scraper):
    """Scrape the meganova.org listing ordered by date.

    REGEX captures added, torrenturl, torrentname, size, seeders and
    leechers from each table row.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.meganova.org/order-date.html"
    # One match per table row.
    REGEX = r"""<tr.*?><td.*?>(?P<added>.*?)</td><td>.*?<a rel="nofollow" href="(?P<torrenturl>.*?)">.*?<a href="/details/.*?">(?P<torrentname>.*?)</a>.*?</td><td.*?>(?P<size>.*?)</td><td.*?><div.*?>(?P<seeders>.*?)</div></td><td.*?><div.*?>(?P<leechers>.*?)</div></td>"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = MeganovaLatestScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "meganova-latest"

        self.taggers.append(markSource)
        # transformRelativeURL comes from the base class; presumably it
        # resolves the captured href against the page URL (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL

class TorrentPortalScraper(Scraper):
    """Scrape the torrentportal.com "new torrents" listing.

    REGEX captures torrenturl, category, torrentname and size from each
    table row.  seeders and leechers are captured only when the row has
    stats; rows showing "No Stats" match without them.
    """

    # Listing page handed to the base Scraper.
    URL = "http://www.torrentportal.com/new-torrents.php"
    # One match per table row; the trailing alternation handles "No Stats".
    REGEX = r"""<tr><td.*?><a href="(?P<torrenturl>.*?)">.*?</a></td><td.*?>.*?</td><td.*?>.*?<a.*?>(?P<category>.*?)</a></td><td.*?>.*?<a href=".*?" title="(?P<torrentname>.*?)">.*?</a></td><td.*?>.*?</td><td.*?>(?P<size>.*?)</td>(?:<td.*?><b>(?P<seeders>.*?)</b></td><td.*?>(?P<leechers>.*?)</td>|<td.*?>No Stats.*?</td>)"""

    def __init__(self, log, throttler, addTorrent):
        """Initialise the base scraper and register this site's hooks.

        log, throttler and addTorrent are forwarded untouched to
        Scraper.__init__ together with this class's URL and REGEX.
        """
        cls = TorrentPortalScraper
        Scraper.__init__(self, log, throttler, addTorrent, cls.URL, cls.REGEX)

        def markSource(ti):
            # Record which scraper produced this torrent-info mapping.
            ti["source"] = "torrentportal-latest"

        self.taggers.append(markSource)
        # transformRelativeURL comes from the base class; presumably it
        # resolves the captured href against the page URL (confirm).
        self.transforms["torrenturl"] = self.transformRelativeURL
