"""Site-specific torrent index scrapers built on ``scrapercore``.

Each class pairs a listing URL with a regular expression and hands both to
``Scraper.__init__``.  Per-field post-processing is registered in
``self.transforms`` (keyed by field name) and every scraper appends a tagger
that stamps ``ti["source"]`` with a fixed site label.

NOTE(review): the patterns in this copy of the file look damaged.  Every
named group appears as ``(?P.*?)``, which the ``re`` module rejects (it
requires ``(?P<name>...)``), and the markup that normally sits between the
groups is absent.  The literals are kept exactly as found; restore them from
a pristine copy before relying on any of these scrapers.
"""
import sys
import os
import time
import re
import sha
import urlparse

from twisted.python import util
from twisted.internet import reactor, defer, task

from scrapercore import *


class IsoHuntLatestScraper(Scraper):
    """Scraper for the isohunt.com latest-torrents listing."""

    URL = "http://isohunt.com/latest.php?mode=bt"
    REGEX = r"""^(?P.*?)(?P.*?)\[DL\] (?P.*?)
.*(?P.*?)(?P.*?)(?P.*?)(?P.*?)(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         IsoHuntLatestScraper.URL,
                         IsoHuntLatestScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "isohunt-latest"
        self.taggers.append(tagSource)


class IsoHuntIndexedScraper(Scraper):
    """Scraper for isohunt.com entries tagged as "isohunt-indexed"."""

    # NOTE(review): identical to IsoHuntLatestScraper.URL -- confirm whether
    # a different listing page was intended.
    URL = "http://isohunt.com/latest.php?mode=bt"
    REGEX = r"""^(?P.*?)(?P.*?)(?:.*
)?(?P.*?)(?P.*?).*$"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         IsoHuntIndexedScraper.URL,
                         IsoHuntIndexedScraper.REGEX)

        def tagSource(ti):
            ti["source"] = "isohunt-indexed"
        self.taggers.append(tagSource)

        def transformIndUrl(ti, k, v):
            # NOTE(review): this function is truncated in this copy of the
            # file.  Only the pattern assignment survives and nothing
            # registers the function in self.transforms, so it is currently
            # dead code.  The literal below may be the head of this pattern
            # fused with the tail of BitTorrentScraper.REGEX.
            REGEX = r"""^.*(?P.*?)$"""


# NOTE(review): the original text between the truncated function above and
# the __init__ below is missing.  Only the class header is restored here (it
# is implied by the BitTorrentScraper.URL / BitTorrentScraper.REGEX
# references); the URL and REGEX class attributes were lost and must be
# recovered from a pristine copy -- instantiating this class fails until then.
class BitTorrentScraper(Scraper):
    """Scraper tagged "bittorrent-latest"; follows each entry's detail page."""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         BitTorrentScraper.URL,
                         BitTorrentScraper.REGEX)

        def tagSource(ti):
            ti["source"] = "bittorrent-latest"
        self.taggers.append(tagSource)

        def transformIndUrl(ti, k, v):
            """Store ``v`` under ``ti[k]`` and sub-scrape the page it names.

            Returns whatever ``Subscraper.subscrape`` returns.
            """
            REGEX = r"""Media Type: (?P.*?)
\n[\s\S]*Torrent Contains: \n(?P[0-9]*) file.\n(?P.*?)\n[\s\S]*

\n(?P.*?)

\n[\s\S]*

"""
            ti[k] = v
            subscraper = Subscraper(self.log, self.throttler, ti, REGEX)
            subscraper.transforms["torrenturl"] = \
                subscraper.transformRelativeURL

            def transformFileNames(ti, k, v):
                # NOTE(review): in this copy the delimiter literal is a bare
                # line break; it was possibly an HTML tag originally.
                ti[k] = v.split("\n")
            subscraper.transforms["filenames"] = transformFileNames

            # The captured value is resolved against self.url before fetching.
            return subscraper.subscrape(urlparse.urljoin(self.url, v))
        self.transforms["torrentindurl"] = transformIndUrl


class MininovaScraper(Scraper):
    """Scraper for mininova.org's "today" listing; follows detail pages."""

    URL = "http://www.mininova.org/today"
    REGEX = r"""(?P\S*?) $D$ (?P.*?)(?P.*?).*?"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         MininovaScraper.URL,
                         MininovaScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "mininova-today"
        self.taggers.append(tagSource)
        self.transforms["size"] = self.transformStripNBSP

        def transformIndUrl(ti, k, v):
            """Store ``v`` under ``ti[k]`` and sub-scrape the page it names.

            Returns whatever ``Subscraper.subscrape`` returns.
            """
            REGEX = r"""\(Azureus magnet link\)[\s\S]*Category:(?P.*?)\nSeeds:(?:)?(?P.*?)(?:)?\nLeechers:(?:)?(?P.*?)(?:)?\n.*?\n(?:Downloaded:(?P.*?))?"""
            ti[k] = v
            subscraper = Subscraper(self.log, self.throttler, ti, REGEX)
            subscraper.transforms["category"] = subscraper.transformStripHTML
            return subscraper.subscrape(urlparse.urljoin(self.url, v))
        self.transforms["indurl"] = transformIndUrl


class PirateBayLatestScraper(Scraper):
    """Scraper for thepiratebay.org's "recent" listing."""

    URL = "http://thepiratebay.org/recent"
    REGEX = r"""(?P.*?)\n(?P.*?)\n(?:)?(?P.*?)(?:)?\n\n(?P.*?)\n(?P.*?)\n(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         PirateBayLatestScraper.URL,
                         PirateBayLatestScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "piratebay-latest"
        self.taggers.append(tagSource)
        self.transforms["size"] = self.transformStripNBSP
        self.transforms["age"] = self.transformStripNBSP


class PirateBayTopScraper(Scraper):
    """Scraper for thepiratebay.org's "top/all" listing."""

    URL = "http://thepiratebay.org/top/all"
    REGEX = r"""(?P.*?)\n(?P.*?)\n(?:)?(?P.*?)(?:)?\n\n(?P.*?)\n(?P.*?)\n(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         PirateBayTopScraper.URL,
                         PirateBayTopScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "piratebay-top"
        self.taggers.append(tagSource)
        self.transforms["size"] = self.transformStripNBSP
        self.transforms["age"] = self.transformStripNBSP


class BitenovaScraper(Scraper):
    """Scraper for the bitenova.nl front page; follows detail pages."""

    URL = "http://www.bitenova.nl/"
    REGEX = r"""[ \t]*\n[ \t]*(?P.*?)\n[ \t]* (?P.*?)\n[ \t]*(?P.*?)\n[ \t]*(?P.*?)\n[ \t]*(?P.*?)\n"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         BitenovaScraper.URL,
                         BitenovaScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "bitenova-latest"
        self.taggers.append(tagSource)

        def transformIndUrl(ti, k, v):
            """Store ``v`` under ``ti[k]`` and sub-scrape the page it names.

            Returns whatever ``Subscraper.subscrape`` returns.
            """
            REGEX = r"""[ \t]*Download\n[ \t]*.*?"""
            ti[k] = v
            subscraper = Subscraper(self.log, self.throttler, ti, REGEX)
            subscraper.transforms["torrenturl"] = \
                subscraper.transformRelativeURL
            return subscraper.subscrape(urlparse.urljoin(self.url, v))
        self.transforms["indurl"] = transformIndUrl


class TorrentSpyScraper(Scraper):
    """Scraper for torrentspy.com's latest listing."""

    URL = "http://torrentspy.com/latest.asp"
    REGEX = r""" .*?(?P.*?)(?P.*?)(?P.*?)(?P.*?)(?P.*?).*?"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentSpyScraper.URL,
                         TorrentSpyScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "torrentspy-latest"
        self.taggers.append(tagSource)
        self.transforms["category"] = self.transformStripHTML


class TorrentzScraper(Scraper):
    """Scraper for torrentz.com's torrent listing; follows detail pages."""

    URL = "http://www.torrentz.com/torrents"
    REGEX = r"""

(?P.*?) - (?P.*?)(?P.*?) ?(?P.*?) (?P.*?) (?P.*?)

"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentzScraper.URL,
                         TorrentzScraper.REGEX)

        def tagSource(ti):
            ti["source"] = "torrentz-latest"
        self.taggers.append(tagSource)

        def transformIndUrl(ti, k, v):
            """Store ``v`` under ``ti[k]`` and sub-scrape the page it names.

            Returns whatever ``Subscraper.subscrape`` returns.
            """
            REGEX = r"""

.*?[\s\S]*Hash: (?P.*?)"""
            ti[k] = v
            subscraper = Subscraper(self.log, self.throttler, ti, REGEX)
            subscraper.transforms["torrenturlmaybeind"] = \
                subscraper.transformRelativeURL
            return subscraper.subscrape(urlparse.urljoin(self.url, v))
        self.transforms["indurl"] = transformIndUrl


class BTJunkieLatestScraper(Scraper):
    """Scraper for btjunkie.org's latest listing."""

    URL = "http://btjunkie.org/?do=latest"
    REGEX = r"""\s*?(?:

|

)\s*?\s*?.*?\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         BTJunkieLatestScraper.URL,
                         BTJunkieLatestScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "btjunkie-latest"
        self.taggers.append(tagSource)


class BTJunkieTopScraper(Scraper):
    """Scraper for the btjunkie.org front page, tagged "btjunkie-top"."""

    URL = "http://btjunkie.org/"
    REGEX = r"""\s*?(?:

|

)\s*?\s*?.*?\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         BTJunkieTopScraper.URL,
                         BTJunkieTopScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "btjunkie-top"
        self.taggers.append(tagSource)


class TorrentReactorLatestScraper(Scraper):
    """Scraper for torrentreactor.net's index0.php listing."""

    URL = "http://www.torrentreactor.net/index0.php"
    REGEX = r"""(?P.*?)\s*? .*?\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentReactorLatestScraper.URL,
                         TorrentReactorLatestScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["size"] = self.transformStripNBSP

        def tagSource(ti):
            ti["source"] = "torrentreactor-latest"
        self.taggers.append(tagSource)


class TorrentReactorTopScraper(Scraper):
    """Scraper for the torrentreactor.net front page ("torrentreactor-top")."""

    URL = "http://www.torrentreactor.net/"
    REGEX = r"""(?P.*?)\s*? .*?\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)\s*?(?P.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        # Use this class's own URL/REGEX.  The previous code passed
        # TorrentReactorLatestScraper's attributes, so the "top" scraper
        # fetched the "latest" page while still tagging entries as top.
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentReactorTopScraper.URL,
                         TorrentReactorTopScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL
        self.transforms["size"] = self.transformStripNBSP

        def tagSource(ti):
            ti["source"] = "torrentreactor-top"
        self.taggers.append(tagSource)


class MeganovaLatestScraper(Scraper):
    """Scraper for meganova.org's order-by-date listing."""

    URL = "http://www.meganova.org/order-date.html"
    REGEX = r"""(?P.*?).*?.*?(?P.*?).*?(?P.*?)(?P.*?)

(?P.*?)

"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         MeganovaLatestScraper.URL,
                         MeganovaLatestScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "meganova-latest"
        self.taggers.append(tagSource)


class TorrentPortalScraper(Scraper):
    """Scraper for torrentportal.com's new-torrents listing."""

    URL = "http://www.torrentportal.com/new-torrents.php"
    REGEX = r""".*?.*?.*?(?P.*?).*?.*?.*?(?P.*?)(?:(?P.*?)(?P.*?)|No Stats.*?)"""

    def __init__(self, log, throttler, addTorrent):
        """Forward the arguments plus this class's URL/REGEX to ``Scraper``."""
        Scraper.__init__(self, log, throttler, addTorrent,
                         TorrentPortalScraper.URL,
                         TorrentPortalScraper.REGEX)
        self.transforms["torrenturl"] = self.transformRelativeURL

        def tagSource(ti):
            ti["source"] = "torrentportal-latest"
        self.taggers.append(tagSource)