【批量下载worldwidetorrents种子】

我用来管理漫画的脚本之一。
用于下载 Nemesis43、Shipjolly、RubrumPopulus 三位用户的种子,你也可以下载其他用户的,自己填用户id即可。

以后可能会公开更多脚本。

注意:有三个参数要填你自己的:cookie、header,还有保存目录。在浏览器里按F12打开控制台即可查看前两者。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import re
import urllib.parse

import requests
from lxml import html

# Example uploader IDs. Each assignment overwrites the previous one, so only
# the last value survives here -- these lines are illustrative; the IDs that
# actually get crawled come from ID_STRING below (parsed in __main__).
user_id = 36 # Nemesis43
user_id = 4445 # Shipjolly
user_id = 20217 # RubrumPopulus

# One uploader ID per line; parsed at runtime in __main__ (blank lines and
# non-numeric lines are ignored, text after '#' is treated as a comment).
ID_STRING = '''
36
4445
20217
'''


def check_torrent(href):
    """Return a truthy match object if *href* is a torrent download link, else None.

    Example of a matching href:
    download.php?id=445&name=Back%20to%20the%20Future%20011%20%282016%29%20%28Digital%29%20%28Kileko-Empire%29.cbr.torrent
    """
    return re.match(r'^download\.php\?id=\d{1,20}&name=(.+)', href)


def get_filename(href):
    """Extract and URL-decode the file name from a torrent download href.

    Example:
    download.php?id=445&name=Back%20to%20the%20Future%20011%20%282016%29%20%28Digital%29%20%28Kileko-Empire%29.cbr.torrent
    -> 'Back to the Future 011 (2016) (Digital) (Kileko-Empire).cbr.torrent'

    Returns '' when *href* is not a download link.
    """
    match = re.match(r'^download\.php\?id=\d{1,20}&name=(.+)', href)
    if not match:
        return ''
    return urllib.parse.unquote(match.group(1))


def check_page(href):
    """Return a truthy match object if *href* is a paginated listing link, else None.

    Example of a matching href: torrents-user.php?id=36&page=239
    """
    return re.match(r'^torrents-user\.php\?id=\d{1,20}&page=\d{1,20}', href)


def get_page_num(href):
    """Parse the page number out of a paginated listing href.

    Example: torrents-user.php?id=36&page=239 -> 239

    Args:
        href: link text scraped from the listing page.

    Returns:
        int: the page number, or 0 when *href* is not a listing link.
    """
    match = re.match(r'^torrents-user\.php\?id=\d{1,20}&page=(\d{1,20})', href)
    # The captured group is guaranteed to be digits by the pattern itself,
    # so the original isnumeric() guard was redundant and has been removed.
    return int(match.group(1)) if match else 0


def download_f_url(download_url, file_path):
    """Download *download_url* and save the response body to *file_path*.

    Bugfix: the original opened the file BEFORE issuing the request, so a
    failed request left a 0-byte file behind -- and the os.path.exists()
    check in ReadHtml would then skip that torrent on every later run.
    The request now happens first, and HTTP errors are raised instead of
    being silently saved as a bogus .torrent file.

    Relies on the module-level `header` and `cookie` globals set in __main__.
    """
    print(download_url)
    response = requests.get(url=download_url, headers=header, cookies=cookie)
    # Surface HTTP errors (expired cookie, 404, ...) to the caller rather
    # than writing an HTML error page to disk as a .torrent file.
    response.raise_for_status()
    with open(file_path, "wb") as file:
        file.write(response.content)


def get_html(url):
    """Fetch *url* and return the response body as text.

    The URL itself is sent as the Referer header. NOTE: this mutates the
    module-level `header` dict; `header` and `cookie` come from __main__.
    """
    header['Referer'] = url
    response = requests.get(url=url, headers=header, cookies=cookie)
    return response.text


def get_hrefs(user_id, page_num):
    """Fetch one page of a user's torrent listing and return all hrefs on it.

    Args:
        user_id: numeric uploader id on worldwidetorrents.me.
        page_num: zero-based listing page to fetch.

    Returns:
        list[str]: every @href found inside the central results table
        (a mix of pagination links and download.php torrent links).
    """
    url = 'https://worldwidetorrents.me/torrents-user.php'
    params = {
        'id': user_id,
        'page': page_num,
    }

    # The site expects the user's listing URL (without the page parameter)
    # as the Referer, e.g. https://worldwidetorrents.me/account-details.php?id=4445
    header['Referer'] = url + '?id=' + str(user_id)

    response = requests.get(url=url, headers=header, cookies=cookie, params=params)

    # ==================== collect the link list ====================
    tree = html.fromstring(response.text)
    return tree.xpath('//center/table//a[@href]/@href')


def ReadHtml(user_id, set_page=999):
    """Crawl a user's torrent listing and download every .torrent file.

    Args:
        user_id: numeric uploader id on worldwidetorrents.me.
        set_page: upper bound on how many listing pages to crawl.

    Files are saved under <file_dir>/<user_id>/; torrents whose file
    already exists on disk are skipped. Relies on the module-level
    `file_dir` global set in __main__.
    """
    sub_dir = os.path.join(file_dir, str(user_id))
    # Bugfix: the original never created the folder, so every download
    # failed with FileNotFoundError (silently swallowed by a bare except).
    os.makedirs(sub_dir, exist_ok=True)

    # ================ probe page 0 to discover the page count ================
    # e.g. https://worldwidetorrents.me/torrents-user.php?id=36&page=237
    hrefs = get_hrefs(user_id, 0)
    page_nums = [get_page_num(href) for href in hrefs if check_page(href)]
    # No pagination links (or only page 0) means there is exactly one page.
    max_page = max(page_nums, default=0) or 1
    print(max_page)

    torrents = []
    wwt_prefix = 'https://worldwidetorrents.me/'
    end_page = min(max_page, set_page)
    print('end_page:', end_page)

    # ================ walk every listing page ================
    for page_num in range(0, end_page + 1):
        hrefs = get_hrefs(user_id, page_num)
        torrent_hrefs = [href for href in hrefs if check_torrent(href)]
        print(page_num)

        torrents.extend(torrent_hrefs)
        for torrent in torrent_hrefs:
            download_url = wwt_prefix + torrent
            filename = get_filename(torrent)
            if not filename.lower().endswith('.torrent'):
                filename += '.torrent'
            print(filename)
            file_path = os.path.join(sub_dir, filename)
            if os.path.exists(file_path):  # already downloaded
                continue
            try:
                download_f_url(download_url, file_path)
            except Exception:
                # Best-effort: report the failed URL and keep going
                # (narrowed from the original bare `except:`).
                print('请注意:', download_url)


if __name__ == '__main__':
    # Open https://worldwidetorrents.me/ with the browser dev tools (F12),
    # copy your own cookie and User-Agent from a request and paste them
    # here. They expire after a while and must be refreshed.
    cookie = 'your_cookie'
    header = {'User-Agent': 'your_header'}

    file_dir = 'your_destination_folder'  # where the .torrent files are saved

    # Parse ID_STRING: one id per line, '#' starts a comment, blank and
    # non-numeric lines are ignored. Bugfix: split on '#' BEFORE stripping,
    # so an id followed by a comment ("36 # name") no longer keeps a
    # trailing space that made isnumeric() reject it.
    id_lines = [line.split('#')[0].strip() for line in ID_STRING.splitlines() if line != '']
    search_list = sorted({int(line) for line in id_lines if line.isnumeric()})

    for user_id in search_list:
        try:
            ReadHtml(user_id, set_page=10)  # set_page: how many listing pages to crawl
        except Exception as err:
            # Best-effort per user: report and continue instead of the
            # original silent `except: pass`, which hid every failure.
            print('user', user_id, 'failed:', err)
墨问非名 wechat
订阅我的公众号
听说,你想请我喝杯咖啡?☕️
0%