«

批量爬取TG频道的图片文件

荔枝的君子 发布于 阅读:58


因为想搞api站嘛,所以就想爬点图片

这不,找到了一个有很多好图的频道 @WaifuP1c

我就想把图片搞到本地

写了个脚本来获取


import os

import asyncio

from telethon import TelegramClient, events

from telethon.tl.types import MessageMediaPhoto

from datetime import datetime

# API credentials (obtain them from https://my.telegram.org).
API_ID = ''  # replace with your API ID
API_HASH = ''  # replace with your API hash
CHANNEL_USERNAME = 'WaifuP1c'  # replace with the target channel's username
DOWNLOAD_PATH = 'telegram_images'  # directory the images are saved to

# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(DOWNLOAD_PATH, exist_ok=True)

def get_existing_files():
    """Return the names of all entries currently in DOWNLOAD_PATH as a set."""
    with os.scandir(DOWNLOAD_PATH) as entries:
        return {entry.name for entry in entries}

async def main():
    """Download every photo of CHANNEL_USERNAME that is not yet in DOWNLOAD_PATH.

    Starts a Telegram client, walks the channel's messages and saves each
    photo as ``<YYYYmmdd_HHMMSS>_<message id>.jpg``. A photo whose file name
    already exists in DOWNLOAD_PATH is skipped, so the script can be re-run
    to fetch only new images. Progress and a final summary are printed.
    """
    client = TelegramClient('session_name', API_ID, API_HASH)
    new_count = 0
    skipped_count = 0

    try:
        await client.start()
        print("客户端已启动")
        print(f"开始检查频道 {CHANNEL_USERNAME} 的新图片...")

        existing_files = get_existing_files()
        print(f"已找到 {len(existing_files)} 个现有文件")

        entity = await client.get_entity(CHANNEL_USERNAME)

        async for message in client.iter_messages(entity):
            # Only photo messages are of interest; isinstance() is also
            # False when the message carries no media at all.
            if not isinstance(message.media, MessageMediaPhoto):
                continue

            date_str = message.date.strftime("%Y%m%d_%H%M%S")
            filename = f"{date_str}_{message.id}.jpg"

            if filename in existing_files:
                skipped_count += 1
                # Report only every 50th skip to keep the output readable.
                if skipped_count % 50 == 0:
                    print(f"已跳过 {skipped_count} 个已存在的文件,正在检查新文件...")
                continue

            filepath = os.path.join(DOWNLOAD_PATH, filename)
            saved_path = await client.download_media(message.media, file=filepath)
            if saved_path is None:
                # download_media returns None when nothing was saved; do not
                # count or report such a message as a successful download.
                print(f"下载失败,已跳过: {filename}")
                continue

            new_count += 1
            print(f"已下载 {new_count} 张新图片: {filename}")

        print(f"完成! 共下载 {new_count} 张新图片,跳过 {skipped_count} 个已存在的图片")
        print(f"所有图片已保存到 {DOWNLOAD_PATH} 目录")
    finally:
        # Always release the connection, even when a request above fails.
        await client.disconnect()

# Script entry point: run the async downloader only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':

    asyncio.run(main())

运行这个脚本需要先安装第三方库 Telethon

你可以使用 pip 安装它


pip install telethon