批量爬取TG频道的图片文件
荔枝的君子 发布于 阅读:58
因为想搭建一个图片 API 站,所以打算先爬取一些图片。
正好找到了一个有很多好图的 Telegram 频道:@WaifuP1c。
我想把频道里的图片保存到本地,
于是写了下面这个脚本来批量下载:
import os
import asyncio
from telethon import TelegramClient, events
from telethon.tl.types import MessageMediaPhoto
from datetime import datetime
# Telegram API credentials (obtain them from https://my.telegram.org).
API_ID = ''  # replace with your API ID
API_HASH = ''  # replace with your API hash
CHANNEL_USERNAME = 'WaifuP1c'  # replace with the target channel's username
DOWNLOAD_PATH = 'telegram_images'  # directory the images are saved into

# Create the download directory up front. exist_ok=True makes this a no-op
# when the directory is already there and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs(DOWNLOAD_PATH, exist_ok=True)
def get_existing_files():
    """Return the names of all entries currently in DOWNLOAD_PATH as a set."""
    names = os.listdir(DOWNLOAD_PATH)
    return set(names)
async def main():
    """Download every photo of CHANNEL_USERNAME that is not yet on disk.

    Starts a Telethon client (session file ``session_name``), iterates over
    the channel's messages and stores each photo message as
    ``<YYYYmmdd_HHMMSS>_<message id>.jpg`` inside DOWNLOAD_PATH. Photos whose
    file name is already present in that directory are skipped, so the script
    can be re-run to fetch only new images. Progress and a final summary are
    printed to stdout.

    The client is always disconnected, even if an error interrupts the run.
    """
    client = TelegramClient('session_name', API_ID, API_HASH)
    try:
        await client.start()
        print("客户端已启动")
        print(f"开始检查频道 {CHANNEL_USERNAME} 的新图片...")

        existing_files = get_existing_files()
        print(f"已找到 {len(existing_files)} 个现有文件")

        entity = await client.get_entity(CHANNEL_USERNAME)
        new_count = 0
        skipped_count = 0

        async for message in client.iter_messages(entity):
            # Only photo messages are of interest; isinstance() is already
            # False for messages without media (media is None).
            if not isinstance(message.media, MessageMediaPhoto):
                continue

            date_str = message.date.strftime("%Y%m%d_%H%M%S")
            filename = f"{date_str}_{message.id}.jpg"

            if filename in existing_files:
                skipped_count += 1
                if skipped_count % 50 == 0:
                    print(f"已跳过 {skipped_count} 个已存在的文件,正在检查新文件...")
                continue

            filepath = os.path.join(DOWNLOAD_PATH, filename)
            # Download to a temporary name first and rename on success, so an
            # interrupted transfer never leaves a truncated file under the
            # final name (which later runs would treat as already downloaded).
            partial_path = filepath + '.part'
            saved_path = await client.download_media(message.media, file=partial_path)
            if saved_path is None:
                # download_media returns None when nothing was saved
                # (e.g. empty media); do not count it as a download.
                continue
            os.replace(saved_path, filepath)

            new_count += 1
            print(f"已下载 {new_count} 张新图片: {filename}")

        print(f"完成! 共下载 {new_count} 张新图片,跳过 {skipped_count} 个已存在的图片")
        print(f"所有图片已保存到 {DOWNLOAD_PATH} 目录")
    finally:
        # Release the connection regardless of how the run ended.
        await client.disconnect()
# Script entry point: run the async downloader on a fresh event loop, but
# only when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
运行这个脚本需要先安装第三方库 Telethon,
可以使用 pip 安装:
pip install telethon