使用scrapy自带的邮件模块
在 settings.py 中配置
# E-mail settings (scrapydweb-style key names).
# NOTE(review): Scrapy's built-in MailSender reads MAIL_HOST / MAIL_PORT /
# MAIL_FROM / MAIL_PASS / MAIL_SSL, not these SMTP_* / EMAIL_* keys —
# confirm which component actually consumes these settings.
SMTP_SERVER = 'smtp.qq.com'
SMTP_PORT = 465
SMTP_OVER_SSL = True
SMTP_CONNECTION_TIMEOUT = 10
EMAIL_PASSWORD = 'XXX' # fill in the SMTP authorization code generated by the mail provider
EMAIL_SENDER = 'XXX@qq.com'
EMAIL_RECIPIENTS = [EMAIL_SENDER]
EMAIL_SUBJECT = 'Email from #scrapydweb'
在代码中引入 mail 库
MailSender是scrapy自带的一个发送邮件的库
from scrapy.mail import MailSender
监听任务和发邮件
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.exceptions import NotConfigured
from twisted.internet import task
class StatsMailer:
    """Scrapy extension that periodically e-mails crawl stats.

    Hooks the ``spider_opened`` / ``spider_closed`` signals; while the
    spider runs, a Twisted ``LoopingCall`` invokes :meth:`send_email`
    every ``interval`` seconds.
    """

    def __init__(self, stats, recipients, mail, interval=3600.0):
        """
        :param stats: crawler stats collector (``crawler.stats``).
        :param recipients: list of recipient e-mail addresses.
        :param mail: a ``MailSender`` instance used to deliver the mail.
        :param interval: seconds between mails (default 3600 = one hour).
        """
        self.stats = stats
        # NOTE(review): get_node_id is neither defined nor imported in this
        # snippet — confirm it exists in the surrounding module.
        self.node_id = get_node_id()
        self.interval = interval
        self.task = None
        # Bug fix: the original never stored ``mail``, so send_email
        # crashed with AttributeError on ``self.mail``.
        self.mail = mail
        self.recipients = recipients

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from crawler settings.

        Raises ``NotConfigured`` (disabling the extension) when no
        STATSMAILER_RCPTS recipients are configured.
        """
        recipients = crawler.settings.getlist("STATSMAILER_RCPTS")
        if not recipients:
            raise NotConfigured
        mail = MailSender.from_settings(crawler.settings)
        o = cls(crawler.stats, recipients, mail)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_opened(self, spider):
        # Start the periodic task; LoopingCall fires immediately, then
        # every self.interval seconds.
        self.task = task.LoopingCall(self.send_email, spider)
        self.task.start(self.interval)

    def send_email(self, spider):
        body = ''  # e-mail body; fill in with the stats you want to report
        if body:  # nothing is sent while the body is empty
            return self.mail.send(self.recipients, "Scrapy stats for: %s" % spider.name, body)

    def spider_closed(self, spider, reason):
        # Stop the looping task cleanly when the spider finishes.
        if self.task and self.task.running:
            self.task.stop()
在扩展中引用
# Register the extension in settings.py. Replace '路径' with the real
# dotted module path to StatsMailer; 900 is the extension load-order value.
EXTENSIONS = {
'路径.StatsMailer': 900
}