Reason
最近在做渠道数据的梳理,发现有不少数据是通过邮件的方式提供的,之前手动编辑过几次再插入业务mysql数据库,觉得作为一个程序媛老这么干太low了,所以写了个python小脚本,让它自己定时去跑
Steps
- 验证邮箱拿到excel的xlsx格式附件存到本地文件夹
- 拿已有mysql数据,看看已经写入过哪些日期
- 读取excel数据,把尚未插入过的日期的数据写入到mysql里
OS & Libs
工作环境mac
安装exchangelib
作用:读exchange邮件需要
安装和使用见官网 https://pypi.org/project/exchangelib/
安装xlrd
作用:读excel需要
1、官网下载压缩包 https://pypi.org/project/xlrd/
2、解压压缩包
3、进入到解压的文件夹
4、执行指令:sudo python setup.py install
安装MySQLdb
作用:读mysql需要
1、官网下载压缩包 https://pypi.org/project/MySQL-python/1.2.5/
2、解压压缩包
3、进入到解压的文件夹
4、执行指令:sudo python setup.py install
安装mysql
默认你本地已经有mysql
Codes
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from exchangelib import DELEGATE, IMPERSONATION, Account, Credentials, ServiceAccount, \
EWSDateTime, EWSTimeZone, Configuration, NTLM, GSSAPI, CalendarItem, Message, \
Mailbox, Attendee, Q, ExtendedProperty, FileAttachment, ItemAttachment, \
HTMLBody, Build, Version, FolderCollection,EWSDate
from exchangelib.protocol import NoVerifyHTTPAdapter
from datetime import datetime, timedelta
import os
import datetime
import xlrd
import MySQLdb
def DownloadExcel(filter_day):
    """Download the day's ``.xlsx`` report attachment from the Exchange inbox.

    Looks in the account's inbox for messages whose subject contains
    ``'DSP-wifi-' + str(filter_day.date())`` and writes the spreadsheet
    attachment to ``<base_path>/<str(filter_day.date())>.xlsx``.

    Only file attachments whose name ends with ``.xlsx`` are saved. Every
    attachment is written to the same local path, so an unrelated file on the
    same mail (for example an inline signature image) would otherwise
    overwrite the report. If several spreadsheets match, the last one wins.

    Args:
        filter_day: object exposing ``.date()``; the string form of that date
            is used both in the subject filter and in the local file name.

    Relies on the module-level ``base_path`` set by the script entry point.
    """
    # NOTE(review): credentials are hard-coded placeholders; load them from
    # the environment or a config file before using this for real.
    creds = Credentials(
        username='emailname',
        password='XXXXXXXXX'
    )
    account = Account(
        primary_smtp_address='emailname@xxxx.com',
        credentials=creds,
        autodiscover=True,
        access_type=DELEGATE
    )
    day_label = str(filter_day.date())
    local_path = os.path.join(base_path, day_label + '.xlsx')
    print('DownloadExcel %s' % local_path)
    query = Q(subject__icontains='DSP-wifi-' + day_label)
    for item in account.inbox.filter(query):
        for attachment in item.attachments:
            if not isinstance(attachment, FileAttachment):
                continue
            # Skip anything that is not an .xlsx file so that other
            # attachments cannot clobber the downloaded report.
            if not (attachment.name or '').lower().endswith('.xlsx'):
                continue
            with open(local_path, 'wb') as f:
                f.write(attachment.content)
            # Single-argument form prints plain text under Python 2 as well
            # (the two-argument form printed a tuple repr there).
            print('Saved attachment to %s' % local_path)
# Insert data
def insertData(sql, params=None):
    """Execute one write statement against the local MySQL database.

    Opens a new connection, runs *sql*, and commits. On a database error the
    transaction is rolled back and the error is printed; the exception is not
    re-raised, so callers keep the original best-effort behaviour.

    Args:
        sql: the SQL statement to execute.
        params: optional sequence/mapping of values bound to ``%s``
            placeholders in *sql* by the driver. When omitted, *sql* is
            executed verbatim, exactly as before.
    """
    db = MySQLdb.connect("localhost", "root", "yourpassword", "yourdb", charset='utf8' )
    try:
        cursor = db.cursor()
        cursor.execute(sql, params)
        db.commit()
    except MySQLdb.Error as exc:
        # Report the failure instead of silently discarding it.
        db.rollback()
        print("Error: unable to insert data: %s" % exc)
    finally:
        # Always release the connection, even on unexpected exceptions.
        db.close()
# Fetch the dates that already exist in the database
def getExistDataDate():
    """Return the ``dt`` value of every row in ``cooperation_data``.

    Each value is also printed as ``existsDate=<value>``. On a database error
    a message and the error are printed, and whatever was collected so far
    (possibly an empty list) is returned.

    Returns:
        list of ``dt`` column values as returned by the MySQL driver.
    """
    result = []
    db = MySQLdb.connect("localhost", "root", "yourpassword", "yourdb", charset='utf8' )
    sql = "SELECT dt FROM cooperation_data "
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        for row in cursor.fetchall():
            result.append(row[0])
            # Wrap in a one-tuple so any value type formats safely.
            print("existsDate=%s" % (row[0],))
    except MySQLdb.Error as exc:
        print("Error: unable to fecth data")
        # Surface the underlying cause, which was previously discarded.
        print(exc)
    finally:
        # Always release the connection, even on unexpected exceptions.
        db.close()
    return result
# Read rows from the downloaded Excel file
def ReadDataFromExcel(filter_day):
    """Load selected columns from the day's downloaded spreadsheet.

    Opens ``<base_path>/<str(filter_day.date())>.xlsx`` and reads the first
    sheet, skipping the header row. For every remaining row the cells in
    columns 0, 2, 3, 5 and 6 are collected; date-typed cells are converted to
    ``'YYYY-MM-DD'`` strings and all other cells are kept as read.

    Args:
        filter_day: object exposing ``.date()``; selects the file to read.

    Returns:
        list of rows, each a list of five values; empty if the file is absent.
    """
    local_path = os.path.join(base_path, str(filter_day.date()) + '.xlsx')
    print(' '.join(['ReadDataFromExcel', local_path]))
    if not os.path.exists(local_path):
        return []

    workbook = xlrd.open_workbook(local_path)
    first_sheet_name = workbook.sheet_names()[0]
    sheet = workbook.sheet_by_name(first_sheet_name)

    # Only these columns are needed by the business logic.
    wanted_columns = (0, 2, 3, 5, 6)
    rows = []
    # Row 0 is the header row, so start from row 1.
    for row_idx in range(1, sheet.nrows):
        record = []
        for col_idx in wanted_columns:
            cell = sheet.cell(row_idx, col_idx)
            if cell.ctype == 3:
                # Cell type 3 is handled as an Excel date serial number.
                ymd = xlrd.xldate_as_tuple(cell.value, workbook.datemode)[:3]
                record.append(datetime.date(*ymd).strftime('%Y-%m-%d'))
            else:
                record.append(cell.value)
        rows.append(record)
    return rows
if __name__=="__main__":
    # Script entry point: download today's report, then insert rows whose
    # date is not yet present in cooperation_data.
    tz = EWSTimeZone.localzone()
    # Take one snapshot of "now" so year/month/day cannot straddle midnight.
    now = datetime.datetime.now()
    filter_day = tz.localize(EWSDateTime(now.year, now.month, now.day, 0, 0))
    print(filter_day)
    # Must stay a module-level global: DownloadExcel and ReadDataFromExcel
    # read base_path directly.
    base_path = '/Users/lily/workspace/email_data'
    DownloadExcel(filter_day)
    existDates = getExistDataDate()
    # Compare dates as strings. The spreadsheet side is a 'YYYY-MM-DD' string,
    # while the driver presumably returns datetime.date objects for a DATE
    # column (TODO confirm the column type); comparing the raw values would
    # then never match and every run would re-insert the same rows.
    known_dates = set(str(d) for d in existDates)
    dataNew = ReadDataFromExcel(filter_day)
    for row in dataNew:
        if str(row[0]) not in known_dates:
            # NOTE(review): the date text comes from an e-mailed spreadsheet
            # and is interpolated into the SQL string; consider a
            # parameterized query if the sender is not fully trusted.
            sql = "insert into cooperation_data (dt,impression,clicks,cost,ecpm) value('%s',%d,%d,%.2f,%.2f);"%(str(row[0]),int(row[1]),int(row[2]),float(row[3]),float(row[4]))
            print(sql)
            insertData(sql)
At Last
还是python小白,大神看到莫笑