将数据存储为text
# Save scraped items to a plain-text file, one comma-separated line per item.
class Music163Pipeline1(object):
    """Scrapy item pipeline that writes each item as one line of text.

    The values of every item are converted to strings, joined with commas
    and written to ``self.path`` followed by a newline. No quoting or
    escaping is applied, so values that themselves contain commas or
    newlines are written verbatim.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline from the crawler's ``FPATH`` setting.

        When ``FPATH`` is not configured, ``settings.get`` returns ``None``;
        in that case the constructor's default path is used instead of
        passing ``None`` through to ``open()``.
        """
        fpath = crawler.settings.get('FPATH')
        if fpath is None:
            return cls()
        return cls(fpath=fpath)

    def __init__(self, fpath="./1.txt"):
        """Remember the output path; the file is opened in ``open_spider``."""
        self.path = fpath
        self.fhd = None

    def open_spider(self, spider):
        """Open (and truncate) the output file when the spider starts."""
        # Explicit UTF-8 so non-ASCII text does not depend on the OS locale.
        self.fhd = open(self.path, 'w', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file when the spider finishes."""
        # Guard against close_spider being called without open_spider.
        if self.fhd is not None:
            self.fhd.close()

    def process_item(self, item, spider):
        """Write one item as a comma-separated line and pass it on.

        Returns the item unchanged so later pipelines still receive it.
        """
        values = dict(item).values()
        # str() each value: str.join raises TypeError on ints, lists, None...
        self.fhd.write(','.join(str(value) for value in values) + '\n')
        return item
将数据存储为json
import json
# Save scraped items to a file as a single JSON array.
class Music163Pipeline(object):
    """Scrapy item pipeline that streams items into one JSON array.

    ``open_spider`` writes the opening ``[``, every item is appended as a
    JSON object, and ``close_spider`` writes the closing ``]``. Separators
    are written *before* every item except the first, so the finished file
    is valid JSON (no trailing comma), including when no item was scraped.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline from the crawler's ``FPATH`` setting.

        Falls back to the constructor's default path when ``FPATH`` is not
        configured, instead of forwarding ``None`` to ``open()``.
        """
        fpath = crawler.settings.get('FPATH')
        if fpath is None:
            return cls()
        return cls(fpath=fpath)

    def __init__(self, fpath="./tmp.json"):
        """Remember the output path; the file is opened in ``open_spider``."""
        self.fpath = fpath
        self.handler = None
        # True until the first item has been written; controls separators.
        self._first_item = True

    def open_spider(self, spider):
        """Open (and truncate) the output file and start the JSON array."""
        # Items are dumped with ensure_ascii=False, so the file must be able
        # to hold arbitrary Unicode regardless of the OS locale.
        self.handler = open(self.fpath, 'w', encoding='utf-8')
        self.handler.write('[')
        self._first_item = True

    def close_spider(self, spider):
        """Terminate the JSON array and close the file."""
        # Guard against close_spider being called without open_spider.
        if self.handler is None:
            return
        self.handler.write(']')
        self.handler.close()

    def process_item(self, item, spider):
        """Append one item to the JSON array and pass it on.

        Returns the item unchanged so later pipelines still receive it.
        """
        line = json.dumps(dict(item), ensure_ascii=False)
        if self._first_item:
            self._first_item = False
        else:
            # Separator goes before the item so the last one has no comma.
            self.handler.write(',\n')
        self.handler.write(line)
        return item
将数据存储为csv
class WangyinPipeline_a(object):
    """Scrapy item pipeline that writes items to ``wangyiyun.csv``.

    The file is created (truncated) in the current working directory when
    the pipeline is instantiated, and a header row is written immediately.
    Each processed item then contributes one row with the columns listed
    in ``FIELDS``.
    """

    # Column order shared by the header row and every data row.
    FIELDS = ('id', 'song', 'nickname', 'avatarurl', 'hotcomment_like', 'comments')

    def __init__(self):
        """Open the CSV file and write the header row."""
        # Local import: no module-level ``import csv`` is visible in this file.
        import csv

        # newline='' is what the csv module requires to avoid blank lines on
        # Windows; explicit UTF-8 keeps non-ASCII text locale-independent.
        self.f = open("wangyiyun.csv", "w", newline='', encoding='utf-8')
        self.writer = csv.writer(self.f)
        self.writer.writerow(list(self.FIELDS))

    def process_item(self, item, spider):
        """Write one item as a CSV row and pass it on.

        Raises ``KeyError`` if the item lacks any of the ``FIELDS`` keys.
        Returns the item unchanged so later pipelines still receive it.
        """
        self.writer.writerow([item[field] for field in self.FIELDS])
        return item

    def close_spider(self, spider):
        """Close the CSV file when the spider finishes."""
        # csv.writer objects have no close(); only the file needs closing.
        self.f.close()
在settings.py中通过ITEM_PIPELINES启用你的管道(数值决定执行顺序,数值小的先执行,习惯上取0-1000之间)
# Item pipelines enabled for the project, keyed by dotted class path.
# NOTE(review): Music163Pipeline and Music163Pipeline1 both read the FPATH
# setting in from_crawler, so enabling both with FPATH set makes them open
# the same file for writing -- confirm this is intended.
ITEM_PIPELINES = {
'myproject.pipelines.Music163Pipeline': 300,# JSON output (this class writes a JSON array)
'myproject.pipelines.Music163Pipeline1': 301,# plain-text output (comma-separated lines)
'myproject.pipelines.WangyinPipeline_a': 302,# CSV output
}