Scrapy指定顺序输出 -《狗嗨默示录》-
items.py
import scrapy
class CollectipsItem(scrapy.Item):
    """Scrapy item holding one collected IP record.

    Every attribute is a plain ``scrapy.Field()`` with no metadata.
    NOTE(review): the per-field meanings below are read off the field
    names only -- confirm against the spider that populates them.
    """

    IP = scrapy.Field()               # presumably the host address
    PORT = scrapy.Field()             # presumably the port number
    POSITION = scrapy.Field()         # presumably the geographic location
    TYPE = scrapy.Field()             # presumably the protocol/kind -- TODO confirm
    SPEED = scrapy.Field()
    CONNECT_TIME = scrapy.Field()
    SURVIVE_TIME = scrapy.Field()
    LAST_CHECK_TIME = scrapy.Field()
(1)在spiders中增加文件csv_item_exporter.py
try:
    # Modern location of the project settings loader.
    from scrapy.utils.project import get_project_settings
    settings = get_project_settings()
except ImportError:  # legacy Scrapy: fall back to the old settings singleton
    from scrapy.conf import settings
try:
    # Exporters moved out of scrapy.contrib in Scrapy 1.0.
    from scrapy.exporters import CsvItemExporter
except ImportError:  # Scrapy < 1.0
    from scrapy.contrib.exporter import CsvItemExporter
class MyProjectCsvItemExporter(CsvItemExporter):
    """CSV exporter whose delimiter and column list come from project settings.

    Two custom settings are consulted at construction time:

    * ``CSV_DELIMITER`` -- forwarded as the ``delimiter`` keyword
      (falls back to ``','`` when the setting is absent).
    * ``FIELDS_TO_EXPORT`` -- forwarded as the ``fields_to_export`` keyword
      when it is non-empty; otherwise that keyword is left as the caller
      supplied it.
    """

    def __init__(self, *args, **kwargs):
        """Inject the settings-driven options and defer to ``CsvItemExporter``.

        Positional and keyword arguments are passed through unchanged,
        except that ``delimiter`` is always overwritten and
        ``fields_to_export`` is overwritten whenever the setting is non-empty.
        """
        kwargs['delimiter'] = settings.get('CSV_DELIMITER', ',')

        fields_to_export = settings.get('FIELDS_TO_EXPORT', [])
        if fields_to_export:
            kwargs['fields_to_export'] = fields_to_export

        # Explicit two-argument super() keeps the snippet usable on Python 2.
        super(MyProjectCsvItemExporter, self).__init__(*args, **kwargs)
(2)在settings.py中配置
# Route the "csv" feed format to the custom exporter class.
# "CollectIPs" is the project name.
FEED_EXPORTERS = {
    "csv": "CollectIPs.spiders.csv_item_exporter.MyProjectCsvItemExporter",
}

# Item fields handed to the exporter as ``fields_to_export``, listed in the
# desired output order.
# NOTE(review): newer Scrapy releases provide a built-in FEED_EXPORT_FIELDS
# setting that may cover this use case -- confirm for the version in use.
FIELDS_TO_EXPORT = [
    "IP",
    "PORT",
    "POSITION",
    "TYPE",
    "SPEED",
    "CONNECT_TIME",
    "SURVIVE_TIME",
    "LAST_CHECK_TIME",
]
在settings.py中也可以指定csv文件中的分隔符
# Field separator read by the custom CSV exporter; a tab character here.
CSV_DELIMITER = '\t'