scrapy管道MySQL简记

import time

import pymysql
from scrapy.exceptions import DropItem


class ErshouchePipeline:
    """Scrapy item pipeline that stores scraped car items in a MySQL table.

    On construction the pipeline opens a MySQL connection and loads every
    value of the ``路由网址`` column from the ``二手车之家`` table. Items whose
    ``car_url`` is already known are dropped; all other items are inserted
    and their URL is remembered so later duplicates in the same crawl are
    dropped as well.

    Attributes:
        conn: The open PyMySQL connection.
        cusor: A ``DictCursor`` on ``conn`` (attribute name kept as-is,
            including its spelling, for backward compatibility).
        url_list: List of URLs known to be stored in the table.
    """

    # Item keys in the order they are bound to the INSERT placeholders.
    # The leading ``Null`` in the statement fills the table's first column
    # (presumably an auto-increment id -- confirm against the schema).
    _ITEM_FIELDS = (
        'city', 'trademark', 'model', 'colour', 'price', 'purpose',
        'vehicle_condition', 'drive_mode', 'Truck_kilometer', 'car_license',
        'Stop_displacemen', 'year_jian_due', 'insurance_policy_matures',
        'assurance_due', 'emission_standard', 'guohu_number', 'maintenance',
        'car_url',
    )

    # One ``%s`` placeholder per entry in ``_ITEM_FIELDS``.
    _INSERT_SQL = "insert into 二手车之家 values(Null,{})".format(
        ",".join(["%s"] * len(_ITEM_FIELDS))
    )

    def __init__(self):
        """Connect to MySQL and preload the URLs that are already stored."""
        # NOTE(review): credentials are hard-coded; consider reading them
        # from the Scrapy settings or the environment instead.
        self.conn = pymysql.connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            passwd='mlpythonlmoi',
            db='ershouche',
            charset='utf8',
        )
        self.cusor = self.conn.cursor(cursor=pymysql.cursors.DictCursor)

        # Read the URLs of the rows that were already crawled.
        self.cusor.execute("select 路由网址 from 二手车之家")
        rows = self.cusor.fetchall()
        print('返回查询获得的记录:', rows)

        self.url_list = [row['路由网址'] for row in rows]
        # Set mirror of ``url_list`` so the per-item duplicate check is O(1).
        self._seen_urls = set(self.url_list)
        print('存在的:', self.url_list)

    def process_item(self, item, spider):
        """Insert ``item`` into MySQL unless its URL is already stored.

        Args:
            item: Mapping holding every key listed in ``_ITEM_FIELDS``.
            spider: The spider that produced the item (unused).

        Returns:
            The unchanged ``item`` after a successful insert.

        Raises:
            DropItem: If ``item['car_url']`` is already known.
            KeyError: If ``item`` lacks one of the expected keys.
            pymysql.MySQLError: If the insert or commit fails; the
                transaction is rolled back before re-raising.
        """
        url = item['car_url']
        if url in self._seen_urls:
            raise DropItem('该item数据库中已经存在!')

        values = tuple(item[field] for field in self._ITEM_FIELDS)
        try:
            self.cusor.execute(self._INSERT_SQL, values)
            self.conn.commit()
        except pymysql.MySQLError:
            # Do not leave a half-finished transaction on the connection.
            self.conn.rollback()
            raise

        # Remember the URL only after the row is committed, so a URL that
        # shows up again during this crawl is dropped instead of re-inserted.
        self._seen_urls.add(url)
        self.url_list.append(url)
        return item

    def close_spider(self, spider):
        """Close the cursor and the connection, then print the finish time."""
        try:
            self.cusor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            self.conn.close()
        print("操做结束!")
        print('结束时间:' + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
相关文章
相关标签/搜索