import pymysql
from scrapy.exceptions import DropItem
import time


class ErshouchePipeline(object):
    """Scrapy item pipeline that persists used-car records into MySQL.

    On startup it preloads every URL already stored in the 二手车之家 table;
    ``process_item`` then drops any item whose URL has been seen before
    (either in a previous run or earlier in the current crawl) and inserts
    the rest.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded — consider moving them to
        # Scrapy settings and reading them in open_spider(self, spider).
        self.conn = pymysql.connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            passwd='mlpythonlmoi',
            db='ershouche',
            charset='utf8',
        )
        # Fixed attribute spelling: was ``self.cusor``.
        self.cursor = self.conn.cursor(cursor=pymysql.cursors.DictCursor)
        # Preload already-scraped URLs so re-runs do not insert duplicates.
        sql1 = "select 路由网址 from 二手车之家"
        self.cursor.execute(sql1)
        temp = self.cursor.fetchall()  # all previously stored rows (dicts)
        print('返回查询获得的记录:', temp)
        # A set gives O(1) membership tests; the original list was O(n) per item.
        self.url_set = {row['路由网址'] for row in temp}
        print('存在的:', self.url_set)

    def process_item(self, item, spider):
        """Insert *item* into the table unless its URL was seen before.

        Raises:
            DropItem: when the item's ``car_url`` is already stored.
        Returns:
            The unchanged item, for downstream pipelines.
        """
        # Guard clause: duplicate → drop immediately.
        if item['car_url'] in self.url_set:
            raise DropItem('该item数据库中已经存在!')
        # Parameterized insert — values are escaped by the driver, not
        # concatenated into the SQL string.
        sql = ("insert into 二手车之家 values"
               "(Null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        lst = (item['city'], item['trademark'], item['model'], item['colour'],
               item['price'], item['purpose'], item['vehicle_condition'],
               item['drive_mode'], item['Truck_kilometer'], item['car_license'],
               item['Stop_displacemen'], item['year_jian_due'],
               item['insurance_policy_matures'], item['assurance_due'],
               item['emission_standard'], item['guohu_number'],
               item['maintenance'], item['car_url'])
        self.cursor.execute(sql, lst)
        self.conn.commit()
        # Bug fix: remember the URL so duplicates encountered *within the
        # same crawl* are also dropped (the original only checked URLs
        # loaded from past runs).
        self.url_set.add(item['car_url'])
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()  # close the MySQL connection
        print("操做结束!")
        print('结束时间:' + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))