从Python爬虫到Spark预处理数据的真实需求[四]

絮叨两句:
博主是一名软件工程系的在校生,利用博客记录本身所学的知识,也但愿能帮助到正在学习的同窗们
人的一辈子中会遇到各类各样的困难和折磨,逃避是解决不了问题的,惟有以乐观的精神去迎接生活的挑战
少年易老学难成,一寸光阴不可轻。
最喜欢的一句话:今日事,今日毕


博主刚刚接触爬虫,有什么不足之处请你们谅解,也但愿能指导一下

系列文章目录

从Python爬虫到Spark预处理数据的真实需求[一]
从Python爬虫到Spark预处理数据的真实需求[二]
从Python爬虫到Spark预处理数据的真实需求[三]
从Python爬虫到Spark预处理数据的真实需求[四]
从Python爬虫到Spark预处理数据的真实需求[五]



前言

这一章是用来对没有获取到的数据进行再次请求获取,然后更新到 MySQL 中


提示:如下是本篇文章正文内容,下面案例可供参考

代码

火花塞

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Ask the proxy-pool service for one usable proxy address."""
    resp = requests.get('http://xxxxxxxxxxx/get/')
    return resp.json()['proxy']
def getHTML(url):
    """Download *url* through a pooled proxy, retrying up to ``trytimes`` times.

    Returns the body text of the last response received (even non-200, as
    before).  Raises RuntimeError when no attempt ever produced a response;
    the original code crashed with UnboundLocalError in that situation.
    """
    proxy = get_proxy()  # obtain a proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    # NOTE(review): session cookie pasted from a logged-in browser; it will
    # expire and should be refreshed or supplied via configuration.
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 100  # retry count

    response = None  # guard: defined even if every attempt raises
    for i in range(trytimes):
        try:
            response = requests.get(url, headers=headers,
                                    proxies={"http": "https://{}".format(proxy)},
                                    timeout=1)
            # non-200 (e.g. 302) falls through and retries
            if response.status_code == 200:
                break
        except requests.RequestException:
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        raise RuntimeError(f'all {trytimes} requests failed for {url}')
    return response.text


def getProduct(https_li_href, brand_name, product_sku, product_Price):
    """Scrape one spark-plug product page and build an UPDATE statement
    for ``xxuan_car_jd_hhs_product``.

    Empty fields end up as the literal string ``'NULL'``.  Returns the SQL
    string.  NOTE(review): values are string-interpolated, not
    parameterized — scraped text containing quotes can break/inject the
    statement; callers should migrate to placeholders.
    """
    # column -> value; key order matters: 'GrossWeight' must stay last
    # because the WHERE clause is appended with it.
    sql = {'skuid': product_sku,
           'name': '',
           'brand': brand_name,
           'price': product_Price,
           'url': https_li_href,
           'commodity_Name': '',
           'image': '',
           'sales': '',
           'material': '',
           'type': '',
           'ArticleNumbera': '',
           'GrossWeight': ''}
    soup = BeautifulSoup(getHTML(https_li_href), 'html.parser')
    # product title
    info_wrap = soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if info_wrap is not None:
        name_div = info_wrap.find('div', attrs={'class': 'sku-name'})
        if name_div is not None:
            sql['commodity_Name'] = str(name_div.text).strip()
    # product image: 'NULL' when the <img id="spec-img"> node is missing
    spec_img = soup.find('img', attrs={'id': 'spec-img'})
    sql['image'] = 'NULL' if spec_img is None else f"https:{spec_img['data-origin']}"
    # spec list: label prefix -> column (order mirrors the old elif chain)
    labels = {'商品名称:': 'name',
              '销售规格:': 'sales',
              '产品材质:': 'material',
              '产品类型:': 'type',
              '货号:': 'ArticleNumbera',
              '商品毛重:': 'GrossWeight'}
    parameter_list = soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    # assemble the UPDATE; the WHERE clause rides on the last column
    db = "UPDATE `xxuan_car_jd_hhs_product` SET "
    for column, value in sql.items():
        if len(str(value)) == 0:
            value = 'NULL'
        if column != "GrossWeight":
            db += f"{column}='{value}',"
        else:
            db += (f"{column}='{value}' WHERE skuid='{product_sku}' AND "
                   f"brand='{brand_name}' AND price='{product_Price}' AND "
                   f"url='{https_li_href}';")
    return db

def connectMysql():
    """Re-crawl spark-plug rows whose name is the literal 'NULL' and update them."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # auto-commit each statement (conn.commit() kept for safety)
    )
    cur = conn.cursor()
    # rows whose earlier scrape failed (name stored as the string "NULL")
    cur.execute('select * from `xxuan_car_jd_hhs_product` where name ="NULL"')
    for row in cur.fetchall():
        # Index the tuple columns directly; the old str(row).split(',')
        # parsing broke whenever any field contained a comma.
        sku = str(row[1]).strip()
        brand = str(row[3]).strip()
        price = str(row[4]).strip()
        href = str(row[5]).strip()
        db = getProduct(https_li_href=href, brand_name=brand,
                        product_sku=sku, product_Price=price)
        print(db)
        cur.execute(db)
        conn.commit()



if __name__ == '__main__':
    # entry point: re-crawl and patch the incomplete rows
    connectMysql()

机油

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Ask the proxy-pool service for one usable proxy address."""
    resp = requests.get('http://xxxxxxxxxxx/get/')
    return resp.json()['proxy']
def getHTML(url):
    """Download *url* through a pooled proxy, retrying up to ``trytimes`` times.

    Returns the body text of the last response received (even non-200, as
    before).  Raises RuntimeError when no attempt ever produced a response;
    the original code crashed with UnboundLocalError in that situation.
    """
    proxy = get_proxy()  # obtain a proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    # NOTE(review): session cookie pasted from a logged-in browser; it will
    # expire and should be refreshed or supplied via configuration.
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 1000  # retry count

    response = None  # guard: defined even if every attempt raises
    for i in range(trytimes):
        try:
            response = requests.get(url, headers=headers,
                                    proxies={"http": "https://{}".format(proxy)},
                                    timeout=1)
            # non-200 (e.g. 302) falls through and retries
            if response.status_code == 200:
                break
        except requests.RequestException:
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        raise RuntimeError(f'all {trytimes} requests failed for {url}')
    return response.text


def getProduct(https_li_href, brand_name):
    """Scrape one engine-oil product page and build an UPDATE statement
    for ``xxuan_car_jd_mobil_product``.

    Empty fields end up as the literal string ``'NULL'``.  Returns the SQL
    string.  NOTE(review): values are string-interpolated, not
    parameterized — scraped text containing quotes can break/inject the
    statement; callers should migrate to placeholders.
    """
    # column -> value; key order matters: 'volume' must stay last because
    # the WHERE clause is appended with it.
    sql = {'skuid': '',
           'name': '',
           'brand': brand_name,
           'type': '',
           'url': https_li_href,
           'originplace': '',
           'netweight': '',
           'commodity_Name': '',
           'image': '',
           'viscosity': '',
           'volume': ''}
    soup = BeautifulSoup(getHTML(https_li_href), 'html.parser')
    # product title
    info_wrap = soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if info_wrap is not None:
        name_div = info_wrap.find('div', attrs={'class': 'sku-name'})
        if name_div is not None:
            sql['commodity_Name'] = str(name_div.text).strip()
    # product image: 'NULL' when the <img id="spec-img"> node is missing
    spec_img = soup.find('img', attrs={'id': 'spec-img'})
    sql['image'] = 'NULL' if spec_img is None else f"https:{spec_img['data-origin']}"
    # spec list: label prefix -> column (order mirrors the old elif chain)
    labels = {'商品名称:': 'name',
              '商品编号:': 'skuid',
              '商品毛重:': 'netweight',
              '商品产地:': 'originplace',
              '粘度:': 'viscosity',
              '机油种类:': 'type',
              '容量:': 'volume'}
    parameter_list = soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    # assemble the UPDATE; the WHERE clause rides on the last column
    db = "UPDATE `xxuan_car_jd_mobil_product` SET "
    for column, value in sql.items():
        if len(str(value)) == 0:
            value = 'NULL'
        if column != "volume":
            db += f"{column}='{value}',"
        else:
            db += f"{column}='{value}' WHERE url='{https_li_href}' AND brand='{brand_name}'"
    return db


def connectMysql():
    """Re-crawl engine-oil rows whose skuid is the literal 'NULL' and update them."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # auto-commit each statement (conn.commit() kept for safety)
    )
    cur = conn.cursor()
    # rows whose earlier scrape failed (skuid stored as the string "NULL")
    cur.execute('select * from `xxuan_car_jd_mobil_product` where skuid ="NULL"')
    for row in cur.fetchall():
        # Index the tuple columns directly; the old str(row).split(',')
        # parsing broke whenever any field contained a comma.
        brand = str(row[3]).strip()
        href = str(row[6]).strip()
        db = getProduct(https_li_href=href, brand_name=brand)
        print(db)
        cur.execute(db)
        conn.commit()



if __name__ == '__main__':
    # entry point: re-crawl and patch the incomplete rows
    connectMysql()

轮胎

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Ask the proxy-pool service for one usable proxy address."""
    resp = requests.get('http://xxxxxxxxxxx/get/')
    return resp.json()['proxy']
def getHTML(url):
    """Download *url* through a pooled proxy, retrying up to ``trytimes`` times.

    Returns the body text of the last response received (even non-200, as
    before).  Raises RuntimeError when no attempt ever produced a response;
    the original code crashed with UnboundLocalError in that situation.
    """
    proxy = get_proxy()  # obtain a proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    # NOTE(review): session cookie pasted from a logged-in browser; it will
    # expire and should be refreshed or supplied via configuration.
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 1000  # retry count

    response = None  # guard: defined even if every attempt raises
    for i in range(trytimes):
        try:
            response = requests.get(url, headers=headers,
                                    proxies={"http": "https://{}".format(proxy)},
                                    timeout=1)
            # non-200 (e.g. 302) falls through and retries
            if response.status_code == 200:
                break
        except requests.RequestException:
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        raise RuntimeError(f'all {trytimes} requests failed for {url}')
    return response.text


def getProduct(https_li_href, brand_name, price):
    """Scrape one tire product page and build an UPDATE statement for
    ``xxuan_car_jd_lt_product``.

    Empty fields end up as the literal string ``'NULL'``.  Returns the SQL
    string.  NOTE(review): values are string-interpolated, not
    parameterized — scraped text containing quotes can break/inject the
    statement; callers should migrate to placeholders.
    """
    # column -> value; key order matters: 'type' must stay last because
    # the WHERE clause is appended with it.
    sql = {'skuid': '',
           'name': '',
           'brand': brand_name,
           'url': https_li_href,
           'price': price,
           'commodity_Name': '',
           'image': '',
           'netweight': '',
           'originplace': '',
           'size': '',
           'width': '',
           'number': '',
           'performance': '',
           'Flattening': '',
           'characteristics': '',
           'type': ''}
    soup = BeautifulSoup(getHTML(https_li_href), 'html.parser')
    # product title
    info_wrap = soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if info_wrap is not None:
        name_div = info_wrap.find('div', attrs={'class': 'sku-name'})
        if name_div is not None:
            sql['commodity_Name'] = str(name_div.text).strip()
    # product image: 'NULL' when the <img id="spec-img"> node is missing
    spec_img = soup.find('img', attrs={'id': 'spec-img'})
    sql['image'] = 'NULL' if spec_img is None else f"https:{spec_img['data-origin']}"
    # spec list: label prefix -> column (order mirrors the old elif chain)
    labels = {'商品名称:': 'name',
              '商品编号:': 'skuid',
              '商品毛重:': 'netweight',
              '商品产地:': 'originplace',
              '尺寸:': 'size',
              '胎面宽度:': 'width',
              '扁平比:': 'Flattening',
              '货号:': 'number',
              '花纹性能:': 'performance',
              '轮胎特性:': 'characteristics',
              '车型类别:': 'type'}
    parameter_list = soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    # assemble the UPDATE; the WHERE clause rides on the last column
    db = "UPDATE `xxuan_car_jd_lt_product` SET "
    for column, value in sql.items():
        if len(str(value)) == 0:
            value = 'NULL'
        if column != "type":
            db += f"{column}='{value}',"
        else:
            db += f"{column}='{value}' WHERE url='{https_li_href}' AND brand='{brand_name}'"
    return db


def connectMysql():
    """Re-crawl tire rows whose skuid is the literal 'NULL' and update them."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # auto-commit each statement (conn.commit() kept for safety)
    )
    cur = conn.cursor()
    # rows whose earlier scrape failed (skuid stored as the string "NULL")
    cur.execute('select * from `xxuan_car_jd_lt_product` where skuid ="NULL"')
    for row in cur.fetchall():
        # Index the tuple columns directly; the old str(row).split(',')
        # parsing broke whenever any field contained a comma.
        brand = str(row[3]).strip()
        href = str(row[4]).strip()
        price = str(row[5]).strip()
        db = getProduct(https_li_href=href, brand_name=brand, price=price)
        print(db)
        cur.execute(db)
        conn.commit()



if __name__ == '__main__':
    # entry point: re-crawl and patch the incomplete rows
    connectMysql()

刹车片

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Ask the proxy-pool service for one usable proxy address."""
    resp = requests.get('http://xxxxxxxxxxx/get/')
    return resp.json()['proxy']
def getHTML(url):
    """Download *url* through a pooled proxy, retrying up to ``trytimes`` times.

    Returns the body text of the last response received (even non-200, as
    before).  Raises RuntimeError when no attempt ever produced a response;
    the original code crashed with UnboundLocalError in that situation.
    """
    proxy = get_proxy()  # obtain a proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    # NOTE(review): session cookie pasted from a logged-in browser; it will
    # expire and should be refreshed or supplied via configuration.
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 100  # retry count

    response = None  # guard: defined even if every attempt raises
    for i in range(trytimes):
        try:
            response = requests.get(url, headers=headers,
                                    proxies={"http": "https://{}".format(proxy)},
                                    timeout=1)
            # non-200 (e.g. 302) falls through and retries
            if response.status_code == 200:
                break
        except requests.RequestException:
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        raise RuntimeError(f'all {trytimes} requests failed for {url}')
    return response.text


def getProduct(https_li_href, brand_name, product_Sku, product_Price):
    """Scrape one brake-pad product page and build an UPDATE statement
    for ``xxuan_car_jd_scp_product``.

    Empty fields end up as the literal string ``'NULL'``.  Returns the SQL
    string.  NOTE(review): values are string-interpolated, not
    parameterized — scraped text containing quotes can break/inject the
    statement; callers should migrate to placeholders.
    """
    # column -> value; key order matters: 'texture' must stay last because
    # the WHERE clause is appended with it.
    sql = {'skuid': product_Sku,
           'name': '',
           'brand': brand_name,
           'price': product_Price,
           'url': https_li_href,
           'commodity_Name': '',
           'image': '',
           'Additivetype': '',
           'TypesOfAdditives': '',
           'NetContent': '',
           'ArticleNumber': '',
           'boiling': '',
           'package': '',
           'GrossWeight': '',
           'CommodityOrigin': '',
           'process': '',
           'Installation': '',
           'type': '',
           'texture': ''}
    soup = BeautifulSoup(getHTML(https_li_href), 'html.parser')
    # product title
    info_wrap = soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if info_wrap is not None:
        name_div = info_wrap.find('div', attrs={'class': 'sku-name'})
        if name_div is not None:
            sql['commodity_Name'] = str(name_div.text).strip()
    # product image: 'NULL' when the <img id="spec-img"> node is missing
    spec_img = soup.find('img', attrs={'id': 'spec-img'})
    sql['image'] = 'NULL' if spec_img is None else f"https:{spec_img['data-origin']}"
    # spec list: label prefix -> column.  Order mirrors the old elif chain;
    # in particular '产品类别:' must be tested before the shorter '类别:'.
    labels = {'商品名称:': 'name',
              '商品编号:': 'skuid',
              '产品类别:': 'type',
              '包装规格:': 'package',
              '干湿沸点:': 'boiling',
              '货号:': 'ArticleNumber',
              '商品毛重:': 'GrossWeight',
              '商品产地:': 'CommodityOrigin',
              '产品工艺:': 'process',
              '安装位置:': 'Installation',
              '类别:': 'type',
              '材质:': 'texture'}
    parameter_list = soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    # assemble the UPDATE; the WHERE clause rides on the last column
    db = "UPDATE `xxuan_car_jd_scp_product` SET "
    for column, value in sql.items():
        if len(str(value)) == 0:
            value = 'NULL'
        if column != "texture":
            db += f"{column}='{value}',"
        else:
            db += (f"{column}='{value}' WHERE skuid='{product_Sku}' AND "
                   f"brand='{brand_name}' AND price='{product_Price}' AND "
                   f"url='{https_li_href}';")
    return db

def connectMysql():
    """Re-crawl brake-pad rows whose name is the literal 'NULL' and update them."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # auto-commit each statement (conn.commit() kept for safety)
    )
    cur = conn.cursor()
    # rows whose earlier scrape failed (name stored as the string "NULL")
    cur.execute('select * from `xxuan_car_jd_scp_product` where name ="NULL"')
    for row in cur.fetchall():
        # Index the tuple columns directly; the old str(row).split(',')
        # parsing broke whenever any field contained a comma.
        sku = str(row[1]).strip()
        brand = str(row[3]).strip()
        price = str(row[4]).strip()
        href = str(row[5]).strip()
        print('sku:', sku, '----brand:', brand, '----href:', href, '----price:', price)
        db = getProduct(https_li_href=href, brand_name=brand,
                        product_Sku=sku, product_Price=price)
        print(db)
        cur.execute(db)
        conn.commit()


if __name__ == '__main__':
    # entry point: re-crawl and patch the incomplete rows
    connectMysql()

添加剂

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Ask the proxy-pool service for one usable proxy address."""
    resp = requests.get('http://xxxxxxxxxxx/get/')
    return resp.json()['proxy']
def getHTML(url):
    """Download *url* through a pooled proxy, retrying up to ``trytimes`` times.

    Returns the body text of the last response received (even non-200, as
    before).  Raises RuntimeError when no attempt ever produced a response;
    the original code crashed with UnboundLocalError in that situation.
    """
    proxy = get_proxy()  # obtain a proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    # NOTE(review): session cookie pasted from a logged-in browser; it will
    # expire and should be refreshed or supplied via configuration.
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 100  # retry count

    response = None  # guard: defined even if every attempt raises
    for i in range(trytimes):
        try:
            response = requests.get(url, headers=headers,
                                    proxies={"http": "https://{}".format(proxy)},
                                    timeout=1)
            # non-200 (e.g. 302) falls through and retries
            if response.status_code == 200:
                break
        except requests.RequestException:
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        raise RuntimeError(f'all {trytimes} requests failed for {url}')
    return response.text


def getProduct(https_li_href, brand_name, product_Price):
    """Scrape one additive product page and build its SQL UPDATE statement.

    Args:
        https_li_href (str): product page URL; also used as the row key.
        brand_name (str): brand name, used in the WHERE clause.
        product_Price (str): price, used in the WHERE clause.

    Returns:
        str: a complete ``UPDATE `xxuan_car_jd_tjj_product` ...`` statement.
    """
    # column -> scraped value; missing values become 'NULL' at render time
    sql = {'skuid': '',
           'name': '',
           'brand': '',
           'price': '',
           'url': '',
           'commodity_Name': '',
           'image': '',
           'Additivetype': '',
           'TypesOfAdditives': '',
           'NetContent': '',
           'ArticleNumber': '',
           'GrossWeight': '',
           'CommodityOrigin': ''
           }
    sql['url'] = https_li_href
    sql['brand'] = brand_name
    sql['price'] = product_Price
    product_HTML = getHTML(https_li_href)
    produc_soup = BeautifulSoup(product_HTML, 'html.parser')
    # product title
    sku_name_wrap = produc_soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if sku_name_wrap is not None:
        sku_name = sku_name_wrap.find('div', attrs={'class': 'sku-name'})
        if sku_name is not None:
            sql['commodity_Name'] = str(sku_name.text).strip()
    # product image (lazy-loaded URL lives in data-origin)
    spec_img = produc_soup.find('img', attrs={'id': 'spec-img'})
    if spec_img is None:
        sql['image'] = 'NULL'
    else:
        sql['image'] = f"https:{spec_img['data-origin']}"
    # spec <li> label prefix -> target column.  BUG FIX: the original elif
    # chain had dead `li.text == None` branches that assigned ('NULL',)
    # tuples (trailing comma); a lookup table removes them entirely.
    labels = {
        '商品名称:': 'name',
        '商品编号:': 'skuid',
        '添加剂类型:': 'Additivetype',
        '添加剂种类:': 'TypesOfAdditives',
        '净含量:': 'NetContent',
        '货号:': 'ArticleNumber',
        '商品毛重:': 'GrossWeight',
        '商品产地:': 'CommodityOrigin',
    }
    parameter_list = produc_soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    # render the UPDATE statement; CommodityOrigin is the last column and
    # carries the WHERE clause, matching the original statement layout
    db = "UPDATE `xxuan_car_jd_tjj_product` SET "
    for column in sql:
        value = str(sql[column]) if len(str(sql[column])) != 0 else 'NULL'
        # BUG FIX: escape single quotes so scraped text cannot break the SQL
        value = value.replace("'", "''")
        if column != "CommodityOrigin":
            db += f"{column}='{value}',"
        else:
            db += (f"{column}='{value}' WHERE brand='{brand_name}' "
                   f"AND price='{product_Price}' AND url='{https_li_href}';")
    return db

def connectMysql():
    """Re-crawl additive rows whose skuid is 'NULL' and update them in MySQL."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # every execute is committed automatically
    )
    try:
        cur = conn.cursor()
        # rows where the earlier crawl failed to capture a sku id
        select_sql = 'select * from `xxuan_car_jd_tjj_product` where skuid ="NULL"'
        cur.execute(select_sql)
        for row in cur.fetchall():
            # BUG FIX: the original parsed str(row).split(',') — that breaks
            # as soon as any field contains a comma.  Index the tuple directly.
            # NOTE(review): column order assumed from the original split
            # indices (3=brand, 4=price, 5=url) — confirm against the schema.
            brand = str(row[3]).strip()
            price = str(row[4]).strip()
            href = str(row[5]).strip()
            db = getProduct(https_li_href=href, brand_name=brand, product_Price=price)
            print(db)
            cur.execute(db)
            conn.commit()
    finally:
        conn.close()  # original leaked the connection



if __name__ == '__main__':
    # entry point: re-crawl and update the rows still missing data
    connectMysql()

原厂件

import requests
from fake_useragent import UserAgent
import pymysql
from bs4 import BeautifulSoup
def get_proxy():
    """Fetch one proxy address ("host:port") from the local proxy-pool service.

    Returns:
        str: the ``proxy`` field of the service's JSON response.
    """
    # BUG FIX: the original call had no timeout, so a dead proxy-pool
    # service would hang the whole crawler indefinitely.
    return requests.get('http://xxxxxxxxxxx/get/', timeout=5).json()['proxy']
def getHTML(url):
    """Download *url* through a pool proxy, retrying until HTTP 200.

    Args:
        url (str): the product page URL to fetch.

    Returns:
        str: the HTML body on success, or '' if every attempt failed.
    """
    proxy = get_proxy()  # proxy ip from the pool
    ua = UserAgent()  # random User-Agent generator
    cookie = '__jdu=577937999; areaId=15; ipLoc-djd=15-1213-3410-0; PCSYCityID=CN_330000_330100_330105; shshshfpa=bbe2e678-8333-005c-d01a-b070738f7860-1597809413; shshshfpb=pqSL0Bsl%2FLma%20U3QU6OB1xw%3D%3D; mt_xid=V2_52007VwcUVFVaVFIXQSldVWJWFwVVX05cGx0eQAAyVhRODQhWWQNJH1gEY1QWBwhcWwovShhfBHsCG05eWUNaG0IcVA5mACJQbVhiUh9IGV4MYgMbU1xfV14eQR1bAVcDFFZZ; user-key=68c44d85-8cac-4072-8369-c117f62d8eb3; cn=0; unpl=V2_ZzNtbURfFhZwXEEAKx4OVWJTElsSUUoUdQsRAHkbWgFmCkEKclRCFnQUR11nGl0UZwQZWEVcQxxFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH8fWg1lBRpVSmdzEkU4dlN7EFQGZDMTbUNnAUEpCk5Weh5YSGMFFFVAUUsdfThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_79d24e6ea6ca4a17a78012fe337508bf|1597913171572; __jda=122270672.577937999.1597809411.1597906781.1597909689.7; __jdc=122270672; 3AB9D23F7A4B3C9B=3SJYGJUIMVOSXHMAT54Z7M54MSN7POALYPRYHXXL4OTIUAYWVYTBG6AFPA4L4Q5ED37GELWUAZFAMTA6KV6JQSFCHA; shshshfp=b65beee4eac3565989e568b588a5f619; shshshsID=1d65ff1a1d5a2ecc10479b3f2d1ce72b_39_1597913210747; __jdb=122270672.48.577937999|7.1597909689'
    headers = {"User-Agent": ua.random,
               'Cookie': cookie}
    trytimes = 100  # number of retries

    response = None  # BUG FIX: original left this unbound if every try raised
    for i in range(trytimes):
        try:
            # BUG FIX: the original mapped the "http" key to an "https://"
            # proxy URL, a scheme mismatch that left https pages unproxied.
            response = requests.get(
                url,
                headers=headers,
                proxies={"http": "http://{}".format(proxy),
                         "https": "http://{}".format(proxy)},
                timeout=1,
            )
            # the server may also answer 302 etc.; only accept a real 200
            if response.status_code == 200:
                break
        except requests.RequestException:  # was a bare except: too broad
            print(f'requests failed {i} time', '要获取的URL:', url)
    if response is None:
        # every attempt raised; return empty HTML instead of crashing
        return ''
    return response.text


def getProduct(https_li_href, brand_name, product_sku, product_Price):
    """Scrape one OEM-part product page and build its SQL UPDATE statement.

    Args:
        https_li_href (str): product page URL; also used as the row key.
        brand_name (str): brand name, used in the WHERE clause.
        product_sku (str): sku id, used in the WHERE clause.
        product_Price (str): price, used in the WHERE clause.

    Returns:
        str: a complete ``UPDATE `xxuan_car_jd_ycj_product` ...`` statement.
    """
    # column -> scraped value; missing values become 'NULL' at render time
    sql = {'skuid': '',
           'name': '',
           'brand': '',
           'freezing': '',
           'url': '',
           'originplace': '',
           'netweight': '',
           'price': '',
           'commodity_Name': '',
           'image': '',
           'category': '',
           'package': '',
           'boiling': '',
           'sales': '',
           'installation': '',
           'transmission': ''
           }
    sql['url'] = https_li_href
    sql['brand'] = brand_name
    sql['skuid'] = product_sku
    sql['price'] = product_Price
    product_HTML = getHTML(https_li_href)
    produc_soup = BeautifulSoup(product_HTML, 'html.parser')
    # product title
    sku_name_wrap = produc_soup.find('div', attrs={'class': 'itemInfo-wrap'})
    if sku_name_wrap is not None:
        sku_name = sku_name_wrap.find('div', attrs={'class': 'sku-name'})
        if sku_name is not None:
            sql['commodity_Name'] = str(sku_name.text).strip()
    # product image (lazy-loaded URL lives in data-origin)
    spec_img = produc_soup.find('img', attrs={'id': 'spec-img'})
    if spec_img is None:
        sql['image'] = 'NULL'
    else:
        sql['image'] = f"https:{spec_img['data-origin']}"
    # spec <li> label prefix -> target column.  BUG FIX: the original elif
    # chain had dead `li.text == None` branches that assigned ('NULL',)
    # tuples (trailing comma); a lookup table removes them entirely.
    # '商品编号:' is intentionally absent: the original kept the caller-supplied
    # skuid and ignored the page value (both branches were `pass`).
    labels = {
        '商品名称:': 'name',
        '商品毛重:': 'netweight',
        '商品产地:': 'originplace',
        '产品类别:': 'category',
        '冰点:': 'freezing',
        '包装规格:': 'package',
        '干湿沸点:': 'boiling',
        '销售规格:': 'sales',
        '安装位置:': 'installation',
        '变速箱类型:': 'transmission',
    }
    parameter_list = produc_soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, column in labels.items():
                if label in text:
                    sql[column] = text.replace(label, '')
                    break
    print(sql)
    # render the UPDATE statement; transmission is the last column and
    # carries the WHERE clause, matching the original statement layout
    db = "UPDATE `xxuan_car_jd_ycj_product` SET "
    for column in sql:
        value = str(sql[column]) if len(str(sql[column])) != 0 else 'NULL'
        # BUG FIX: escape single quotes so scraped text cannot break the SQL
        value = value.replace("'", "''")
        if column != "transmission":
            db += f"{column}='{value}',"
        else:
            db += (f"{column}='{value}' WHERE skuid='{product_sku}' "
                   f"AND brand='{brand_name}' AND price='{product_Price}' "
                   f"AND url='{https_li_href}';")
    return db


def connectMysql():
    """Re-crawl OEM-part rows whose name is 'NULL' and update them in MySQL."""
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='jd_qipei',
        charset='utf8',
        autocommit=True,  # every execute is committed automatically
    )
    try:
        cur = conn.cursor()
        # rows where the earlier crawl failed to capture a product name
        select_sql = 'select * from `xxuan_car_jd_ycj_product` where name ="NULL"'
        cur.execute(select_sql)
        for row in cur.fetchall():
            # BUG FIX: the original parsed str(row).split(',') — that breaks
            # as soon as any field contains a comma.  Index the tuple directly.
            # NOTE(review): column order assumed from the original split
            # indices (1=skuid, 3=brand, 5=url, 8=price) — confirm vs schema.
            sku = str(row[1]).strip()
            brand = str(row[3]).strip()
            href = str(row[5]).strip()
            price = str(row[8]).strip()
            db = getProduct(https_li_href=href, brand_name=brand,
                            product_sku=sku, product_Price=price)
            cur.execute(db)
            conn.commit()
    finally:
        conn.close()  # original leaked the connection


if __name__ == '__main__':
    # entry point: re-crawl and update the rows still missing data
    connectMysql()

总结

希望能帮助到大家,谢谢!