利用 Python 语言下的 pandas 库存储爬取数据于 MySQL 数据库,相比其余语法代码更为便捷。
代码一:(爬取博客园储存于mysql数据库)
from lxml import etree
import requests
import pandas as pd  # not used by this listing; kept because the original imports it
import pymysql

# Script 1: crawl cnblogs.com and store (title, body) rows in the MySQL
# table `cnblogs`.
#
# Flow: fetch the blogger ranking page, collect blogger home-page links,
# then for a blogger walk the post-list pages, fetch every post and insert
# its title plus the first 10 characters of its serialized body HTML.

REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

conn = pymysql.connect(host='localhost', user='root', password='admin',
                       db='spider', charset='utf8')
cursor = conn.cursor()
try:
    recommed_url = 'https://www.cnblogs.com/aggsite/UserStats'
    res = requests.get(url=recommed_url,
                       timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    ele = etree.HTML(res)
    # The node test `*` is mandatory: `//[@id=...]` is not a valid XPath
    # expression and is rejected by lxml.
    elements = ele.xpath("//*[@id='blogger_list']//a/@href")
    # The listing prefixes every link with a scheme and drops the last two
    # entries.
    url_list = ['http:' + href for href in elements][:-2]

    for url in url_list:
        visited = set()  # list pages already fetched for this blogger
        while True:
            print(url)
            visited.add(url)
            res2 = requests.get(url,
                                timeout=REQUEST_TIMEOUT).content.decode('utf-8')
            ele2 = etree.HTML(res2)
            word_urls = ele2.xpath(
                '//*[@id="mainContent"]/div/div/div[2]/a/@href')
            for wordUrl in word_urls:
                res3 = requests.get(
                    wordUrl, timeout=REQUEST_TIMEOUT).content.decode('utf-8')
                ele3 = etree.HTML(res3)
                titles = ele3.xpath('//*[@id="cb_post_title_url"]/text()')
                bodies = ele3.xpath('//*[@id="cnblogs_post_body"]')
                if not titles or not bodies:
                    # The page lacks the expected title/body nodes; skip it
                    # instead of crashing on `[0]`.
                    continue
                title = titles[0]
                body = etree.tostring(bodies[0],
                                      encoding='utf-8').decode('utf-8')
                body = body[:10]  # only the first 10 characters are stored
                # MySQL insert statement: put title and body into `cnblogs`.
                sql = 'insert into cnblogs value (%s,%s)'
                parm = (title, body)
                # execute(sql, args): args is a list or tuple; the SQL
                # placeholder is %s.
                cursor.execute(sql, parm)
                # Persist the row.
                conn.commit()

            # Both union branches must select @href; without it the first
            # branch yields Element objects that cannot be requested.
            next_page = ele2.xpath(
                "//*[@id='pager']/a/@href|//*[@id='nav_next_page']/a/@href")
            # Pager links may point back to pages already crawled; ignoring
            # them keeps the loop from cycling forever.
            next_page = [link for link in next_page if link not in visited]
            if next_page:
                url = next_page[0]
            else:
                break
        # NOTE(review): the original listing ends with a second `break`,
        # read here as loop-level, so only the first blogger is crawled.
        # Remove it to crawl every blogger in url_list - confirm intent.
        break
finally:
    # Release the database resources even when a request or insert fails.
    cursor.close()
    conn.close()
代码二:爬取菜鸟教程python100例子储存于mysql数据库
from lxml import etree
import requests  # HTTP request library
import pymysql

# Script 2: crawl the runoob.com Python 3 examples and store (title, body)
# rows in the MySQL table `cainiao`.
#
# Flow: fetch the examples index, build one URL per linked example, then
# fetch each example page and insert its <h1> text and the text of the
# second <p> under #content.

REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

conn = pymysql.connect(host='localhost', user='root', password='admin',
                       db='spider', charset='utf8')
cursor = conn.cursor()
try:
    recommed_url = 'https://www.runoob.com/python3/python3-examples.html'
    res = requests.get(url=recommed_url,
                       timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    ele = etree.HTML(res)
    elements = ele.xpath('//*[@id="content"]/ul/li/a/@href')
    # The listing joins every link onto the /python3/ directory.
    url_list = ['https://www.runoob.com/python3/' + href for href in elements]

    for url in url_list:
        print(url)
        res2 = requests.get(url,
                            timeout=REQUEST_TIMEOUT).content.decode('utf-8')
        ele2 = etree.HTML(res2)
        # The node test `*` is mandatory: `//[@id=...]` is not a valid XPath
        # expression and is rejected by lxml.
        titles = ele2.xpath('//*[@id="content"]/h1/text()')
        bodies = ele2.xpath('//*[@id="content"]/p[2]/text()')
        if not titles or not bodies:
            # The page lacks the expected nodes; skip it instead of crashing
            # on `[0]`.
            continue
        title = titles[0]
        body = bodies[0]
        # MySQL insert statement: put title and body into `cainiao`.
        sql = 'insert into cainiao value (%s,%s)'
        parm = (title, body)
        # execute(sql, args): args is a list or tuple; the SQL placeholder
        # is %s.
        cursor.execute(sql, parm)
        # Persist the row.
        conn.commit()
finally:
    # Release the database resources even when a request or insert fails.
    cursor.close()
    conn.close()
————————————————
版权声明:本文为CSDN博主「IT~子民」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_42633222/article/details/102632754
代码一:(爬取博客园储存于mysql数据库)
from lxml import etree
import requests
import pandas as pd
import pymysql
# Script 1: crawl cnblogs.com and store (title, body) rows in the MySQL
# table `cnblogs`.
#
# Flow: fetch the blogger ranking page, collect blogger home-page links,
# then for a blogger walk the post-list pages, fetch every post and insert
# its title plus the first 10 characters of its serialized body HTML.

REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

conn = pymysql.connect(host='localhost', user='root', password='admin',
                       db='spider', charset='utf8')
cursor = conn.cursor()
try:
    recommed_url = 'https://www.cnblogs.com/aggsite/UserStats'
    res = requests.get(url=recommed_url,
                       timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    ele = etree.HTML(res)
    # The node test `*` is mandatory: `//[@id=...]` is not a valid XPath
    # expression and is rejected by lxml.
    elements = ele.xpath("//*[@id='blogger_list']//a/@href")
    # The listing prefixes every link with a scheme and drops the last two
    # entries.
    url_list = ['http:' + href for href in elements][:-2]

    for url in url_list:
        visited = set()  # list pages already fetched for this blogger
        while True:
            print(url)
            visited.add(url)
            res2 = requests.get(url,
                                timeout=REQUEST_TIMEOUT).content.decode('utf-8')
            ele2 = etree.HTML(res2)
            word_urls = ele2.xpath(
                '//*[@id="mainContent"]/div/div/div[2]/a/@href')
            for wordUrl in word_urls:
                res3 = requests.get(
                    wordUrl, timeout=REQUEST_TIMEOUT).content.decode('utf-8')
                ele3 = etree.HTML(res3)
                titles = ele3.xpath('//*[@id="cb_post_title_url"]/text()')
                bodies = ele3.xpath('//*[@id="cnblogs_post_body"]')
                if not titles or not bodies:
                    # The page lacks the expected title/body nodes; skip it
                    # instead of crashing on `[0]`.
                    continue
                title = titles[0]
                body = etree.tostring(bodies[0],
                                      encoding='utf-8').decode('utf-8')
                body = body[:10]  # only the first 10 characters are stored
                # MySQL insert statement: put title and body into `cnblogs`.
                sql = 'insert into cnblogs value (%s,%s)'
                parm = (title, body)
                # execute(sql, args): args is a list or tuple; the SQL
                # placeholder is %s.
                cursor.execute(sql, parm)
                # Persist the row.
                conn.commit()

            # Both union branches must select @href; without it the first
            # branch yields Element objects that cannot be requested.
            next_page = ele2.xpath(
                "//*[@id='pager']/a/@href|//*[@id='nav_next_page']/a/@href")
            # Pager links may point back to pages already crawled; ignoring
            # them keeps the loop from cycling forever.
            next_page = [link for link in next_page if link not in visited]
            if next_page:
                url = next_page[0]
            else:
                break
        # NOTE(review): the original listing ends with a second `break`,
        # read here as loop-level, so only the first blogger is crawled.
        # Remove it to crawl every blogger in url_list - confirm intent.
        break
finally:
    # Release the database resources even when a request or insert fails.
    cursor.close()
    conn.close()
代码二:爬取菜鸟教程python100例子储存于mysql数据库
from lxml import etree
import requests#导入请求库
import pymysql
# Script 2: crawl the runoob.com Python 3 examples and store (title, body)
# rows in the MySQL table `cainiao`.
#
# Flow: fetch the examples index, build one URL per linked example, then
# fetch each example page and insert its <h1> text and the text of the
# second <p> under #content.

REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

conn = pymysql.connect(host='localhost', user='root', password='admin',
                       db='spider', charset='utf8')
cursor = conn.cursor()
try:
    recommed_url = 'https://www.runoob.com/python3/python3-examples.html'
    res = requests.get(url=recommed_url,
                       timeout=REQUEST_TIMEOUT).content.decode('utf-8')
    ele = etree.HTML(res)
    elements = ele.xpath('//*[@id="content"]/ul/li/a/@href')
    # The listing joins every link onto the /python3/ directory.
    url_list = ['https://www.runoob.com/python3/' + href for href in elements]

    for url in url_list:
        print(url)
        res2 = requests.get(url,
                            timeout=REQUEST_TIMEOUT).content.decode('utf-8')
        ele2 = etree.HTML(res2)
        # The node test `*` is mandatory: `//[@id=...]` is not a valid XPath
        # expression and is rejected by lxml.
        titles = ele2.xpath('//*[@id="content"]/h1/text()')
        bodies = ele2.xpath('//*[@id="content"]/p[2]/text()')
        if not titles or not bodies:
            # The page lacks the expected nodes; skip it instead of crashing
            # on `[0]`.
            continue
        title = titles[0]
        body = bodies[0]
        # MySQL insert statement: put title and body into `cainiao`.
        sql = 'insert into cainiao value (%s,%s)'
        parm = (title, body)
        # execute(sql, args): args is a list or tuple; the SQL placeholder
        # is %s.
        cursor.execute(sql, parm)
        # Persist the row.
        conn.commit()
finally:
    # Release the database resources even when a request or insert fails.
    cursor.close()
    conn.close()