首先,附上代码:
import json

import bs4
import requests


def get_html(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.

    Returns:
        The page text on success, or the sentinel string "请求失败!" when
        the request fails (connection error, timeout, or non-2xx status).
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/51.0.2704.104 Safari/537.36'
        ),
        # The original dict listed 'Accept-Language' twice, so the encoding
        # list was silently overwritten; it belongs under Accept-Encoding.
        # 'sdch' is left out because requests cannot decode it. The invalid
        # 'ContentType' header (meaningless on a body-less GET) was dropped.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Only swallow request-level failures; a bare ``except`` would also
        # hide KeyboardInterrupt and programming errors.
        return "请求失败!"
    response.encoding = 'utf-8'
    return response.text


def get_content(url):
    """Scrape the forecast entries from a weather page and print them.

    Each ``<li>`` under ``div.t > ul.clearfix`` becomes a dict with the keys
    ``'day'`` (text of the ``<h1>``) and ``'temperature'`` (text of the
    ``<span>`` plus the ``<em>`` inside ``p.tem``). Entries missing any of
    those nodes are skipped with a printed notice.

    Args:
        url: Address of the forecast page.

    Returns:
        The list of parsed entries (also printed); empty when the page
        could not be fetched or does not contain the expected markup.
    """
    weather_list = []
    soup = bs4.BeautifulSoup(get_html(url), 'lxml')

    # find() returns None when nothing matches, e.g. when get_html() handed
    # back its failure sentinel, so each level is checked before descending.
    container = soup.find('div', class_='t')
    forecast = None
    if container is not None:
        forecast = container.find('ul', class_='clearfix')

    if forecast is None:
        print('查询不到')
        items = []
    else:
        items = forecast.find_all('li')

    for item in items:
        try:
            temperature = item.find('p', class_='tem')
            weather_list.append({
                'day': item.find('h1').text,
                'temperature': temperature.span.text + temperature.em.text,
            })
        except AttributeError:
            # A missing <h1>, <p class="tem">, <span> or <em> shows up as an
            # attribute access on None; skip just this entry.
            print('查询不到')

    print(weather_list)
    return weather_list


if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather1d/101190401.shtml'
    get_content(url)

# Article note: while running the script, the following problem occurred:
主要是没有安装 lxml 包,只需要在电脑终端输入 `pip install lxml` 让电脑自行安装即可,运行结果如下所示:
[{'day': '16日夜间', 'temperature': '5°C'}, {'day': '17日白天', 'temperature': '12°C'}]