之前写过一版 12306 爬虫,但那一版不够灵活,这次做了一些完善。这里不详细讲解,直接上代码(看了有不明白的地方,可以加 QQ:727733027 或者留言)。
import json
import time

import requests
from bs4 import BeautifulSoup

# Seconds to wait on a 12306 request before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

sta_url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002'


def _fetch_text(url):
    """Download *url* and return the text of the last ``<p>`` in the parsed body.

    The body is run through BeautifulSoup with the ``lxml`` parser and the
    ``<p>`` elements are collected, exactly as the script has always done.
    NOTE(review): the endpoints used here serve JavaScript/JSON rather than
    HTML; this presumably works because lxml wraps bare text in a ``<p>``
    element -- confirm before relying on it for other endpoints.

    Raises:
        requests.RequestException: on a network failure, timeout or HTTP
            error status.
        ValueError: if the parsed body contains no ``<p>`` element.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    paragraphs = BeautifulSoup(response.content, 'lxml').find_all('p')
    if not paragraphs:
        raise ValueError('no text found in response from %s' % url)
    return paragraphs[-1].text


# Raw station table: whatever sits between the first pair of single quotes
# in the downloaded script (presumably ``var station_names ='...'`` -- confirm).
# This is fetched at import time, as in the original script.
st = _fetch_text(sta_url).split("'")[1]

City = {}   # station name -> station code
City2 = {}  # station code -> station name


def setCity(city):
    """Fill the ``City`` and ``City2`` lookup tables from the raw station string.

    *city* is a run of ``@``-separated entries; within each entry the
    ``|``-separated field at index 1 is used as the station name and the
    field at index 2 as its code.  Entries with fewer than three fields
    (including the empty piece before the first ``@``) are skipped.
    """
    for entry in city.split('@'):
        fields = entry.split('|')
        if len(fields) < 3:
            continue
        name, code = fields[1], fields[2]
        City[name] = code
        City2[code] = name


def getStation(Station):
    """Return the code for station name *Station*, or ``None`` if unknown.

    Prints ``City Error`` when the lookup fails.
    """
    try:
        return City[Station]
    except (KeyError, TypeError):
        print('City Error')
        return None


def getStation2(Station2):
    """Return the station name for code *Station2*, or ``None`` if unknown."""
    try:
        return City2[Station2]
    except (KeyError, TypeError):
        return None


# Build the URL of the ticket query request.
def setStation(from_station, to_station, queryDate, purpose_codes):
    """Return the left-ticket query URL for the given codes, date and ticket type."""
    url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=%s&leftTicketDTO.from_station=%s&leftTicketDTO.to_station=%s&purpose_codes=%s' % (queryDate, from_station, to_station, purpose_codes)
    return url


# Fetch the query page and cut the JSON text out of it.
def getList(url):
    """Return the response text for *url* (the JSON document, still as a string).

    Raises:
        requests.RequestException: on a network failure, timeout or HTTP
            error status.
        ValueError: if no text could be extracted from the response.
    """
    return _fetch_text(url)


def sendToPhone(text):
    """Placeholder for pushing *text* to a phone; currently does nothing."""
    pass


def _format_train(row):
    """Turn one ``|``-separated result row into a printable summary line."""
    fields = row.split('|')

    def seat(index):
        # An empty seat field is reported as '无'.
        return fields[index] or '无'

    train_no = fields[3]
    depart_station = getStation2(fields[6])
    arrive_station = getStation2(fields[7])
    depart_time = fields[8]
    arrive_time = fields[9]
    duration = fields[10]
    soft_sleeper = seat(23)
    no_seat = seat(26)
    hard_sleeper = seat(28)
    hard_seat = seat(29)
    second_class = seat(30)
    first_class = seat(31)
    business_class = seat(32)  # business / premier seat
    return '车次: %s,出发站:%s,到达站:%s,出发时间:%s,到达时间:%s,历时:%s,商务座|特等座:%s,一等座:%s,二等座:%s,软卧:%s,无座:%s,硬卧:%s,硬座:%s \n' % (
        train_no, depart_station, arrive_station, depart_time, arrive_time,
        duration, business_class, first_class, second_class, soft_sleeper,
        no_seat, hard_sleeper, hard_seat)


def main():
    """Query tickets for the hard-coded trip below and print every train found."""
    # Departure city
    from_station_f = '上海'
    # Destination city
    to_station_f = '无锡'
    # Departure date
    queryDate = '2018-01-03'
    # Ticket type
    purpose_codes = 'ADULT'

    # Build the city dictionaries, then translate city names to codes.
    setCity(st)
    from_station = getStation(from_station_f)
    to_station = getStation(to_station_f)
    if from_station is None or to_station is None:
        # getStation() has already printed 'City Error'; querying with a
        # missing code would only send a meaningless request.
        return

    url = setStation(from_station, to_station, queryDate, purpose_codes)

    # Run the GET request.  When no ticket information exists the expected
    # 'data' / 'result' keys are missing or the body is not JSON at all.
    try:
        trains = json.loads(getList(url))['data']['result']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        print('没有查询到车辆信息')
        return

    # Parse and print the train information.
    count = len(trains)
    info = ''.join(_format_train(row) for row in trains)
    header = '出发地: %s,目的地: %s,出发时间: %s,共计 %s 个车次 \n' % (from_station_f, to_station_f, queryDate, count)
    print('header:', header)
    print('info:')
    print(info)
    print('检测时间: %s' % time.strftime('%Y-%m-%d %H:%M:%S'))


if __name__ == '__main__':
    main()

# Author's sign-off from the original post: thanks for reading!