初学者python爬虫(12306)

以前写过一版12306爬虫,但那一版不够灵活,这次做了一些完善。这里不详细讲解,直接上代码(看了有不明白的地方,可以加QQ:727733027 或者留言)。

from bs4 import BeautifulSoup
import requests
import json
import time
# Download the 12306 station list script. NOTE: this request runs at import
# time, so the module cannot be loaded without network access.
sta_url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002'
# A timeout keeps an unresponsive server from hanging the script forever;
# raise_for_status turns an HTTP error into an explicit failure instead of
# letting an error page be parsed as station data.
sta_wb_data = requests.get(sta_url, timeout=10)
sta_wb_data.raise_for_status()
sta_html = sta_wb_data.content
sta_soup = BeautifulSoup(sta_html, 'lxml')
sta_data = sta_soup.findAll('p')
# Keep the text of the last <p> element the parser produced ('' if none),
# which is what the original loop ended up with.
v = sta_data[-1].text if sta_data else ''
# The station list is the text between the first pair of single quotes.
_sta_parts = v.split("'")
if len(_sta_parts) < 2:
    raise RuntimeError('unexpected station_name.js content: no quoted station list found')
st = _sta_parts[1]
# Lookup tables filled by setCity(): City maps station name -> station code,
# City2 maps station code -> station name.
City = {}
City2 = {}
def setCity(city):
    """Fill the module-level City and City2 tables from a station list string.

    *city* is a sequence of '@'-separated entries whose fields are separated
    by '|'. For every entry, field 1 is stored as the key of ``City`` with
    field 2 as its value, and the reverse mapping is stored in ``City2``.
    Entries with fewer than three fields (including the empty chunk that
    precedes a leading '@') are skipped instead of raising IndexError.
    """
    for entry in city.split('@'):
        # Split once per entry rather than once per field access.
        fields = entry.split('|')
        if len(fields) < 3:
            continue
        name, code = fields[1], fields[2]
        City[name] = code
        City2[code] = name

def getStation(Station):
    """Return the value stored in ``City`` for *Station*.

    Prints 'City Error' and returns None when *Station* is not a key of
    ``City`` (or is not hashable and therefore cannot be a key).
    """
    try:
        return City[Station]
    except (KeyError, TypeError):
        # Only lookup failures are expected here; anything else is a real bug
        # and should not be hidden.
        print('City Error')
        return None

def getStation2(Station2):
    """Return the value stored in ``City2`` for *Station2*.

    Returns None, without printing anything, when *Station2* is not a key of
    ``City2`` (or is not hashable and therefore cannot be a key).
    """
    try:
        return City2[Station2]
    except (KeyError, TypeError):
        # Only lookup failures are expected here; anything else is a real bug
        # and should not be hidden.
        return None

# Build the URL used for the ticket query request
def setStation(from_station,to_station,queryDate,purpose_codes):
    """Return the leftTicket query URL for the given search parameters.

    The four arguments become the values of the query parameters
    ``leftTicketDTO.train_date``, ``leftTicketDTO.from_station``,
    ``leftTicketDTO.to_station`` and ``purpose_codes``, in that order.
    Values are URL-encoded, so characters such as '&' or spaces cannot
    corrupt the query string.
    """
    # Imported locally so the function does not rely on a file-level import.
    from urllib.parse import urlencode

    query = urlencode([
        ('leftTicketDTO.train_date', queryDate),
        ('leftTicketDTO.from_station', from_station),
        ('leftTicketDTO.to_station', to_station),
        ('purpose_codes', purpose_codes),
    ])
    return 'https://kyfw.12306.cn/otn/leftTicket/query?' + query
# Fetch the query page and extract the JSON text from the response
def getList(url):
    """GET *url* and return the text of the last ``<p>`` element in the reply.

    The response body is run through BeautifulSoup's lxml parser and the text
    of the last ``<p>`` it yields is returned; the caller feeds that text to
    ``json.loads``. Returns '' when the parser yields no ``<p>`` at all, where
    the original raised UnboundLocalError.

    Raises whatever ``requests`` raises for connection problems, timeouts or
    HTTP error statuses.
    """
    # Without a timeout an unresponsive server would block the script forever.
    wb_data = requests.get(url, timeout=10)
    wb_data.raise_for_status()
    soup = BeautifulSoup(wb_data.content, 'lxml')
    paragraphs = soup.findAll('p')
    return paragraphs[-1].text if paragraphs else ''

def sendToPhone(text):
    """Unimplemented stub: accepts *text*, does nothing and returns None.

    NOTE(review): presumably meant to forward the report to a phone; confirm
    the intended behaviour before implementing.
    """
    return None

if __name__ == '__main__':
    # Departure city
    from_station_f = '上海'
    # Destination city
    to_station_f = '无锡'
    # Departure date
    queryDate = '2018-01-03'
    # Ticket type
    purpose_codes = 'ADULT'

    # Build the station lookup tables from the downloaded station list
    setCity(st)

    # Translate both city names into station codes
    from_station = getStation(from_station_f)
    to_station = getStation(to_station_f)

    Data = []
    bHaveTicket = False
    if from_station is None or to_station is None:
        # getStation() already reported the unknown name; do not send a
        # request containing the literal text "None".
        print('没有查询到车辆信息')
    else:
        url = setStation(from_station, to_station, queryDate, purpose_codes)
        # When no trains exist the 'data' / 'result' entries are missing, so
        # any failure while fetching or decoding is reported as "no trains".
        # The broad except is deliberate: this is the script's top-level
        # boundary and the query is best-effort.
        try:
            Data = json.loads(getList(url))['data']['result']
            bHaveTicket = True
        except Exception:
            print('没有查询到车辆信息')

    # Parse and print the train information
    if bHaveTicket:
        count = len(Data)
        lines = []
        for record in Data:
            # Each record is one '|'-separated string; split it once.
            fields = record.split('|')
            # Train number
            station = fields[3]
            # Departure / arrival time and travel duration
            departTime = fields[8]
            arriverTime = fields[9]
            userTime = fields[10]
            # Seat columns, in order: soft sleeper (23), no seat (26),
            # hard sleeper (28), hard seat (29), second class (30),
            # first class (31), business/premier class (32).
            # An empty column is shown as '无'.
            way_23, way_26, way_28, way_29, way_30, way_31, way_32 = (
                fields[i] or '无' for i in (23, 26, 28, 29, 30, 31, 32)
            )
            # Departure / arrival station: show the name when the code is
            # known, otherwise fall back to the raw code instead of "None".
            departStation = fields[6]
            departStation_C = getStation2(departStation) or departStation
            arriverStation = fields[7]
            arriverStation_C = getStation2(arriverStation) or arriverStation
            lines.append('车次: %s,出发站:%s,到达站:%s,出发时间:%s,到达时间:%s,历时:%s,商务座|特等座:%s,一等座:%s,二等座:%s,软卧:%s,无座:%s,硬卧:%s,硬座:%s \n'% (station,departStation_C,arriverStation_C,departTime,arriverTime,userTime,way_32,way_31,way_30,way_23,way_26,way_28,way_29))

        # Join once instead of growing a string inside the loop
        info = ''.join(lines)
        header = '出发地: %s,目的地: %s,出发时间: %s,共计 %s 个车次 \n' %(from_station_f,to_station_f,queryDate,count)
        print('header:',header)
        print('info:')
        print(info)
        print('检测时间: %s' % time.strftime('%Y-%m-%d %H:%M:%S'))
谢谢!!