#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Scrape WeChat Pay service-provider search results into MySQL.

For every keyword in ``KEYWORDS`` the script drives Firefox to the partner
search page, submits the keyword, walks every result page and inserts one
row per listed company into the ``wx_pay_company`` table.

Every ``print`` call takes a single argument, so the statements behave the
same under Python 2 (the original target) and Python 3.
"""
import sys
import time
from contextlib import closing

import pymysql
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys  # noqa: F401 -- kept from the original import list

if sys.version_info[0] == 2:
    # Python 2 only: unicode keywords are concatenated with UTF-8 byte-string
    # literals below, which needs UTF-8 as the implicit default codec.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

# NOTE(review): machine-specific path and plaintext credentials are
# hard-coded; consider moving them to configuration.
GECKODRIVER_PATH = "/Users/chanming/Desktop/geckodriver"
SEARCH_URL = "https://pay.weixin.qq.com/index.php/partner/public/search"
DB_CONFIG = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'json',
    'passwd': '123456',
    'db': 'youxia',
    'charset': "utf8",
}
INSERT_SQL = 'insert into wx_pay_company(dis,userName,phone,email,address,industry,title,searchword) values(%s,%s,%s,%s,%s,%s,%s,%s)'

# Column names, in the order the property rows are expected to appear in a
# result entry.  The label texts themselves are learned from the page at run
# time.  A previously commented-out literal mapping, kept for reference:
#   {'拓展地域:':'dis','联 系 人 :':'userName','电 话:':'phone','邮 箱:':'email','地 址:':'address','所属行业:':'industry'}
FIELD_ORDER = ('dis', 'userName', 'phone', 'email', 'address', 'industry')

# Search keywords (place names).  The data is reproduced unchanged.
# NOTE(review): u"焦做" looks like a typo for u"焦作" and u"重启" for
# u"重庆" -- confirm before editing.  u"西安" is listed twice; duplicates are
# skipped at run time (see _unique).
KEYWORDS = [
    u"西安", u"肇庆", u"汕尾", u"茂名", u"深圳", u"阳江", u"潮州", u"韶关",
    u"梅州", u"河源", u"清远", u"东莞", u"云浮", u"揭阳", u"广州", u"中山",
    u"东沙群岛",
    u"濮阳", u"洛阳", u"三门峡", u"郑州", u"许昌", u"南阳", u"漯河", u"济源",
    u"平顶山", u"信阳", u"焦做", u"安阳", u"驻马店", u"商丘", u"新乡", u"鹤壁",
    u"周口", u"开封",
    u"乌海", u"巴彦淖尔", u"呼伦贝尔", u"鄂尔多斯", u"包头", u"呼和浩特",
    u"通辽", u"赤峰", u"阿拉善盟", u"兴安盟", u"锡林郭勒盟", u"乌兰察布",
    u"大兴安岭地区", u"七台河", u"大庆", u"鹤岗", u"伊春", u"绥化", u"佳木斯",
    u"齐齐哈尔", u"鸡西", u"双鸭山", u"哈尔滨", u"黑河", u"牡丹江",
    u"北屯", u"双河", u"铁门关", u"博尔塔拉蒙古自治州", u"和田地区",
    u"塔城地区", u"昆玉", u"可克达拉", u"石河子", u"阿勒泰地区", u"克拉玛依",
    u"昌吉回族自治州", u"五家渠", u"巴音郭楞蒙古自治州", u"阿拉尔",
    u"图木舒克", u"喀什地区", u"伊犁哈萨克自治州", u"乌鲁木齐", u"阿克苏地区",
    u"克孜勒苏柯尔克孜自治州", u"哈密", u"吐鲁番",
    u"十堰", u"宜昌", u"孝感", u"武汉", u"恩施土家族苗族自治州", u"天门",
    u"黄冈", u"潜江", u"襄阳", u"荆门", u"仙桃", u"神农架林区", u"随州",
    u"荆州", u"咸宁", u"黄石", u"鄂州",
    u"大连", u"葫芦岛", u"锦州", u"丹东", u"抚顺", u"沈阳", u"鞍山", u"铁岭",
    u"辽阳", u"盘锦", u"营口", u"朝阳", u"阜新", u"本溪",
    u"威海", u"烟台", u"滨州", u"临沂", u"莱芜", u"淄博", u"青岛", u"聊城",
    u"德州", u"日照", u"菏泽", u"潍坊", u"济南", u"泰安", u"东营", u"济宁",
    u"枣庄",
    u"商洛", u"西安", u"汉中", u"安康", u"榆林", u"铜川", u"咸阳", u"宝鸡",
    u"延安", u"渭南",
    u"铜仁", u"六盘水", u"遵义", u"安顺", u"毕节", u"贵阳",
    u"重启",
    u"昌都", u"拉萨", u"那曲", u"日喀则", u"山南", u"林芝",
    u"阜阳", u"淮北", u"铜陵", u"蚌埠", u"马鞍山", u"池州", u"亳州", u"滁州",
    u"安庆", u"黄山", u"宣城", u"芜湖", u"六安", u"淮南", u"合肥", u"宿州",
    u"宁德", u"福州", u"龙岩", u"莆田", u"泉州", u"三明", u"厦门", u"漳州",
    u"南平",
    u"岳阳", u"衡阳", u"娄底", u"湘潭", u"益阳", u"长沙", u"常德", u"怀化",
    u"邵阳", u"张家界", u"株洲", u"永州", u"郴州",
    u"琼海", u"东方", u"文昌", u"五指山", u"三沙", u"三亚", u"万宁", u"儋州",
    u"海口",
    u"宿迁", u"连云港", u"扬州", u"南京", u"南通", u"无锡", u"镇江", u"淮安",
    u"泰州", u"徐州", u"苏州", u"常州", u"盐城",
    u"海东", u"西宁",
    u"钦州", u"桂林", u"百色", u"北海", u"贵港", u"河池", u"柳州", u"南宁",
    u"来宾", u"崇左", u"贺州", u"防城港", u"梧州", u"玉林",
    u"固原", u"中卫", u"银川", u"石嘴山", u"吴忠",
    u"景德镇", u"九江", u"抚州", u"上饶", u"新余", u"赣州", u"鹰潭", u"南昌",
    u"吉安", u"萍乡", u"宜春",
    u"宁波", u"台州", u"嘉兴", u"舟山", u"温州", u"衢州", u"金华", u"丽水",
    u"杭州", u"绍兴", u"湖州",
    u"邢台", u"邯郸", u"唐山", u"承德", u"张家口", u"廊坊", u"沧州", u"衡水",
    u"秦皇岛", u"保定", u"石家庄",
    u"深水埗区", u"西贡区", u"元朗区", u"油尖旺区", u"湾仔区", u"屯门区",
    u"黄大仙区", u"东区", u"中西区", u"大埔区", u"九龙城区", u"沙田区",
    u"南区", u"观塘区", u"北区", u"离岛区", u"荃湾区", u"葵青区",
    u"阳泉", u"太原", u"临汾", u"大同", u"晋城", u"忻州", u"长治", u"运城",
    u"朔州", u"晋中", u"吕梁",
    u"风顺堂区", u"花地玛堂区", u"圣方济各堂区", u"花王堂区", u"路凼填海区",
    u"大堂区", u"嘉模堂区", u"望德堂区",
    u"兰州", u"金昌", u"嘉峪关", u"酒泉", u"平凉", u"白银", u"张掖",
    u"甘南藏族自治州", u"临夏回族自治州", u"陇南", u"天水", u"武威", u"定西",
    u"庆阳",
    u"广元", u"南充", u"达州", u"眉山", u"德阳", u"遂宁", u"巴中", u"广安",
    u"资阳", u"成都", u"绵阳", u"内江", u"宜宾", u"自贡", u"雅安", u"攀枝花",
    u"乐山", u"甘孜藏族自治州", u"泸州", u"阿坝藏族羌族自治州",
    u"凉山彝族自治州",
    u"昭通", u"曲靖", u"红河哈尼族彝族自治州", u"丽江", u"西双版纳傣族自治州",
    u"保山", u"文山壮族苗族自治州", u"大理白族自治州", u"怒江傈僳族自治州",
    u"迪庆藏族自治州", u"玉溪", u"普洱", u"昆明", u"楚雄彝族自治州",
    u"德宏傣族景颇族自治州", u"临沧",
    u"长春", u"辽源", u"吉林", u"白城", u"松原", u"四平", u"延边朝鲜族自治州",
    u"白山", u"通化",
]


def _unique(items):
    """Yield *items* in their original order, skipping repeated values."""
    seen = set()
    for item in items:
        if item not in seen:
            seen.add(item)
            yield item


def _scrape_current_page(driver, label_map):
    """Return one dict per company shown on the currently loaded result page.

    ``label_map`` maps a property label's text to a column name.  It is
    shared across calls and filled in place from the first entry that shows
    at least ``len(FIELD_ORDER)`` labels, pairing labels with ``FIELD_ORDER``
    by position.  Labels missing from the map are skipped instead of raising.
    """
    companies = []
    for entry in driver.find_elements_by_css_selector('#searchResultList dl'):
        try:
            title = entry.find_element_by_tag_name('dt').text
        except NoSuchElementException:
            # A <dl> without a <dt> title is not a company entry.
            continue
        labels = [el.text for el in entry.find_elements_by_class_name('lbl')]
        values = [el.text for el in entry.find_elements_by_class_name('ele')]
        if not label_map and len(labels) >= len(FIELD_ORDER):
            label_map.update(zip(labels, FIELD_ORDER))
        company = {'title': title}
        # zip() stops at the shorter list, so a label without a matching
        # value element cannot cause an IndexError.
        for label, value in zip(labels, values):
            field = label_map.get(label)
            if field is not None:
                company[field] = value
        companies.append(company)
    return companies


def _search(driver, keyword, label_map):
    """Search for *keyword* and return the scraped companies.

    Returns ``None`` when the page reports no results, otherwise a list of
    company dicts collected from every result page.
    """
    driver.get(SEARCH_URL)
    time.sleep(3)
    search_box = driver.find_element_by_id('searchPortalText')
    search_box.clear()
    search_box.send_keys(keyword)
    driver.find_element_by_id("searchPortalSubmit").click()
    time.sleep(1)

    try:
        # The "empty" message carries the `hide` class only when there are
        # results; if that hidden element is absent, nothing was found.
        driver.find_element_by_css_selector('.page-msg.mini.page-empty.hide')
    except NoSuchElementException:
        return None

    pager = driver.find_element_by_id("service_provider_query_page")
    pager_class = pager.get_attribute('class') or ''
    if 'hide' in pager_class:
        # Pager hidden: everything fits on the page already displayed.
        print('结果单页')
        return _scrape_current_page(driver, label_map)

    print('结果多页')
    total_pages = int(driver.find_element_by_name("totalpage").text)
    companies = []
    for page in range(1, total_pages + 1):
        print(page)
        goto_input = driver.find_element_by_css_selector('.goto-area input')
        goto_input.clear()
        goto_input.send_keys(str(page))
        driver.find_element_by_css_selector('.goto-area a').click()
        time.sleep(3)
        companies.extend(_scrape_current_page(driver, label_map))
    return companies


def _company_row(company, keyword):
    """Build the INSERT parameter tuple, using '' for any missing column."""
    columns = tuple(company.get(name, '') for name in FIELD_ORDER)
    return columns + (company['title'], keyword)


def main():
    """Scrape every keyword and store the results, always releasing resources."""
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        with closing(pymysql.connect(**DB_CONFIG)) as db, \
                closing(db.cursor()) as cursor:
            label_map = {}
            for keyword in _unique(KEYWORDS):
                print(keyword)
                companies = _search(driver, keyword, label_map)
                if companies is None:
                    print(keyword + '没有查询结果')
                    continue
                for company in companies:
                    print('插入数据库')
                    cursor.execute(INSERT_SQL, _company_row(company, keyword))
                # Commit once per keyword so finished keywords survive a
                # failure on a later one.
                db.commit()
    finally:
        # quit() ends the whole WebDriver session (and the driver process);
        # close() would only close the current window.
        driver.quit()
    print('运行结束')


if __name__ == "__main__":
    main()