拉取微信全部服务商

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os
import re
import sys
import time

import pymysql
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys

# Python 2 only: make UTF-8 the implicit codec so that the byte-string log
# messages used further down can be concatenated with the unicode keywords
# without raising UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf-8')

# The geckodriver location is machine specific; GECKODRIVER_PATH overrides the
# original hard-coded default.
driver = webdriver.Firefox(
    executable_path=os.environ.get("GECKODRIVER_PATH",
                                   "/Users/chanming/Desktop/geckodriver"))

# Label text shown on the result page -> record key. The static version that
# was once used is kept here for reference:
# {'拓展地域:':'dis','联 系 人 :':'userName','电  话:':'phone','邮  箱:':'email','地  址:':'address','所属行业:':'industry'}
# `map` starts empty and `flag` marks that it still has to be filled in.
map={}
flag=True

# Search keywords (city / district names), one search per entry.
# Fixes relative to the previous list: the duplicated u'西安' at the head was
# dropped (it is still present further down), and the typos u"焦做" and
# u"重启" were corrected to u"焦作" and u"重庆".
keyWords = [
    u"肇庆", u"汕尾", u"茂名", u"深圳", u"阳江", u"潮州", u"韶关", u"梅州",
    u"河源", u"清远", u"东莞", u"云浮", u"揭阳", u"广州", u"中山", u"东沙群岛",
    u"濮阳", u"洛阳", u"三门峡", u"郑州", u"许昌", u"南阳", u"漯河", u"济源",
    u"平顶山", u"信阳", u"焦作", u"安阳", u"驻马店", u"商丘", u"新乡", u"鹤壁",
    u"周口", u"开封",
    u"乌海", u"巴彦淖尔", u"呼伦贝尔", u"鄂尔多斯", u"包头", u"呼和浩特",
    u"通辽", u"赤峰", u"阿拉善盟", u"兴安盟", u"锡林郭勒盟", u"乌兰察布",
    u"大兴安岭地区", u"七台河", u"大庆", u"鹤岗", u"伊春", u"绥化", u"佳木斯",
    u"齐齐哈尔", u"鸡西", u"双鸭山", u"哈尔滨", u"黑河", u"牡丹江",
    u"北屯", u"双河", u"铁门关", u"博尔塔拉蒙古自治州", u"和田地区",
    u"塔城地区", u"昆玉", u"可克达拉", u"石河子", u"阿勒泰地区", u"克拉玛依",
    u"昌吉回族自治州", u"五家渠", u"巴音郭楞蒙古自治州", u"阿拉尔",
    u"图木舒克", u"喀什地区", u"伊犁哈萨克自治州", u"乌鲁木齐", u"阿克苏地区",
    u"克孜勒苏柯尔克孜自治州", u"哈密", u"吐鲁番",
    u"十堰", u"宜昌", u"孝感", u"武汉", u"恩施土家族苗族自治州", u"天门",
    u"黄冈", u"潜江", u"襄阳", u"荆门", u"仙桃", u"神农架林区", u"随州",
    u"荆州", u"咸宁", u"黄石", u"鄂州",
    u"大连", u"葫芦岛", u"锦州", u"丹东", u"抚顺", u"沈阳", u"鞍山", u"铁岭",
    u"辽阳", u"盘锦", u"营口", u"朝阳", u"阜新", u"本溪",
    u"威海", u"烟台", u"滨州", u"临沂", u"莱芜", u"淄博", u"青岛", u"聊城",
    u"德州", u"日照", u"菏泽", u"潍坊", u"济南", u"泰安", u"东营", u"济宁",
    u"枣庄",
    u"商洛", u"西安", u"汉中", u"安康", u"榆林", u"铜川", u"咸阳", u"宝鸡",
    u"延安", u"渭南",
    u"铜仁", u"六盘水", u"遵义", u"安顺", u"毕节", u"贵阳",
    u"重庆",
    u"昌都", u"拉萨", u"那曲", u"日喀则", u"山南", u"林芝",
    u"阜阳", u"淮北", u"铜陵", u"蚌埠", u"马鞍山", u"池州", u"亳州", u"滁州",
    u"安庆", u"黄山", u"宣城", u"芜湖", u"六安", u"淮南", u"合肥", u"宿州",
    u"宁德", u"福州", u"龙岩", u"莆田", u"泉州", u"三明", u"厦门", u"漳州",
    u"南平",
    u"岳阳", u"衡阳", u"娄底", u"湘潭", u"益阳", u"长沙", u"常德", u"怀化",
    u"邵阳", u"张家界", u"株洲", u"永州", u"郴州",
    u"琼海", u"东方", u"文昌", u"五指山", u"三沙", u"三亚", u"万宁", u"儋州",
    u"海口",
    u"宿迁", u"连云港", u"扬州", u"南京", u"南通", u"无锡", u"镇江", u"淮安",
    u"泰州", u"徐州", u"苏州", u"常州", u"盐城",
    u"海东", u"西宁",
    u"钦州", u"桂林", u"百色", u"北海", u"贵港", u"河池", u"柳州", u"南宁",
    u"来宾", u"崇左", u"贺州", u"防城港", u"梧州", u"玉林",
    u"固原", u"中卫", u"银川", u"石嘴山", u"吴忠",
    u"景德镇", u"九江", u"抚州", u"上饶", u"新余", u"赣州", u"鹰潭", u"南昌",
    u"吉安", u"萍乡", u"宜春",
    u"宁波", u"台州", u"嘉兴", u"舟山", u"温州", u"衢州", u"金华", u"丽水",
    u"杭州", u"绍兴", u"湖州",
    u"邢台", u"邯郸", u"唐山", u"承德", u"张家口", u"廊坊", u"沧州", u"衡水",
    u"秦皇岛", u"保定", u"石家庄",
    u"深水埗区", u"西贡区", u"元朗区", u"油尖旺区", u"湾仔区", u"屯门区",
    u"黄大仙区", u"东区", u"中西区", u"大埔区", u"九龙城区", u"沙田区",
    u"南区", u"观塘区", u"北区", u"离岛区", u"荃湾区", u"葵青区",
    u"阳泉", u"太原", u"临汾", u"大同", u"晋城", u"忻州", u"长治", u"运城",
    u"朔州", u"晋中", u"吕梁",
    u"风顺堂区", u"花地玛堂区", u"圣方济各堂区", u"花王堂区", u"路凼填海区",
    u"大堂区", u"嘉模堂区", u"望德堂区",
    u"兰州", u"金昌", u"嘉峪关", u"酒泉", u"平凉", u"白银", u"张掖",
    u"甘南藏族自治州", u"临夏回族自治州", u"陇南", u"天水", u"武威", u"定西",
    u"庆阳",
    u"广元", u"南充", u"达州", u"眉山", u"德阳", u"遂宁", u"巴中", u"广安",
    u"资阳", u"成都", u"绵阳", u"内江", u"宜宾", u"自贡", u"雅安", u"攀枝花",
    u"乐山", u"甘孜藏族自治州", u"泸州", u"阿坝藏族羌族自治州",
    u"凉山彝族自治州",
    u"昭通", u"曲靖", u"红河哈尼族彝族自治州", u"丽江", u"西双版纳傣族自治州",
    u"保山", u"文山壮族苗族自治州", u"大理白族自治州", u"怒江傈僳族自治州",
    u"迪庆藏族自治州", u"玉溪", u"普洱", u"昆明", u"楚雄彝族自治州",
    u"德宏傣族景颇族自治州", u"临沧",
    u"长春", u"辽源", u"吉林", u"白城", u"松原", u"四平", u"延边朝鲜族自治州",
    u"白山", u"通化",
]

# Local MySQL connection. WX_DB_PASSWORD overrides the original hard-coded
# password so the credential does not have to live in the source file.
db = pymysql.connect(host='127.0.0.1', port=3306, user='json',
                     passwd=os.environ.get('WX_DB_PASSWORD', '123456'),
                     db='youxia', charset="utf8")
cursor = db.cursor()
# Parameterized insert: six company fields, then the title and the keyword
# that produced the hit.
sql = 'insert into wx_pay_company(dis,userName,phone,email,address,industry,title,searchword) values(%s,%s,%s,%s,%s,%s,%s,%s)'
# Record keys / DB columns in the order the labels appear on a complete entry.
_FIELD_ORDER = ('dis', 'userName', 'phone', 'email', 'address', 'industry')

# Label text with whitespace and colons removed -> record key.
# NOTE(review): the label texts come from the commented-out mapping earlier in
# this file; confirm they still match the live page.
_LABEL_FIELDS = {
    u'拓展地域': 'dis',
    u'联系人': 'userName',
    u'电话': 'phone',
    u'邮箱': 'email',
    u'地址': 'address',
    u'所属行业': 'industry',
}

# Padding inside the labels varies, so strip every kind of whitespace
# (including NBSP and the ideographic space) plus ASCII/full-width colons.
_LABEL_NOISE = re.compile(u'[\\s\u00a0\u3000:\uff1a]+', re.UNICODE)


def _field_for_label(label_text):
    """Return the record key for a page label, or None if it is not known."""
    return _LABEL_FIELDS.get(_LABEL_NOISE.sub(u'', label_text))


def _collect_companies(browser):
    """Parse every company entry on the result page currently displayed.

    Returns a list of dicts holding 'title' plus every key in _FIELD_ORDER;
    fields that an entry does not show are left as ''.
    """
    companies = []
    for entry in browser.find_elements_by_css_selector('#searchResultList dl'):
        try:
            title = entry.find_element_by_tag_name('dt').text
        except NoSuchElementException:
            # A <dl> without a <dt> is not a company entry.
            continue

        record = dict.fromkeys(_FIELD_ORDER, '')
        record['title'] = title

        labels = entry.find_elements_by_class_name('lbl')
        values = entry.find_elements_by_class_name('ele')
        # Only an entry showing all six labels can be mapped by position.
        positional_ok = len(labels) == len(_FIELD_ORDER)
        # zip() stops at the shorter list, so a missing value element cannot
        # raise IndexError.
        for position, (label, value) in enumerate(zip(labels, values)):
            label_text = label.text
            field = _field_for_label(label_text)
            if field is None and positional_ok:
                field = _FIELD_ORDER[position]
            if field is None:
                print(u'skipping unrecognized label: ' + label_text)
                continue
            record[field] = value.text
        companies.append(record)
    return companies


try:
    for keyword in keyWords:
        print(keyword)
        driver.get("https://pay.weixin.qq.com/index.php/partner/public/search")
        time.sleep(3)

        search_box = driver.find_element_by_id('searchPortalText')
        search_box.clear()
        search_box.send_keys(keyword)
        driver.find_element_by_id("searchPortalSubmit").click()
        time.sleep(1)

        # The "empty result" message carries the `hide` class when there is
        # something to show; if no hidden message is found, treat the search
        # as having no results.
        try:
            driver.find_element_by_css_selector('.page-msg.mini.page-empty.hide')
        except NoSuchElementException:
            print(keyword + '没有查询结果')
            continue

        pager_class = driver.find_element_by_id(
            "service_provider_query_page").get_attribute('class') or ''
        alist = []
        if 'hide' not in pager_class:
            # Pager visible: walk every page through the "go to page" box.
            print('结果多页')
            total_pages = int(driver.find_element_by_name("totalpage").text)
            for page in range(1, total_pages + 1):
                print(page)
                goto_input = driver.find_element_by_css_selector('.goto-area input')
                goto_input.clear()
                goto_input.send_keys(str(page))
                driver.find_element_by_css_selector('.goto-area a').click()
                time.sleep(3)
                alist.extend(_collect_companies(driver))
        else:
            print('结果单页')
            alist = _collect_companies(driver)

        if alist:
            print('插入数据库')
            rows = [
                (company['dis'], company['userName'], company['phone'],
                 company['email'], company['address'], company['industry'],
                 company['title'], keyword)
                for company in alist
            ]
            # One batch insert and one commit per keyword.
            cursor.executemany(sql, rows)
            db.commit()
finally:
    # Release the database and the browser even when a search fails midway.
    try:
        cursor.close()
        db.close()
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()

print('运行结束')
相关文章
相关标签/搜索