很久没有写博客了。上一篇博客《模拟登录 smartqq》的浏览量达到了 2000+,也许大家对这类内容比较好奇吧。出于种种原因,之前 smartqq 的百度云链接已经关闭了;之所以关闭,是因为核心代码已经给出,拒绝伸手党也是促进爬虫事业进步的一大原则性问题。好了,废话不多说,上代码!
package com.login;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
* 来自一个有理想的爬虫工程师
* 爬虫工程师群:142351055
* qq:1069478446
* @author lotenr
*
*/
public class Login {
private String iamgeImg="D:\\jj";
String redirect_uri="";
CloseableHttpClient https = HttpClients.createDefault();
public static void main(String[] args)
{
System.setProperty ("jsse.enableSNIExtension", "false");
Login l=new Login();
l.initpage();
String appid=l.getPng1();
if(!"".equals(appid))
{
l.getPng2(appid);
}
for(int i=0;;i++)
{
int cf=l.checklogin(appid);
if(cf==3)
{
System.out.println("已在手机端确认");
break;
}
if(cf==2)
{
appid=l.getPng1();
if(!"".equals(appid))
{
l.getPng2(appid);
}
}
if(cf==1)
{
continue;
}
try {
Thread.sleep(13000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
l.login();apache
}
/**
* 初始化页面
*/
public void initpage()
{
HttpGet httpPost=new HttpGet("https://wx.qq.com/");app
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
//System.out.println(html);
} catch (ClientProtocolException e) {分布式
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 下载二维码之获取参数
*/
public String getPng1()
{
String url="https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_="+System.currentTimeMillis();
HttpGet httpPost=new HttpGet(url);ui
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
if(html.indexOf("window.QRLogin.code = 200")!=-1)
{
return html.replace("window.QRLogin.code = 200; window.QRLogin.uuid = \"", "").replace("\";", "");
}
} catch (ClientProtocolException e) {this
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return "";
}
/**
* 下载二维码
* @param appid
* @return
*/
public void getPng2(String appid)
{
String url="https://login.weixin.qq.com/qrcode/"+appid;
HttpGet httpget = new HttpGet(url);
System.out.println("获取二维码:Executing request " + httpget.getURI());//开始
String html="";
FileOutputStream fos;
try {
HttpResponse response = https.execute(httpget);
System.out.println(response.getStatusLine());
InputStream inputStream = response.getEntity().getContent();
File file = new File(this.iamgeImg);
if (!file.exists()) {
file.mkdirs();
}
fos = new FileOutputStream("D:\\jj\\test.jpg");
byte[] data = new byte[1024];
int len = 0;
while ((len = inputStream.read(data)) != -1) {
fos.write(data, 0, len);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println(html);
}
public int checklogin(String appid)
{
String url="https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid="+appid+"&tip=0&r=123&_="+System.currentTimeMillis();
System.out.println(url);
HttpGet httpPost=new HttpGet(url);
httpPost.setHeader("Host", "login.wx.qq.com");
httpPost.setHeader("Pragma", "no-cache");
httpPost.setHeader("Referer", "https://wx.qq.com/");
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");
httpPost.setHeader("Connection", "keep-alive");
int timeout = 200000;
// System.out.println("Executing request " +
// httpget.getRequestLine());
RequestConfig config = RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout)
.setConnectionRequestTimeout(timeout).build();
httpPost.setConfig(config);
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
if(html.indexOf("408")!=-1)
{
return 1;
}
if(html.indexOf("400")!=-1)
{url
return 2;
}
if(html.indexOf("200")!=-1)
{
int start=html.indexOf("https");
html=html.substring(start).replace("\";", "");
this.redirect_uri=html;
System.out.println(this.redirect_uri);
return 3;
}
} catch (ClientProtocolException e) {.net
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return 0;
}
public void login()
{
HttpGet httpPost=new HttpGet(this.redirect_uri);
httpPost.setHeader("Host", "wx.qq.com");
httpPost.setHeader("Pragma", "no-cache");
httpPost.setHeader("Referer", "https://wx.qq.com/?&lang=zh_CN");
httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");
httpPost.setHeader("Connection", "keep-alive");
String html="";
try {
HttpResponse response = https.execute(httpPost);
HttpEntity entitySort = response.getEntity();
html=EntityUtils.toString(entitySort, "utf-8");
System.out.println(html);
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} } 好,到这里,登陆功能已经实现了,是否是很简单呢?其实爬虫的难度并非在爬上,而是在爬虫系统的设计上,如何让爬虫高效,健壮,可分布式,使用起来跟家方便才是难点