原理不难,就是 HttpClient 和 HTMLParser 的东西。
豆瓣为了防止恶意发贴,在回复或者发新贴的时候,有时候需要验证码,这个验证码还不太好识别(如果有高手,请联系我!),不过,我发现了一个程序上的漏洞,可以绕过去。
先打开IE或者其他浏览器进行登录。
贴上部分代码,仅供学习参考:
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
public class DoubanCCSUtils {app
public static void main(String[] args) {
comment();
}dom
public static void comment() {学习
// String httpUrl = "http://www.douban.com/group/M-P/new_topic";
String httpUrl = "http://www.douban.com/group/"; //个人小组里的最新贴子,只取第一页
// HttpPost链接对象
HttpGet httpGet = new HttpGet(httpUrl);url
httpGet.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
// httpGet.addHeader("Accept-Encoding","gzip,deflate,sdch"); 不能压缩,不然乱码,压缩须要浏览器支持
httpGet.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpGet.addHeader("Cache-Control", "max-age=0");
httpGet.addHeader("Connection", "keep-alive");
httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpGet.addHeader(
"Cookie",""); //Cookie,本身查浏览器
httpGet.addHeader("Host", "www.douban.com");
httpGet.addHeader("Origin", "http://www.douban.com");
httpGet.addHeader("Referer", "http://www.douban.com/group/");
httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");
httpGet.addHeader("Content-type", "text/html; charset=utf-8");
// 设置字符集
try {
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();
// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpGet);
// HttpStatus.SC_OK表示链接成功
if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
// 取得返回的字符串
String strResult = EntityUtils.toString(httpResponse.getEntity(), HTTP.UTF_8);
Parser parser = new Parser(strResult);
NodeFilter filter = new TagNameFilter("A");
NodeList nodes = parser.extractAllNodesThatMatch(filter);
if (nodes != null) {
for (int i = 0; i < nodes.size(); i++) {
Node textnode = (Node) nodes.elementAt(i);
String s = textnode.getText();
if (s.contains("http://www.douban.com/group/topic") && s.contains("title=")) {
s = getTopicUrl(s);
addComment(s);
}
}
}
System.out.println("完成!");
}
} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}
public static String getTopicUrl(String text) {
text = text.replace("a href=", "");
text = text.replace("\"", "");
String[] arr = text.split(" ");
return arr[0];
}
public static void addComment(String httpUrl) {
System.out.println(httpUrl);
httpUrl = httpUrl + "add_comment#last";
// HttpPost链接对象
HttpPost httpPost = new HttpPost(httpUrl);
httpPost.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
httpPost.addHeader("Accept-Encoding", "gzip,deflate,sdch");
httpPost.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpPost.addHeader("Cache-Control", "max-age=0");
httpPost.addHeader("Connection", "keep-alive");
httpPost.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpPost.addHeader(
"Cookie",""); //这个Cookie,本身根据浏览器去查吧
httpPost.addHeader("Host", "www.douban.com");
httpPost.addHeader("Origin", "http://www.douban.com");
httpPost.addHeader("Referer", "http://www.douban.com/group/");
httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");
// 使用NameValuePair来保存要传递的Post参数
List<NameValuePair> params = new ArrayList<NameValuePair>();
// 添加要传递的参数
params.add(new BasicNameValuePair("ck", "VdIW"));
params.add(new BasicNameValuePair("rv_comment", getMyComment()));
params.add(new BasicNameValuePair("start", "0"));
params.add(new BasicNameValuePair("submit_btn", "加上去"));
//params.add(new BasicNameValuePair("captcha-solution", "monkey"));
//params.add(new BasicNameValuePair("captcha-id", "YTYPMnsapAJsXw0o2w6T5SY5"));
// 设置字符集
try {
HttpEntity httpentity = new UrlEncodedFormEntity(params, "utf-8");
// 请求httpPost
httpPost.setEntity(httpentity);
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();
// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpPost);
int status = httpResponse.getStatusLine().getStatusCode();
System.out.println(status);
if (status == 200) { //200实际意味着失败,须要验证码
DoubanVCUtils.getDoubanVC(); //解决验证码问题
}
if (status == 302) { //302转向意味着成功了
}
Thread.currentThread().sleep(5000); // 设置暂停毫秒,防止引发豆瓣注意, 这个时间可长可短,根据须要
} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}
public static String getMyComment() {
String[] comments = new String[20];
comments[0] = "帮顶一下。中国儿童安全网,关注儿童安全每一天!";
comments[1] = "支持楼主!中国儿童安全网,关注儿童安全每一天!";
comments[2] = "占个坑!中国儿童安全网,关注儿童安全每一天!";
省略部分。。。。
Random r = new Random();
int k = r.nextInt(20);
String s = "abcdefghijklmnopqrstuvwxyz"; char t[] = new char[26]; for (int x = 0; x < 26; x++) { t[x] = s.charAt(x); } int j = r.nextInt(26); return comments[k] + t[j]; //防止内容重复,豆瓣有检查机制 }}