花了两天时间，搞了个豆瓣自动回帖的程序。。。

原理不难，就是 HTTP client 和 HTML parser 的东西。

豆瓣为了防止恶意发帖，在回复或者发新帖的时候，有时候需要验证码，这个验证码还不太好识别（如果有高手，请联系我！），不过，我发现了一个程序上的漏洞，可以绕过去。

先打开IE或者其他浏览器进行登录，然后从浏览器中查出登录后的 Cookie，填到下面代码的 Cookie 请求头里。

贴上部分代码，仅供学习参考：

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;

public class DoubanCCSUtils {

public static void main(String[] args) {
comment();
}

public static void comment() {

// String httpUrl = "http://www.douban.com/group/M-P/new_topic";
String httpUrl = "http://www.douban.com/group/"; //我的小组里的最新贴子，只取第一页
// HttpPost连接对象
HttpGet httpGet = new HttpGet(httpUrl);

httpGet.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
// httpGet.addHeader("Accept-Encoding","gzip,deflate,sdch"); 不能压缩，否则乱码，压缩需要浏览器支持
httpGet.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpGet.addHeader("Cache-Control", "max-age=0");
httpGet.addHeader("Connection", "keep-alive");
httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpGet.addHeader(
"Cookie",""); //Cookie，自己查浏览器
httpGet.addHeader("Host", "www.douban.com");
httpGet.addHeader("Origin", "http://www.douban.com");
httpGet.addHeader("Referer", "http://www.douban.com/group/");
httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");

httpGet.addHeader("Content-type", "text/html; charset=utf-8");
// 设置字符集
try {
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();

// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpGet);
// HttpStatus.SC_OK表示连接成功
if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
// 取得返回的字符串
String strResult = EntityUtils.toString(httpResponse.getEntity(), HTTP.UTF_8);
Parser parser = new Parser(strResult);
NodeFilter filter = new TagNameFilter("A");
NodeList nodes = parser.extractAllNodesThatMatch(filter);
if (nodes != null) {
for (int i = 0; i < nodes.size(); i++) {
Node textnode = (Node) nodes.elementAt(i);
String s = textnode.getText();
if (s.contains("http://www.douban.com/group/topic") && s.contains("title=")) {
s = getTopicUrl(s);
addComment(s);
}

}
}
System.out.println("完成！");
}

} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}

public static String getTopicUrl(String text) {
text = text.replace("a href=", "");
text = text.replace("\"", "");
String[] arr = text.split(" ");

return arr[0];
}

public static void addComment(String httpUrl) {

System.out.println(httpUrl);
httpUrl = httpUrl + "add_comment#last";
// HttpPost连接对象
HttpPost httpPost = new HttpPost(httpUrl);

httpPost.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
httpPost.addHeader("Accept-Encoding", "gzip,deflate,sdch");
httpPost.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpPost.addHeader("Cache-Control", "max-age=0");
httpPost.addHeader("Connection", "keep-alive");
httpPost.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpPost.addHeader(
"Cookie",""); //这个Cookie，自己根据浏览器去查吧
httpPost.addHeader("Host", "www.douban.com");
httpPost.addHeader("Origin", "http://www.douban.com");
httpPost.addHeader("Referer", "http://www.douban.com/group/");
httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");

// 使用NameValuePair来保存要传递的Post参数
List<NameValuePair> params = new ArrayList<NameValuePair>();

// 添加要传递的参数
params.add(new BasicNameValuePair("ck", "VdIW"));
params.add(new BasicNameValuePair("rv_comment", getMyComment()));
params.add(new BasicNameValuePair("start", "0"));
params.add(new BasicNameValuePair("submit_btn", "加上去"));

//params.add(new BasicNameValuePair("captcha-solution", "monkey"));
//params.add(new BasicNameValuePair("captcha-id", "YTYPMnsapAJsXw0o2w6T5SY5"));

// 设置字符集
try {
HttpEntity httpentity = new UrlEncodedFormEntity(params, "utf-8");
// 请求httpPost
httpPost.setEntity(httpentity);
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();
// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpPost);
int status = httpResponse.getStatusLine().getStatusCode();
System.out.println(status);
if (status == 200) { //200实际意味着失败，需要验证码
DoubanVCUtils.getDoubanVC(); //解决验证码问题
}
if (status == 302) { //302转向意味着成功了

}

Thread.currentThread().sleep(5000); // 设置暂停毫秒，防止引起豆瓣注意，这个时间可长可短，根据需要

} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}

public static String getMyComment() {
String[] comments = new String[20];
comments[0] = "帮顶一下。中国儿童安全网，关注儿童安全每一天！";
comments[1] = "支持楼主！中国儿童安全网，关注儿童安全每一天！";
comments[2] = "占个坑！中国儿童安全网，关注儿童安全每一天！";
省略部分。。。。

Random r = new Random();
int k = r.nextInt(20);

String s = "abcdefghijklmnopqrstuvwxyz";
char t[] = new char[26];
for (int x = 0; x < 26; x++) {
t[x] = s.charAt(x);
}
int j = r.nextInt(26);
return comments[k] + t[j]; //防止内容重复，豆瓣有检查机制
}
}

posted @ 2013-09-15 16:53 神州第一媒 阅读(2043) 评论(1) 编辑 收藏 举报

会员力量，点亮园子希望

刷新页面返回顶部

花了两天时间，搞了个豆瓣自动回帖的程序。。。

公告