花了两天时间,搞了个豆瓣自动回帖的程序。。。

原理不难,就是 HttpClient 和 HTMLParser 的东西。

豆瓣为了防止恶意发帖,在回复或者发新帖的时候,有时候需要验证码,这个验证码还不太好识别(如果有高手,请联系我!),不过,我发现了一个程序上的漏洞,可以绕过去。

先打开IE或者其他浏览器进行登录,然后在浏览器里查出 Cookie 和 ck 参数,填到下面代码对应的位置。

 

贴上部分代码,仅供学习参考:

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;

public class DoubanCCSUtils {

public static void main(String[] args) {
comment();
}

public static void comment() {

// String httpUrl = "http://www.douban.com/group/M-P/new_topic";
String httpUrl = "http://www.douban.com/group/";   //我的小组里的最新贴子,只取第一页
// HttpPost连接对象
HttpGet httpGet = new HttpGet(httpUrl);

httpGet.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
// httpGet.addHeader("Accept-Encoding","gzip,deflate,sdch"); 不能压缩,否则乱码,压缩需要浏览器支持
httpGet.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpGet.addHeader("Cache-Control", "max-age=0");
httpGet.addHeader("Connection", "keep-alive");
httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpGet.addHeader(
"Cookie","");  //Cookie,自己查浏览器
httpGet.addHeader("Host", "www.douban.com");
httpGet.addHeader("Origin", "http://www.douban.com");
httpGet.addHeader("Referer", "http://www.douban.com/group/");
httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");

httpGet.addHeader("Content-type", "text/html; charset=utf-8");
// 设置字符集
try {
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();

// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpGet);
// HttpStatus.SC_OK表示连接成功
if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
// 取得返回的字符串
String strResult = EntityUtils.toString(httpResponse.getEntity(), HTTP.UTF_8);
Parser parser = new Parser(strResult);
NodeFilter filter = new TagNameFilter("A");
NodeList nodes = parser.extractAllNodesThatMatch(filter);
if (nodes != null) {
for (int i = 0; i < nodes.size(); i++) {
Node textnode = (Node) nodes.elementAt(i);
String s = textnode.getText();
if (s.contains("http://www.douban.com/group/topic") && s.contains("title=")) {
s = getTopicUrl(s);
addComment(s);
}

}
}
System.out.println("完成!");
}

} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}

public static String getTopicUrl(String text) {
text = text.replace("a href=", "");
text = text.replace("\"", "");
String[] arr = text.split(" ");

return arr[0];
}

public static void addComment(String httpUrl) {

System.out.println(httpUrl);
httpUrl = httpUrl + "add_comment#last";
// HttpPost连接对象
HttpPost httpPost = new HttpPost(httpUrl);

httpPost.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
httpPost.addHeader("Accept-Encoding", "gzip,deflate,sdch");
httpPost.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
httpPost.addHeader("Cache-Control", "max-age=0");
httpPost.addHeader("Connection", "keep-alive");
httpPost.addHeader("Content-Type", "application/x-www-form-urlencoded");
// ck,dbcly这两个参数会变化
httpPost.addHeader(
"Cookie","");  //这个Cookie,自己根据浏览器去查吧
httpPost.addHeader("Host", "www.douban.com");
httpPost.addHeader("Origin", "http://www.douban.com");
httpPost.addHeader("Referer", "http://www.douban.com/group/");
httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");

// 使用NameValuePair来保存要传递的Post参数
List<NameValuePair> params = new ArrayList<NameValuePair>();

// 添加要传递的参数
params.add(new BasicNameValuePair("ck", "VdIW"));
params.add(new BasicNameValuePair("rv_comment", getMyComment()));
params.add(new BasicNameValuePair("start", "0"));
params.add(new BasicNameValuePair("submit_btn", "加上去"));

//params.add(new BasicNameValuePair("captcha-solution", "monkey"));
//params.add(new BasicNameValuePair("captcha-id", "YTYPMnsapAJsXw0o2w6T5SY5"));

// 设置字符集
try {
HttpEntity httpentity = new UrlEncodedFormEntity(params, "utf-8");
// 请求httpPost
httpPost.setEntity(httpentity);
// 取得默认的HttpClient
HttpClient httpclient = new DefaultHttpClient();
// 取得HttpResponse
HttpResponse httpResponse = httpclient.execute(httpPost);
int status = httpResponse.getStatusLine().getStatusCode();
System.out.println(status);
if (status == 200) { //200实际意味着失败,需要验证码
DoubanVCUtils.getDoubanVC();  //解决验证码问题
}
if (status == 302) { //302转向意味着成功了

}

Thread.currentThread().sleep(5000); // 设置暂停毫秒,防止引起豆瓣注意, 这个时间可长可短,根据需要

} catch (Exception e) {
System.out.println(e.getLocalizedMessage());
}
}

public static String getMyComment() {
String[] comments = new String[20];
comments[0] = "帮顶一下。中国儿童安全网,关注儿童安全每一天!";
comments[1] = "支持楼主!中国儿童安全网,关注儿童安全每一天!";
comments[2] = "占个坑!中国儿童安全网,关注儿童安全每一天!";
省略部分。。。。

Random r = new Random();
int k = r.nextInt(20);

String s = "abcdefghijklmnopqrstuvwxyz";
char t[] = new char[26];
for (int x = 0; x < 26; x++) {
t[x] = s.charAt(x);
}
int j = r.nextInt(26);
return comments[k] + t[j];   //防止内容重复,豆瓣有检查机制
}
}

posted @ 2013-09-15 16:53  神州第一媒  阅读(2043)  评论(1)  编辑  收藏  举报