转 百度空间文章列表提取工具
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>百度空间文章列表提取工具 by join</title>
<style>
/* One small font size for the whole tool. */
*{
  font-size:12px;
}
/* Row holding the blog-name input and the initialise button. */
#head{
  text-align:center;
}
/* Page/category chooser; the script switches it to display:block after initialisation. */
#choose{
  display:none;
}
/* List of URLs that failed to download; the script reveals it on the first failure. */
#error{
  display:none;
}
/* Save button; the script reveals it once extraction has finished. */
#savelist{
  display:none;
}
/* Fixed-width, left-floated columns of one result row. */
.count{
  width:50px;
  float:left;
  display:block;
}
.url{
  width:180px;
  float:left;
  display:block;
}
/*
 * Title column: long titles are kept on one line and clipped with an ellipsis.
 * The original wrote "+overflow:hidden", a property hack that only IE 7 and
 * older honour; everywhere else the declaration was dropped, so nothing was
 * clipped and text-overflow had no effect. The plain property works in all
 * engines, old IE included.
 */
.tit{
  width:400px;
  float:left;
  display:block;
  overflow:hidden;
  text-overflow:ellipsis;
  white-space:nowrap;
}
.date{
  width:150px;
  float:left;
  display:block;
}
.clg{
  width:150px;
  float:left;
  display:block;
}
</style>
</head>
<script language="javascript" type="text/javascript">
// ---------------------------------------------------------------------------
// Shared state. Every function in this script reads and writes these globals.
// ---------------------------------------------------------------------------
// Single request object reused for every download (ActiveX, so Internet Explorer only).
var XmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
// Running row number; reset when an extraction starts and incremented per article in doList().
var count;
// Delay in milliseconds passed to setTimeout() before a downloaded page is processed.
var timer = 1500;
// Scratch loop index and substring offsets shared by the parsing functions.
var i;
var start;
var mid;
var end;
// text: body of the most recent successful response; text2/texts: scratch slices of it.
var text = "";
var text2 = "";
var texts;
// Blog name taken from the #blogurl input, and the URLs built from it.
var user = "";
var BlogUrl = "";
// NOTE(review): BlogListUrl is never referenced by the visible code (getList() uses "BlogList").
var BlogListUrl = "";
// State of the "extract by category" checkbox.
var isArtclg;
// Index of the next list page to download.
var page = 0;
// Last page index as parsed from the pager links by getArtclg().
var page_end;
// The two <select> elements (assigned in getArtclg()) and the values chosen in them.
var page_min;
var page_max;
var page_min_value;
var page_max_value;
// Value of the selected category.
var artclg;
// ---------------------------------------------------------------------------
// Literal markers searched for in the downloaded HTML.
// ---------------------------------------------------------------------------
var artclg_a = "<div id=\"m_artclg\" class=\"modbox\">";// Article categories
var artclg_b = "<div id=\"mod_filed\" class=\"mod\">";
var artclg_i = "<div class=\"line\">";
var artclg_c = "title=\"查看该分类中所有文章\">";
var artclg_d = "</a>";
var page_a = "[下一页]";// Page count
var page_b = "[尾页]";
var page_c = "/blog/index/";
var page_d = "\">";
var bloglist_a = "<div id=\"m_blog\" class=\"modbox\">";// Article list
var bloglist_b = "function setpv(allnum)";
var bloglist_i = "<div class=\"line\">";
var bloglist_mid = "innerHTML = tps;";
// Per-article fields extracted by doList(), each followed by its start/end markers.
var tit;
var tit_a = "target=\"_blank\">";
var tit_b = "</a></div>";
var url;
var url_a = "/blog/item/";
var url_b = ".html";
var date ;
var date_a = "<div class=\"date\">";
var date_b = "</div>";
var clg;
var clg_a = ">类别:";
var clg_b = "</a>";
// NOTE(review): bloglist_list is declared but not used by the visible code.
var bloglist_list;
/**
 * Shorthand element lookup.
 * @param {string} id  Value of the element's id attribute.
 * @returns Whatever document.getElementById returns for that id.
 */
function $(id){
  var element = document.getElementById(id);
  return element;
}
/**
 * Wires up the three buttons once the page has loaded.
 *
 * The "extract" handler snapshots the chosen page range and category into the
 * shared globals, resets the row counter and starts the download chain.
 */
window.onload = function Listening(){
  $("initialize").onclick = Initialize;
  $("getList").onclick = function(){
    // <select>.value is a string. Stored as-is, the first range check in
    // getList() compared two strings, so e.g. "2" <= "10" was false and a
    // range starting on page 3 and ending on page 11 fetched nothing.
    page_min_value = parseInt($("page_min").value, 10);
    page_max_value = parseInt($("page_max").value, 10);
    page = page_min_value;
    isArtclg = $("isArtclg").checked;
    artclg = $("artclg").value;
    count = 0;
    getList();
  };
  $("savelist").onclick = saveList;
};
/**
 * Downloads a page asynchronously through the shared XmlHttp object.
 *
 * On HTTP 200 the body is stored in the global `text` and `fun` is scheduled
 * after `timer` milliseconds. On any other status the URL is appended to the
 * #error box, which is made visible.
 *
 * @param {string} url  Address to fetch.
 * @param {string|Function} fun  Callback handed to setTimeout(); callers in
 *        this file pass code strings, functions work as well.
 */
function getHtml(url,fun){
  // Canonical member casing: the ActiveX object accepts any casing, a native
  // XMLHttpRequest only this one.
  XmlHttp.open("get",url,true);
  // Attach the handler after open() (which resets it on a reused object) and
  // before send(), so no state change can be missed.
  XmlHttp.onreadystatechange = function(){
    if(XmlHttp.readyState != 4){
      return;
    }
    if(XmlHttp.status == 200){
      text = XmlHttp.responseText;
      setTimeout(fun,timer);
    }else{
      var errorBox = $("error");
      errorBox.style.cssText = "display:block;text-align:center;";
      // The URL contains user input; add it as text rather than as markup.
      errorBox.appendChild(document.createTextNode(url));
      errorBox.appendChild(document.createElement("br"));
    }
  };
  XmlHttp.send(null);
}
/**
 * Click handler of the initialise button: reads the blog name from #blogurl,
 * builds the blog's front-page URL and downloads it; getArtclg() then parses
 * the result.
 */
function Initialize(){
  var entered = $("blogurl").value;
  if(user == entered){
    // Same name as last time: a slash is appended, which changes the URL.
    // NOTE(review): presumably this avoids a cached response - confirm.
    user += "/";
  }else{
    user = entered;
  }
  BlogUrl = "http://hi.baidu.com/" + user + "/blog";
  getHtml(BlogUrl,"getArtclg()");
}
/**
 * Parses the downloaded blog front page (global `text`) and fills the
 * chooser: the category <select> and the two page-range <select>s. Finally
 * makes the #choose panel visible.
 *
 * Changes from the original: the category list element is a local instead of
 * an implicit global, all three lists are emptied first so that initialising
 * a second blog leaves no stale entries, and the page loop uses an explicit
 * integer bound.
 */
function getArtclg(){
  var artclg_list = $("artclg");
  page_min = $("page_min");
  page_max = $("page_max");

  // Drop whatever a previous initialisation left behind.
  artclg_list.options.length = 0;
  page_min.options.length = 0;
  page_max.options.length = 0;

  // Categories: the region between the two module markers, cut at every
  // separator; the piece after the last separator is not a category.
  var from = text.indexOf(artclg_a);
  var to = text.indexOf(artclg_b);
  var pieces = text.substring(from,to).split(artclg_i);
  for(var n=0;n<pieces.length-1;n++){
    from = pieces[n].indexOf(artclg_c) + artclg_c.length;
    to = pieces[n].indexOf(artclg_d);
    var category = pieces[n].substring(from,to);
    artclg_list.options[n] = new Option(category, category);
  }

  // Last page index: read from the link found between the "next page" and
  // "last page" labels.
  var pager = text.substring(text.indexOf(page_a),text.indexOf(page_b));
  from = pager.indexOf(page_c) + page_c.length;
  to = pager.indexOf(page_d);
  page_end = pager.substring(from,to);
  // Without a pager the string is empty; treat that as a single page, which
  // is what the original's string-to-number coercion produced.
  var lastPage = parseInt(page_end,10);
  if(isNaN(lastPage)){
    lastPage = 0;
  }
  for(var p=0;p<=lastPage;p++){
    // Option values are zero-based page indexes, labels are one-based.
    page_min.options[p] = new Option("第"+(p+1)+"页",p);
    page_max.options[p] = new Option("第"+(p+1)+"页",p);
  }
  $("choose").style.cssText = "display:block;text-align:center;";
}
/**
 * One step of the download chain: fetches list page `page` (all articles, or
 * one category when `isArtclg` is set) while it is within the chosen range,
 * otherwise announces completion and reveals the save button. Always advances
 * `page`; the callback string passed to getHtml() re-enters this function
 * after each page has been processed.
 */
function getList(){
  // Compare as numbers. Both values may still be <select> strings on the
  // first call, and "2" <= "10" is false when compared as strings.
  if(Number(page) <= Number(page_max_value)){
    // Local variable; the original leaked "BlogList" as an implicit global.
    var BlogList = "http://hi.baidu.com/" + user + "/blog/index/" + page;
    if(isArtclg){
      BlogList = "http://hi.baidu.com/" + user + "/blog/category/" + UrlEncode(artclg) + "/index/"+ page;
    }
    getHtml(BlogList,"doList();getList();");
  }else{
    alert("获取完成");
    $("savelist").style.cssText = "display:block;";
  }
  page++;
}
// Parses the list page held in the global `text` and prepends one row per
// article to the #list element. Works entirely through the shared globals
// (start, mid, end, text2, texts, url, tit, date, clg, count).
// NOTE(review): the extracted values are inserted as HTML without escaping.
function doList(){
// Cut out the region between the article-list marker and the script marker
// that follows it, then split it at every separator.
start = text.indexOf(bloglist_a);
end = text.indexOf(bloglist_b);
text2 = text.substring(start,end);
texts = text2.split(bloglist_i);
// The piece after the last separator is not processed.
for(i=0;i<texts.length-1;i++){
// Article id: the text between "/blog/item/" and ".html".
start = texts[i].indexOf(url_a) + url_a.length;
end = texts[i].indexOf(url_b);
url = texts[i].substring(start,end);
// Title: the text between the link's opening tag end and its closing tags.
start = texts[i].indexOf(tit_a) + tit_a.length;
end = texts[i].indexOf(tit_b);
tit = texts[i].substring(start,end);
// Narrow the piece to the span from the date block through the end of the
// bloglist_mid marker; the remaining lookups search only that span.
start = texts[i].indexOf(date_a);
mid = texts[i].indexOf(bloglist_mid) + bloglist_mid.length;
texts[i] = texts[i].substring(start,mid);
// Date: the content of the date block.
start = texts[i].indexOf(date_a) + date_a.length;
end = texts[i].indexOf(date_b);
date = texts[i].substring(start,end);
// Narrow again: from the category label up to (not including) bloglist_mid.
start = texts[i].indexOf(clg_a);
mid = texts[i].indexOf(bloglist_mid);
texts[i] = texts[i].substring(start,mid);
// Category: the text between the category label and the next "</a>".
start = texts[i].indexOf(clg_a) + clg_a.length;
end = texts[i].indexOf(clg_b);
clg = texts[i].substring(start,end);
count++;
// Build the row; formatList() later turns the DIV/SPAN structure into a table.
text2 = "<br><div>";
text2 += "<span class='count'>" + count + "</span>";
text2 += "<span class='url'>" + url + "</span>";
text2 += "<span class='tit'><a href='http://hi.baidu.com/"+user+"/blog/item/"+url+".html' target='blank'>" + tit + "</a></span>";
text2 += "<span class='date'>" + date + "</span>";
text2 += "<span class='clg'>" + clg + "</span>";
text2 += "</div>";
// "afterBegin" inserts at the top, so the most recently parsed row comes first.
$("list").insertAdjacentHTML("afterBegin",text2);
}
}
/**
 * Percent-encodes a string using the byte values reported by str2asc()
 * (the VBScript helper on this page), i.e. in the system's ANSI code page
 * rather than UTF-8.
 *
 * - codes above 0x7F: each byte of the hex code becomes one %XX group;
 * - a space becomes "+";
 * - characters listed in strSpecial become %XX;
 * - everything else is copied unchanged.
 *
 * Changes from the original: the unused `tt` trace string and the no-op
 * `toString(16)` on an already-hex string are gone, and a single-byte code
 * above 0x7F is emitted once ("%A1") instead of twice ("%A1%A1").
 *
 * @param {string} str  Text to encode.
 * @returns {string} The encoded text.
 */
function UrlEncode(str){
  var ret = "";
  var strSpecial = "!\"#$%&'()*+,/:;<=>?[]^`{|}~%";
  for(var i=0;i<str.length;i++){
    var chr = str.charAt(i);
    var c = str2asc(chr);   // hexadecimal character code, as a string
    if(parseInt("0x"+c) > 0x7f){
      if(c.length > 2){
        // Double-byte character: lead byte, then trail byte.
        ret += "%" + c.slice(0,2) + "%" + c.slice(-2);
      }else{
        ret += "%" + c;
      }
    }else if(chr == " "){
      ret += "+";
    }else if(strSpecial.indexOf(chr) != -1){
      ret += "%" + c;
    }else{
      ret += chr;
    }
  }
  return ret;
}
//保存代码
/**
 * Converts the rows collected in #list into table markup for saving:
 * line breaks are removed, every DIV becomes a table row and every SPAN a
 * cell.
 *
 * Changes from the original: only tag names are rewritten (the old /DIV/ and
 * /SPAN/ patterns also altered those words inside article titles), matching
 * is case-insensitive so lower-case serialised markup is handled too, and the
 * shared response buffer `text` is no longer overwritten.
 *
 * @returns {string} A complete <table> element as an HTML string.
 */
function formatList(){
  var rows = $("list").innerHTML;
  rows = rows.replace(/<br\s*\/?>/gi,"");
  rows = rows.replace(/<(\/?)div\b/gi,"<$1tr");
  rows = rows.replace(/<(\/?)span\b/gi,"<$1td");
  return "<table>" + rows + "</table>";
}
/**
 * Click handler of the save button: writes the formatted table into a blank
 * window and invokes the browser's "Save As" command on it with the suggested
 * file name join.html, then closes the window.
 * NOTE(review): the 'saveas' command is an Internet Explorer feature.
 */
function saveList() {
  var popup = window.open('', '_blank', '');
  if (!popup) {
    // window.open returns null when the popup is blocked; report it instead
    // of failing on a null reference.
    alert("无法打开保存窗口,请允许本页面弹出窗口后重试。");
    return;
  }
  popup.document.open('text/html', 'replace');
  popup.document.writeln(formatList());
  popup.document.close();
  popup.document.execCommand('saveas','','join.html');
  popup.close();
}
</script>
<body>
<script language="vbscript" type="text/vbscript">
' Returns, as a hexadecimal string, the character code that Asc reports for
' the first character of strstr. Called from the JavaScript UrlEncode function.
Function str2asc(strstr)
str2asc = hex(asc(strstr))
End Function
</script>
<div id="head">
<!-- Blog name to scan. The opening "<input" of this tag was missing, so the
     attributes were shown as text and the script could not find #blogurl. -->
<input name="blogurl" type="text" id="blogurl" value="join" />
<input name="initialize" type="button" id="initialize" value="初始化" />
</div>
<!-- Page range and category chooser; hidden until initialisation has filled
     the lists. Each caption is now a <label> tied to its control. -->
<div id="choose">
<label for="page_min">开始页</label>
<select name="page_min" id="page_min">
</select>
<label for="page_max">结束页</label>
<select name="page_max" id="page_max">
</select><br />
<label for="isArtclg">按分类提取?</label>
<input name="isArtclg" type="checkbox" id="isArtclg" value="checkbox" />
<select name="artclg" id="artclg">
</select><br />
<input name="getList" type="button" id="getList" value="提取文章列表" />
<input name="savelist" type="button" id="savelist" value="保存文章列表" />
</div>
<!-- Result rows are inserted here by doList(); formatList() reads this
     element's innerHTML, so keep comments outside of it. -->
<div id="list">
</div>
<!-- URLs that could not be downloaded are appended here by getHtml(). -->
<div id="error">
因网络问题,未获取的url列表:<br />
</div>
</body>
</html>