转 百度空间文章列表提取工具

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>百度空间文章列表提取工具 by join</title>
<style>
* { font-size: 12px }
#head { text-align: center; }
#choose { display: none; }
#error { display: none; }
#savelist { display: none; }
.count { width: 50px; float: left; display: block; }
.url { width: 180px; float: left; display: block; }
.tit { width: 400px; float: left; display: block; +overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.date { width: 150px; float: left; display: block; }
.clg { width: 150px; float: left; display: block; }
</style>
<!--
  Baidu Space article-list extractor.

  Flow: "initialize" fetches the blog front page and fills the category and
  page-range selects; "getList" then walks the chosen page range one request
  at a time, scraping each listing page into #list; "savelist" converts the
  collected rows into a table and offers it through a save dialog.

  This page relies on ActiveXObject, VBScript, insertAdjacentHTML and
  execCommand('saveas'), so it only runs in Internet Explorer.
-->
<script type="text/javascript">
// Single request object reused for every fetch.
var XmlHttp = new ActiveXObject("Microsoft.XMLHTTP");

var count;                 // running row number shown in the first column
var timer = 1500;          // delay (ms) between a response and its callback
var i;
var start;
var mid;
var end;
var text = "";             // body of the most recent successful response
var text2 = "";
var texts;
var user = "";
var BlogUrl = "";
var BlogListUrl = "";
var BlogList = "";         // URL of the listing page currently being fetched
var isArtclg;              // true when extraction is restricted to one category
var page = 0;              // index of the next listing page to fetch
var page_end;
var page_min;              // <select id="page_min">
var page_max;              // <select id="page_max">
var page_min_value;
var page_max_value;
var artclg;                // selected category name
var artclg_list;           // <select id="artclg">

// Markers delimiting the article-category box in the fetched page.
var artclg_a = "<div id=\"m_artclg\" class=\"modbox\">";
var artclg_b = "<div id=\"mod_filed\" class=\"mod\">";
var artclg_i = "<div class=\"line\">";
var artclg_c = "title=\"查看该分类中所有文章\">";
var artclg_d = "</a>";

// Markers used to read the index of the last page from the pager links.
var page_a = "[下一页]";
var page_b = "[尾页]";
var page_c = "/blog/index/";
var page_d = "\">";

// Markers delimiting the article list and the fields of each entry.
var bloglist_a = "<div id=\"m_blog\" class=\"modbox\">";
var bloglist_b = "function setpv(allnum)";
var bloglist_i = "<div class=\"line\">";
var bloglist_mid = "innerHTML = tps;";
var tit;
var tit_a = "target=\"_blank\">";
var tit_b = "</a></div>";
var url;
var url_a = "/blog/item/";
var url_b = ".html";
var date;
var date_a = "<div class=\"date\">";
var date_b = "</div>";
var clg;
var clg_a = ">类别:";
var clg_b = "</a>";
var bloglist_list;

// Shorthand for document.getElementById.
function $(id) {
  return document.getElementById(id);
}

// Wire the three buttons once the document has loaded.
window.onload = function Listening() {
  $("initialize").onclick = Initialize;
  $("getList").onclick = function () {
    // Select values are strings; convert them so the range test in getList
    // compares numbers ("2" <= "10" is false when compared as strings).
    page_min_value = parseInt($("page_min").value, 10);
    page_max_value = parseInt($("page_max").value, 10);
    page = page_min_value;
    isArtclg = $("isArtclg").checked;
    artclg = $("artclg").value;
    count = 0;
    getList();
  };
  $("savelist").onclick = saveList;
};

// Fetch `url` asynchronously. On HTTP 200 the body is stored in the global
// `text` and `fun` (a function, or a code string as accepted by setTimeout)
// runs after `timer` ms. On any other status the URL is appended to #error
// and `fun` is not run.
function getHtml(url, fun) {
  XmlHttp.open("get", url, true);
  // Attached after open() and before send(): the object is reused, and the
  // handler has to be in place before the request can complete.
  XmlHttp.onreadystatechange = function () {
    if (XmlHttp.readyState == 4) {
      if (XmlHttp.status == 200) {
        text = XmlHttp.responseText;
        setTimeout(fun, timer);
      } else {
        $("error").style.cssText = "display:block;text-align:center;";
        $("error").insertAdjacentHTML("beforeEnd", url + "<br />");
      }
    }
  };
  XmlHttp.send(null);
}

// Read the user name from #blogurl and fetch that user's blog front page.
function Initialize() {
  // NOTE(review): when the name is unchanged a "/" is appended, presumably so
  // the repeated request uses a different URL than the previous one - confirm.
  if (user != $("blogurl").value)
    user = $("blogurl").value;
  else
    user += "/";
  BlogUrl = "http://hi.baidu.com/" + user + "/blog";
  getHtml(BlogUrl, getArtclg);
}

// Fill the category select and both page selects from the fetched front page
// (held in `text`), then reveal the #choose panel.
function getArtclg() {
  artclg_list = $("artclg");
  page_min = $("page_min");
  page_max = $("page_max");

  // Drop options left over from an earlier initialisation.
  artclg_list.options.length = 0;
  page_min.options.length = 0;
  page_max.options.length = 0;

  start = text.indexOf(artclg_a);
  end = text.indexOf(artclg_b);
  text2 = text.substring(start, end);
  texts = text2.split(artclg_i);
  for (i = 0; i < texts.length - 1; i++) {
    start = texts[i].indexOf(artclg_c) + artclg_c.length;
    end = texts[i].indexOf(artclg_d);
    text2 = texts[i].substring(start, end);
    artclg_list.options[i] = new Option(text2, text2);
  }

  // The last page index is read from the link located between the
  // "next page" and "last page" labels.
  start = text.indexOf(page_a);
  end = text.indexOf(page_b);
  text2 = text.substring(start, end);
  start = text2.indexOf(page_c) + page_c.length;
  end = text2.indexOf(page_d);
  page_end = parseInt(text2.substring(start, end), 10);
  // No pager link found: still offer the first page.
  if (isNaN(page_end)) page_end = 0;
  for (i = 0; i <= page_end; i++) {
    page_min.options[i] = new Option("第" + (i + 1) + "页", i);
    page_max.options[i] = new Option("第" + (i + 1) + "页", i);
  }
  $("choose").style.cssText = "display:block;text-align:center;";
}

// Fetch listing page `page` if it is within the selected range, otherwise
// report completion and reveal the save button. Each successful fetch runs
// doList() and then getList() again, so pages are requested one at a time.
function getList() {
  BlogList = "http://hi.baidu.com/" + user + "/blog/index/" + page;
  if (isArtclg)
    BlogList = "http://hi.baidu.com/" + user + "/blog/category/" + UrlEncode(artclg) + "/index/" + page;
  if (page <= page_max_value) {
    getHtml(BlogList, function () {
      doList();
      getList();
    });
  } else {
    alert("获取完成");
    $("savelist").style.cssText = "display:block;";
  }
  page++;
}

// Scrape the listing page held in `text` and insert one row per article at
// the top of #list.
// NOTE(review): the scraped title/date/category fragments are inserted as
// HTML without escaping - acceptable only while the source page is trusted.
function doList() {
  start = text.indexOf(bloglist_a);
  end = text.indexOf(bloglist_b);
  text2 = text.substring(start, end);
  texts = text2.split(bloglist_i);
  for (i = 0; i < texts.length - 1; i++) {
    // Article id: the part of the item URL between "/blog/item/" and ".html".
    start = texts[i].indexOf(url_a) + url_a.length;
    end = texts[i].indexOf(url_b);
    url = texts[i].substring(start, end);

    start = texts[i].indexOf(tit_a) + tit_a.length;
    end = texts[i].indexOf(tit_b);
    tit = texts[i].substring(start, end);

    // Narrow the fragment to the part starting at the date div before
    // reading the date, then narrow again to the category part.
    start = texts[i].indexOf(date_a);
    mid = texts[i].indexOf(bloglist_mid) + bloglist_mid.length;
    texts[i] = texts[i].substring(start, mid);
    start = texts[i].indexOf(date_a) + date_a.length;
    end = texts[i].indexOf(date_b);
    date = texts[i].substring(start, end);

    start = texts[i].indexOf(clg_a);
    mid = texts[i].indexOf(bloglist_mid);
    texts[i] = texts[i].substring(start, mid);
    start = texts[i].indexOf(clg_a) + clg_a.length;
    end = texts[i].indexOf(clg_b);
    clg = texts[i].substring(start, end);

    count++;
    text2 = "<br><div>";
    text2 += "<span class='count'>" + count + "</span>";
    text2 += "<span class='url'>" + url + "</span>";
    text2 += "<span class='tit'><a href='http://hi.baidu.com/" + user + "/blog/item/" + url + ".html' target='blank'>" + tit + "</a></span>";
    text2 += "<span class='date'>" + date + "</span>";
    text2 += "<span class='clg'>" + clg + "</span>";
    text2 += "</div>";
    $("list").insertAdjacentHTML("afterBegin", text2);
  }
}

// Percent-encode `str` for use in a URL path segment. Character codes come
// from the VBScript helper str2asc (hex of asc()): codes above 0x7f are
// emitted as two %XX bytes, a space becomes "+", and the listed punctuation
// is emitted as %XX.
function UrlEncode(str) {
  var ret = "";
  var strSpecial = "!\"#$%&'()*+,/:;<=>?[]^`{|}~%";
  for (var i = 0; i < str.length; i++) {
    var chr = str.charAt(i);
    var c = str2asc(chr);
    if (parseInt("0x" + c) > 0x7f) {
      ret += "%" + c.slice(0, 2) + "%" + c.slice(-2);
    } else {
      if (chr == " ")
        ret += "+";
      else if (strSpecial.indexOf(chr) != -1)
        ret += "%" + c.toString(16);
      else
        ret += chr;
    }
  }
  return ret;
}

// Save support.

// Convert the markup of #list into table markup and return it. The patterns
// are upper-case only, matching tag names as they appear in the innerHTML
// string this page reads back.
function formatList() {
  var html = $("list").innerHTML;
  html = html.replace(/<BR>/g, "");
  html = html.replace(/DIV/g, "tr");
  html = html.replace(/SPAN/g, "td");
  html = "<table>" + html + "</table>";
  return html;
}

// Write the formatted list into a new window and invoke the save-as command
// with the default file name join.html, then close that window.
function saveList() {
  var winname = window.open('', '_blank', '');
  winname.document.open('text/html', 'replace');
  winname.document.writeln(formatList());
  winname.document.close();
  winname.document.execCommand('saveas', '', 'join.html');
  winname.close();
}
</script>
</head>
<body>
<script language="vbscript" type="text/vbscript">
Function str2asc(strstr)
  str2asc = hex(asc(strstr))
End Function
</script>
<div id="head">
  <input name="blogurl" type="text" id="blogurl" value="join" />
  <input name="initialize" type="button" id="initialize" value="初始化" />
</div>
<div id="choose">
  <label for="page_min">开始页</label>
  <select name="page_min" id="page_min">
  </select>
  <label for="page_max">结束页</label>
  <select name="page_max" id="page_max">
  </select><br />
  <label for="isArtclg">按分类提取?</label>
  <input name="isArtclg" type="checkbox" id="isArtclg" value="checkbox" />
  <select name="artclg" id="artclg">
  </select><br />
  <input name="getList" type="button" id="getList" value="提取文章列表" />
  <input name="savelist" type="button" id="savelist" value="保存文章列表" />
</div>
<div id="list">
</div>
<div id="error">
  因网络问题,未获取的url列表:<br />
</div>
</body>
</html>
posted @ 2012-07-16 13:15  adodo1  Views(101)  Comments(0)  Edit  收藏  举报