抓取RSS源中链接图片问题(REDIRECT_LOCATIONS)

抓取链接里的图片时,页面的实际地址通常和RSS源里给出的url并不一致,中间可能会有跳转(重定向)。页面中相对路径的图片必须以跳转后的最终地址为基准来拼接,因此用到了如下方法:

   /**
    * Turns a relative image path found in a page into an absolute URL.
    *
    * <p>The base used for resolution is the address returned by
    * {@code getRealLink(link)}, not {@code link} itself, so that relative
    * paths are resolved against the final address rather than the one
    * given in the feed. Four forms of {@code src} are rewritten:
    * <ul>
    *   <li>{@code //host/path} (protocol-relative): the base scheme is prepended;</li>
    *   <li>{@code /path} (root-relative): the base scheme and host are prepended;</li>
    *   <li>{@code ./path}: resolved against the directory of the base;</li>
    *   <li>{@code ../path}: each leading {@code ../} climbs one directory,
    *       never above the site root.</li>
    * </ul>
    * Any other value (already absolute, {@code data:} URI, plain file name, ...)
    * is returned unchanged, as before.
    *
    * @param src  the image path as written in the page; may be {@code null}
    * @param link the address of the page the image was found in
    * @return the absolute URL, or {@code src} unchanged when it needs no
    *         rewriting or cannot be resolved
    */
   private String getWebRealPath(String src, String link) {
       if (src == null || link == null) {
           return src;
       }
       boolean relative = src.startsWith("/") || src.startsWith("./") || src.startsWith("../");
       if (!relative) {
           // Nothing to resolve, so skip the network round trip entirely.
           return src;
       }
       try {
           // A '/' inside the query string or fragment must not take part in
           // the slash arithmetic below, so cut both off the base first.
           String base = stripQueryAndFragmentOfLink(getRealLink(link));

           if (src.startsWith("//")) {
               // Protocol-relative: only the scheme comes from the base.
               int schemeEnd = base.indexOf("://");
               return schemeEnd > 0 ? base.substring(0, schemeEnd + 1) + src : src;
           }
           if (src.startsWith("/")) {
               return originOfLink(base) + src;
           }
           if (src.startsWith("./")) {
               return directoryOfLink(base) + src.substring(2);
           }

           // Remaining case: one or more leading "../".
           int rootLength = originOfLink(base).length() + 1; // origin plus the trailing '/'
           String dir = directoryOfLink(base);
           String rest = src;
           while (rest.startsWith("../")) {
               rest = rest.substring(3);
               // Drop the last directory, but never climb above the site root.
               if (dir.length() > rootLength) {
                   dir = dir.substring(0, dir.lastIndexOf('/', dir.length() - 2) + 1);
               }
           }
           return dir + rest;
       } catch (RuntimeException e) {
           // Best effort, as in the original: an unresolvable path is handed
           // back untouched rather than failing the whole scrape.
           return src;
       }
   }

   /** Returns {@code url} without its {@code ?query} and {@code #fragment} parts. */
   private String stripQueryAndFragmentOfLink(String url) {
       int end = url.length();
       int query = url.indexOf('?');
       if (query >= 0) {
           end = query;
       }
       int fragment = url.indexOf('#');
       if (fragment >= 0 && fragment < end) {
           end = fragment;
       }
       return url.substring(0, end);
   }

   /** Returns the {@code scheme://host[:port]} prefix of {@code url}, without a trailing slash. */
   private String originOfLink(String url) {
       int schemeEnd = url.indexOf("://");
       // Search for the first path slash after the scheme separator, whatever
       // the scheme length is ("http://" and "https://" differ).
       int hostStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
       int pathStart = url.indexOf('/', hostStart);
       return pathStart < 0 ? url : url.substring(0, pathStart);
   }

   /** Returns the directory part of {@code url}, always ending with {@code '/'}. */
   private String directoryOfLink(String url) {
       String origin = originOfLink(url);
       int lastSlash = url.lastIndexOf('/');
       // No slash after the host means the URL has no path: the directory is the root.
       return lastSlash < origin.length() ? origin + "/" : url.substring(0, lastSlash + 1);
   }
/**
 * Returns the last redirect location recorded for {@code link}, or
 * {@code link} itself when none was recorded.
 *
 * <p>A GET request is issued through {@code HttpUtil.get} with a fresh
 * {@code HttpContext}; afterwards the redirect locations stored in that
 * context under {@code DefaultRedirectStrategy.REDIRECT_LOCATIONS} are read
 * and the last one is returned.
 * NOTE(review): this presumes {@code HttpUtil.get} executes the request with
 * the supplied context so the redirect strategy can populate it; confirm
 * against {@code HttpUtil}.
 *
 * @param link the address to follow
 * @return the last recorded redirect location as a string, or {@code link}
 *         unchanged when there was no redirect or the request failed
 */
private String getRealLink(String link) {
    try {
        HttpContext httpContext = new BasicHttpContext();
        // The response body is not needed here; only the redirect trail
        // left in the context is used.
        HttpUtil.get(link, "iso-8859-1", httpContext);
        RedirectLocations redirectLocations = (RedirectLocations) httpContext
                .getAttribute(DefaultRedirectStrategy.REDIRECT_LOCATIONS);
        if (redirectLocations != null) {
            // Typed list: with a raw List the element would be an Object and
            // the assignment to URI below would not compile.
            List<URI> uriList = redirectLocations.getAll();
            if (uriList != null && !uriList.isEmpty()) {
                URI uri = uriList.get(uriList.size() - 1);
                link = uri.toString();
            }
        }
    } catch (Exception e) {
        // Best effort: on any failure fall back to the address we were given.
        return link;
    }
    return link;
}

 

 

posted @ 2012-12-03 17:29  悟寰轩-叶秋  阅读(565)  评论(0)  编辑  收藏  举报