抓取RSS源中链接图片问题REDIRECT_LOCATIONS
抓取链接页面里的图片时,页面的实际url通常和RSS中原来给出的url并不一致,中间可能会有跳转(重定向),因此相对路径的图片地址需要以跳转后的真实url为基准来还原,用到了如下方法:
/**
 * Turns a relative image {@code src} found in the page behind {@code link} into an
 * absolute URL.
 *
 * <p>Only values starting with {@code "/"}, {@code "./"} or {@code "../"} are rewritten;
 * anything else is returned unchanged. Because the feed link may redirect, the base used
 * for resolution is the final URL reported by {@link #getRealLink(String)}, which is
 * looked up once and only when {@code src} actually needs resolving.
 *
 * @param src  the image reference as written in the page; may be {@code null}
 * @param link the URL of the page as given by the feed
 * @return the absolute URL, or {@code src} unchanged when it is not one of the handled
 *         relative forms or when resolution fails
 */
private String getWebRealPath(String src, String link) {
    if (src == null || link == null) {
        return src;
    }
    boolean relative = src.startsWith("/") || src.startsWith("./") || src.startsWith("../");
    if (!relative) {
        return src;
    }
    try {
        return resolveAgainst(src, getRealLink(link));
    } catch (RuntimeException e) {
        // Best effort: a malformed base must not break the caller, so fall back to the raw value.
        return src;
    }
}

/**
 * Resolves a relative reference against a base URL using plain string handling, so that
 * bases containing characters {@code java.net.URI} would reject still work.
 *
 * @param src  reference starting with {@code "/"}, {@code "./"} or {@code "../"}
 * @param base the (already redirect-resolved) page URL
 * @return the resolved URL
 */
private String resolveAgainst(String src, String base) {
    // Drop query and fragment first so a '/' inside them is never taken for a path separator.
    int end = base.length();
    int queryStart = base.indexOf('?');
    if (queryStart >= 0) {
        end = Math.min(end, queryStart);
    }
    int fragmentStart = base.indexOf('#');
    if (fragmentStart >= 0) {
        end = Math.min(end, fragmentStart);
    }
    String location = base.substring(0, end);

    // Locate the authority by the "://" separator instead of a fixed offset, so that
    // "https://" (and any other scheme) is handled as well as "http://".
    int schemeEnd = location.indexOf("://");
    int authorityStart = schemeEnd < 0 ? 0 : schemeEnd + 3;
    int pathStart = location.indexOf('/', authorityStart);
    String root = pathStart < 0 ? location : location.substring(0, pathStart);
    String path = pathStart < 0 ? "/" : location.substring(pathStart);

    if (src.startsWith("//")) {
        // Protocol-relative reference: it already names its host and only borrows the scheme.
        return schemeEnd < 0 ? src : location.substring(0, schemeEnd) + ":" + src;
    }
    if (src.startsWith("/")) {
        return root + src;
    }

    // Directory of the base document; always starts and ends with '/'.
    String dir = path.substring(0, path.lastIndexOf('/') + 1);
    String rest = src;
    while (true) {
        if (rest.startsWith("./")) {
            rest = rest.substring(2);
        } else if (rest.startsWith("../")) {
            rest = rest.substring(3);
            dir = parentOf(dir);
        } else {
            break;
        }
    }
    return root + dir + rest;
}

/** Returns the parent of a directory path that starts and ends with '/', never going above "/". */
private String parentOf(String dir) {
    if (dir.length() <= 1) {
        return "/";
    }
    return dir.substring(0, dir.lastIndexOf('/', dir.length() - 2) + 1);
}

/**
 * Requests {@code link} and returns the last redirect location recorded in the HTTP
 * context, i.e. the URL the request finally ended up at.
 *
 * @param link the URL to request
 * @return the last recorded redirect location, or {@code link} itself when no redirect
 *         was recorded or the request failed
 */
private String getRealLink(String link) {
    try {
        HttpContext httpContext = new BasicHttpContext();
        // NOTE(review): presumably HttpUtil.get executes the request with this context so that
        // the redirect strategy can record the visited locations in it - confirm against HttpUtil.
        HttpUtil.get(link, "iso-8859-1", httpContext);
        RedirectLocations redirectLocations = (RedirectLocations) httpContext
                .getAttribute(DefaultRedirectStrategy.REDIRECT_LOCATIONS);
        if (redirectLocations != null) {
            List<URI> uriList = redirectLocations.getAll();
            if (!uriList.isEmpty()) {
                return uriList.get(uriList.size() - 1).toString();
            }
        }
    } catch (Exception e) {
        // Best effort: if the lookup fails for any reason, keep the link from the feed.
        return link;
    }
    return link;
}