/**
 * Reads selected elements from an XML file through the W3C DOM API and copies
 * their values into a new {@code WebMagic} instance.
 *
 * <p>For every requested name, the first matching descendant of the document's
 * root element is looked up and the node value of its first child is used.
 * Only the element names {@code starturl}, {@code regexstarturl},
 * {@code labelName}, {@code labeltype} and {@code regexdescendants} are mapped
 * to a setter; any other name is read (and echoed to standard output) but
 * otherwise ignored.
 *
 * <p>A requested name with no matching element, or whose element has no child
 * nodes, is skipped so that the remaining names are still processed. If parsing
 * fails, the exception message is printed to standard output and the
 * {@code WebMagic} populated so far is returned.
 *
 * @param lablenames     names of the elements to read; when {@code null} an
 *                       empty {@code WebMagic} is returned
 * @param file_path_name path of the XML file to parse; when {@code null} an
 *                       empty {@code WebMagic} is returned
 * @return a new {@code WebMagic}, never {@code null}; fields whose elements
 *         were not requested or not found are left untouched
 */
public static WebMagic readXML(List<String> lablenames, String file_path_name) {
    WebMagic webMagic = new WebMagic();
    if (lablenames == null || file_path_name == null) {
        return webMagic;
    }
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        org.w3c.dom.Document document = builder.parse(new File(file_path_name));
        org.w3c.dom.Element rootElement = document.getDocumentElement();

        for (String lablename : lablenames) {
            NodeList list = rootElement.getElementsByTagName(lablename);
            // NodeList.item(0) yields null when nothing matched; skip this name
            // instead of failing and abandoning the remaining ones.
            if (list.getLength() == 0) {
                continue;
            }
            org.w3c.dom.Element element = (org.w3c.dom.Element) list.item(0);

            // An empty element (e.g. <starturl/>) has no child node to read from.
            org.w3c.dom.Node firstChild = element.getFirstChild();
            if (firstChild == null) {
                continue;
            }

            // Read the value once rather than re-walking the DOM for every check.
            String value = firstChild.getNodeValue();
            System.out.println(value);

            String nodeName = element.getNodeName();
            if ("starturl".equals(nodeName)) {
                webMagic.setStarturl(value);
            } else if ("regexstarturl".equals(nodeName)) {
                webMagic.setRegexstarturl(value);
            } else if ("labelName".equals(nodeName)) {
                webMagic.setLabelName(value);
            } else if ("labeltype".equals(nodeName)) {
                webMagic.setLabeltype(value);
            } else if ("regexdescendants".equals(nodeName)) {
                webMagic.setRegexdescendants(value);
            }
        }
    } catch (Exception e) {
        // Deliberately best-effort: report the failure and return what was read so far.
        System.out.println("exception:" + e.getMessage());
    }
    return webMagic;
}