// Extracts the rows of the <tbody> element embedded in text.txt and writes them
// to out.txt as tab-separated text: one output line per <tr>, one field per <td>.

// A single charset is used for both decoding the input and encoding the output.
// The platform default is kept on purpose because the encoding of text.txt is not
// known here. NOTE(review): if the file is known to be UTF-8, use that explicitly.
Charset charset = Charset.defaultCharset();

byte[] bytes = Files.readAllBytes(Paths.get("E:\\pdf\\aaa\\html\\text.txt").normalize());
String text = new String(bytes, charset);

// Fail with a clear message instead of a StringIndexOutOfBoundsException from
// substring(-1) when the input contains no table body.
int tbodyStart = text.indexOf("<tbody>");
if (tbodyStart < 0) {
    throw new IllegalStateException("No <tbody> element found in text.txt");
}

// Cut the fragment at the last closing tag so that trailing markup (for example
// </table>) does not make the XML parser reject the document. The last occurrence
// is used so a <tbody> nested inside the outer one is still kept whole.
String closingTag = "</tbody>";
int tbodyEnd = text.lastIndexOf(closingTag);
String xml = tbodyEnd > tbodyStart
        ? text.substring(tbodyStart, tbodyEnd + closingTag.length())
        : text.substring(tbodyStart);

InputSource inputXML = new InputSource(new StringReader(xml));
XPath xPath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xPath.evaluate("/tbody/tr", inputXML, XPathConstants.NODESET);
int length = nodes.getLength();

Path file = Paths.get("E:\\pdf\\aaa\\html\\out.txt");
// TRUNCATE_EXISTING is required: with CREATE alone an existing, longer out.txt
// would keep its old tail after the newly written rows.
try (BufferedWriter writer = Files.newBufferedWriter(file, charset,
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING,
        StandardOpenOption.WRITE)) {
    for (int i = 0; i < length; i++) {
        Node node = nodes.item(i);
        // Relative XPath: only the direct <td> children of the current row.
        NodeList childList = (NodeList) xPath.evaluate("td", node, XPathConstants.NODESET);
        int cellCount = childList.getLength();
        for (int j = 0; j < cellCount; j++) {
            Node child = childList.item(j);
            String content = child.getTextContent();
            writer.write(content);
            // Tab between cells, but not after the last cell of the row.
            if (j < cellCount - 1) {
                writer.write("\t");
            }
        }
        writer.newLine();
    }
}
text.txt内容:
输出内容:
本博客文章绝大多数为原创,少量为转载,代码经过测试验证,如果有疑问直接留言或者私信我。
创作文章不容易,转载文章必须注明文章出处;如果这篇文章对您有帮助,点击右侧打赏,支持一下吧。
创作文章不容易,转载文章必须注明文章出处;如果这篇文章对您有帮助,点击右侧打赏,支持一下吧。