xml解析器_Jsoup_快速入门、Jsoup对象

xml解析器_Jsoup_快速入门

导入jar包:将jar包放入libs目录,右击libs,点击"添加为库",选择"模块库"即可

package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Quick-start demo: loads {@code student.xml} from the classpath, parses it with
 * Jsoup, then prints the number of {@code <name>} elements followed by the text
 * of the first one.
 */
public class jsoupDemo1 {
    /** Classpath-relative name of the XML document this demo reads. */
    private static final String RESOURCE_NAME = "student.xml";

    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws IOException if the resource is missing, cannot be mapped to a local
     *                     file, or cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Locate the document on the classpath; getResource returns null when absent.
        java.net.URL resource = jsoupDemo1.class.getClassLoader().getResource(RESOURCE_NAME);
        if (resource == null) {
            throw new java.io.FileNotFoundException(RESOURCE_NAME + " not found on the classpath");
        }

        // URL.getPath() keeps percent-escapes (e.g. "%20" for a space), so building a
        // File from it breaks for directories with spaces or non-ASCII names.
        // Going through a URI decodes the location correctly.
        File xmlFile;
        try {
            xmlFile = new File(resource.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            throw new IOException("Cannot resolve " + resource + " to a local file", e);
        }

        // Parse the document: load it into memory and build the DOM tree.
        Document document = Jsoup.parse(xmlFile, "utf-8");

        // Collect every <name> element.
        Elements names = document.getElementsByTag("name");
        System.out.println(names.size());

        // Nothing more to print when the document has no <name> element.
        if (names.isEmpty()) {
            return;
        }

        // Take the first <name> element and print its text content.
        Element firstName = names.get(0);
        System.out.println(firstName.text());
    }
}

xml解析器_Jsoup对象

Jsoup:工具类,可以解析html或xml文档,返回Document

  parse:解析html或xml文档,返回Document

    parse(File  in,String  charsetName):解析xml或html文件

    parse(String  html):解析xml或html字符串

    parse(URL  url,int  timeoutMillis):通过网络路径获取指定的html或xml的文档对象

Document:文档对象,代表内存中的dom树

Elements:元素 Element对象的集合,可以当做ArrayList<Element>来使用

Element:元素对象

Node:节点对象

package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * Demonstrates {@code Jsoup.parse(String)}: parses an in-memory XML string and
 * prints the resulting document.
 */
public class jsoupDemo2 {
    /** Sample student document parsed by this demo. */
    private static final String STUDENTS_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
            "\n" +
            "<students>\n" +
            "\n" +
            "\t<student number=\"heima_0001\">\n" +
            "\t\t<name>tom</name>\n" +
            "\t\t<age>18</age>\n" +
            "\t\t<sex>male</sex>\n" +
            "\t</student>\n" +
            "\t<student number=\"heima_0002\">\n" +
            "\t\t<name>jack</name>\n" +
            "\t\t<age>18</age>\n" +
            "\t\t<sex>female</sex>\n" +
            "\t</student>\n" +
            "\n" +
            "</students>";

    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws IOException declared for signature compatibility; this variant does no file or network I/O
     */
    public static void main(String[] args) throws IOException {
        // The classpath lookup of student.xml was dropped: its result was never used,
        // and it could fail with a NullPointerException when the file was absent.

        // NOTE(review): per the jsoup docs this overload parses the input as HTML,
        // so the printed tree may differ from the raw XML - confirm this is intended.
        Document document = Jsoup.parse(STUDENTS_XML);
        System.out.println(document);
    }
}
package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * Demonstrates {@code Jsoup.parse(URL, int)}: fetches a page over the network
 * and prints the parsed document.
 */
public class jsoupDemo2 {
    /** Address of the page fetched by this demo. */
    private static final String PAGE_URL = "https://www.1688.com/";

    /** Value passed as the {@code timeoutMillis} argument of {@code Jsoup.parse(URL, int)}. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws IOException if the URL is malformed or the page cannot be fetched and parsed
     */
    public static void main(String[] args) throws IOException {
        // The classpath lookup of student.xml was dropped: its result was never used,
        // and it could fail with a NullPointerException when the file was absent.

        // Fetch the page at the given address and build its document object.
        URL url = new URL(PAGE_URL);
        Document document = Jsoup.parse(url, TIMEOUT_MILLIS);
        System.out.println(document);
    }
}
posted @ 2023-02-01 09:26  肥宅快乐水~  阅读(40)  评论(0编辑  收藏  举报