爬虫爬取网站

1.建数据库product 表和product_data表

CREATE TABLE product (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    url VARCHAR(255) NOT NULL
);

CREATE TABLE product_data (
    id INT AUTO_INCREMENT PRIMARY KEY,
    product_id INT,
    price VARCHAR(255),
    sale INT,
    FOREIGN KEY (product_id) REFERENCES product(id)
);

 

2.插入maven

<dependencies>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.47</version>
    </dependency>

    <dependency>
        <groupId>com.google.code.gson</groupId>
        <artifactId>gson</artifactId>
        <version>2.8.8</version>
    </dependency>

    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.14.3</version>
    </dependency>

</dependencies>

 

 

3.新建类WebCrawler 

public class WebCrawler {
    public static void main(String[] args) {
        String url = "https://www.leadongshop.com/products.html";
        try {
            // Connect to MySQL database
            Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/webcrawler?useSSL=true&requireSSL=true&verifyServerCertificate=false&characterEncoding=UTF-8", "root", "password");

            // Parse HTML from the URL
            Document doc = Jsoup.connect(url).get();

            // Select product elements
            Elements products = doc.select("div.bodyList > a");
            // Iterate over each product
            for (Element product : products) {
                // Extract product name and URL
                String name = product.select("div.name").text();
                String url1 = product.attr("href");
                String productUrl = "https://www.leadongshop.com" + url1;

                String data = product.select("div.price").text();
                String[] s = data.split(" ");
                String price = s[0];
                String sale1 = s[1];
                int sale = Integer.parseInt(sale1);

                // Insert product into 'product' table
                String insertProductQuery = "INSERT INTO product (name, url) VALUES (?, ?)";
                PreparedStatement insertProductStmt = conn.prepareStatement(insertProductQuery);
                insertProductStmt.setString(1, name);
                insertProductStmt.setString(2, productUrl);
                insertProductStmt.executeUpdate();
                insertProductStmt.close();

                String insertProductQuery1 = "INSERT INTO product_data (price,sale) VALUES (?, ?)";
                PreparedStatement insertProductStmt1 = conn.prepareStatement(insertProductQuery1);
                insertProductStmt1.setString(1, price);
                insertProductStmt1.setInt(2, sale);
                insertProductStmt1.executeUpdate();
                insertProductStmt1.close();
            }

            conn.close();
        } catch (IOException | SQLException e) {
            e.printStackTrace();
        }
  }
}

 

  

4.数据库中查询。通过SQL查询出商品列表并按销量从高到低、价格从高到低排序

SELECT p.name, pd.price, pd.sale
FROM product p
JOIN product_data pd ON p.id = pd.product_id
ORDER BY pd.sale DESC;

 

  

  

posted @ 2024-04-22 20:12  秋雨欲来风满楼  阅读(5)  评论(0编辑  收藏  举报