Python 爬虫：爬取 HTML 中带有多个指定 class 的 div 标签并存入数据库
使用 Python 爬虫爬取 HTML 页面中 class 为多个指定值之一的 div 标签，将获取到的文本存成列表，然后存入数据库。
# Scrape the text of every <div> whose class is "post-meta" or "post-title"
# from a page, pair the results up as (date, text), and insert each pair
# into the MySQL table `data` unless an identical row is already there.
import mysql.connector
import pymysql
import requests
from bs4 import BeautifulSoup

# Connect to the database.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file before using this outside a local test.
conn = mysql.connector.connect(user='root', password='123456',
                               host='127.0.0.1', database='listdb')
try:
    # A buffered cursor fetches the whole result set on execute(), so the
    # existence check below never leaves unread rows that would make the
    # next execute() fail with "Unread result found".
    cursor = conn.cursor(buffered=True)

    # Create table if not exists
    cursor.execute('''CREATE TABLE IF NOT EXISTS data (
        id INT AUTO_INCREMENT PRIMARY KEY,
        date VARCHAR(255),
        text VARCHAR(255)
    )''')
    conn.commit()

    # Fetch the page. The timeout keeps the script from hanging forever on
    # an unresponsive server; raise_for_status() stops us from parsing and
    # storing the body of an HTTP error page.
    url = "https://url/"
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    html_content = response.text

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(html_content, "html.parser")

    # Collect the stripped text of every div matching either class, in
    # document order.
    data = [
        div.text.strip()
        for div in soup.find_all("div", class_=["post-meta", "post-title"])
    ]

    # Consecutive items are treated as (date, text) pairs.
    # NOTE(review): this assumes the matched divs strictly alternate
    # post-meta / post-title in the page -- confirm against the real markup.
    # zip() drops a trailing unpaired item instead of raising IndexError
    # (which previously aborted the run before anything was committed).
    for date, text in zip(data[0::2], data[1::2]):
        # Check if the data already exists. Scraped text is untrusted, so it
        # is passed as query parameters rather than formatted into the SQL.
        cursor.execute(
            "SELECT id FROM data WHERE date=%s AND text=%s",
            (date, text),
        )
        result = cursor.fetchone()

        # Insert if it does not exist
        if not result:
            cursor.execute(
                "INSERT INTO data (date, text) VALUES (%s, %s)",
                (date, text),
            )

    # Commit all inserts in one transaction.
    conn.commit()
finally:
    # Always release the connection, even if fetching or inserting failed.
    conn.close()