使用Python根据网页生成RSS

pip install requests beautifulsoup4 lxml

  

import requests  
from bs4 import BeautifulSoup  
import xml.etree.ElementTree as ET  
  
def fetch_news_from_url(url):
    """Fetch a news listing page and extract its entries.

    Downloads ``url``, parses the HTML and collects one entry for every
    ``.list li`` element, taking the title and target from the first
    ``<a>`` found inside it.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A list of dicts with the keys ``'title'``, ``'link'`` and
        ``'description'``. The description is always an empty string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Local import keeps this function usable without touching the file header.
    from urllib.parse import urljoin

    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Parse the raw bytes so BeautifulSoup can detect the charset from the
    # document itself; response.text falls back to ISO-8859-1 when the
    # Content-Type header declares no charset, which garbles Chinese text.
    soup = BeautifulSoup(response.content, 'html.parser')

    news_items = []
    for item in soup.select('.list li'):
        anchor = item.select_one('a')
        # Skip list entries that carry no usable link instead of crashing.
        if anchor is None or not anchor.get('href'):
            continue
        news_items.append({
            'title': anchor.get_text(strip=True),
            # Resolve relative hrefs against the page URL so feed readers
            # receive absolute links.
            'link': urljoin(url, anchor['href']),
            'description': '',
        })

    return news_items
  
def generate_rss(news_items, rss_filename, channel_title="RSS feed",
                 channel_link="", channel_description=""):
    """Write ``news_items`` to ``rss_filename`` as an RSS 2.0 document.

    Args:
        news_items: Iterable of dicts with the keys ``'title'`` and
            ``'link'``; ``'description'`` is optional and defaults to an
            empty string.
        rss_filename: Path of the file to write (UTF-8, with XML declaration).
        channel_title: Text of the channel's ``<title>`` element.
        channel_link: Text of the channel's ``<link>`` element.
        channel_description: Text of the channel's ``<description>`` element.

    Raises:
        KeyError: If an item lacks ``'title'`` or ``'link'``.
        OSError: If the output file cannot be written.
    """
    root = ET.Element("rss")
    root.set("version", "2.0")
    channel = ET.SubElement(root, "channel")

    # RSS 2.0 requires every channel to carry title, link and description;
    # without them strict feed readers and validators reject the document.
    ET.SubElement(channel, "title").text = channel_title
    ET.SubElement(channel, "link").text = channel_link
    ET.SubElement(channel, "description").text = channel_description

    for item in news_items:
        item_elem = ET.SubElement(channel, "item")
        ET.SubElement(item_elem, "title").text = item['title']
        ET.SubElement(item_elem, "link").text = item['link']
        ET.SubElement(item_elem, "description").text = item.get('description', '')

    tree = ET.ElementTree(root)
    tree.write(rss_filename, encoding='utf-8', xml_declaration=True)
  
# Example usage: scrape the listing page and write the feed next to the script.
news_url = "https://gdstc.gd.gov.cn/zwgk_n/tzgg/index.html"  # replace with the real news page URL
news_items = fetch_news_from_url(url=news_url)
generate_rss(news_items=news_items, rss_filename="gdkxjsnews.rss")

  

posted on 2024-05-11 22:32  Tencent/Tim  阅读(44)  评论(0)  编辑  收藏  举报

导航