Python 解析RSS xml文本

用python解析博客园RSS订阅的xml文本

源码

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Author:Jruing
# FileName:RSS
# DateTime:2020/5/29 13:59
# SoftWare: PyCharm

from xml.dom.minidom import parseString
import requests


class RSS():
    """Download a cnblogs feed and extract metadata for every article in it.

    The feed is expected to use Atom-style tags (``author``/``name``,
    ``entry``, ``id``, ``title``, ``published``, ``updated``), which are the
    tags this class looks up.
    """

    def __init__(self, rss_url):
        """Remember the feed URL; no network access happens here."""
        self.rss_url = rss_url

    def get_context(self):
        """Fetch the feed and return the parsed articles.

        Returns:
            The list produced by :meth:`parse_context`.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-2xx HTTP status.
        """
        # A timeout keeps the call from hanging forever on a dead server.
        response = requests.get(self.rss_url, timeout=10)
        response.raise_for_status()
        # Hand the raw bytes to the XML parser so that it decodes them using
        # the encoding declared by the document itself, instead of relying on
        # the charset guessed from the HTTP headers.
        return self.parse_context(response.content)

    @staticmethod
    def _first_text(node, tag):
        """Return the text of the first ``tag`` element under ``node``.

        Returns an empty string when the element is missing or has no text.
        """
        elements = node.getElementsByTagName(tag)
        if not elements:
            return ''
        return ''.join(
            child.data
            for child in elements[0].childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )

    def parse_context(self, response):
        """Parse feed XML and return one dict per ``entry`` element.

        Args:
            response: The feed document as ``str`` or ``bytes``.

        Returns:
            A list of dicts with the keys ``art_url``, ``art_title``,
            ``art_publish``, ``art_update`` and ``art_author``. The list is
            empty when the feed has no entries. Missing fields are returned
            as empty strings.

        Side effects:
            Prints the blog author's name (when present) and every article
            dict to stdout.
        """
        # Build the DOM tree for the document.
        domtree = parseString(response)
        collect = domtree.documentElement

        # The first <author> in the document carries the blog owner's name.
        feed_authors = collect.getElementsByTagName('author')
        if feed_authors:
            print(self._first_text(feed_authors[0], 'name'))

        articles = []
        # Extract the attributes of every article.
        for info in collect.getElementsByTagName('entry'):
            full_title = self._first_text(info, 'title')
            # Titles look like "<title>-<suffix>"; drop the last "-" segment.
            # A title without any "-" is kept as-is instead of becoming "".
            head, sep, _ = full_title.rpartition('-')
            art_title = head if sep else full_title

            # Use the last <author> of the entry; fall back to "" when the
            # entry does not name one.
            entry_authors = info.getElementsByTagName('author')
            art_author = (
                self._first_text(entry_authors[-1], 'name')
                if entry_authors else ''
            )

            data = {
                "art_url": self._first_text(info, 'id'),
                "art_title": art_title,
                "art_publish": self._first_text(info, 'published'),
                "art_update": self._first_text(info, 'updated'),
                "art_author": art_author,
            }
            print(data)
            articles.append(data)
        return articles


if __name__ == '__main__':
    # Script entry point: fetch and parse the author's cnblogs feed.
    feed_url = "http://feed.cnblogs.com/blog/u/565725/rss/"
    RSS(feed_url).get_context()
posted @ 2020-05-29 17:05  Jruing  阅读(484)  评论(0)  编辑  收藏  举报