Loading

【Day3】3.提取商城分类结构

import re
# Patterns are compiled once at import time. Newlines are stripped from the
# HTML before matching, so '.' never has to match a line break.
SECTION_PATTERN = re.compile(r'<section class="main_section">(.*?)</section>')
CATEGORY_PATTERN = re.compile(r'<h1>(.*?)</h1>')
COURSE_PATTERN = re.compile(r'<span class="course_name">(.*?)</span>')


def extract_categories(html):
    """Extract the category/course structure from the shop page HTML.

    Args:
        html: Full HTML text of the page. Newline characters are removed
            before matching so that tags spanning several lines still match.

    Returns:
        A list with one dict per ``<section class="main_section">`` element,
        in document order. Each dict has the keys ``'category'`` (text of the
        first ``<h1>`` in the section, or ``''`` when the section has no
        ``<h1>``) and ``'course_s'`` (list of the texts of every
        ``<span class="course_name">`` in the section).
    """
    html = html.replace('\n', '')
    data_s = []
    for section in SECTION_PATTERN.findall(html):
        # search() instead of findall()[0]: a section without a heading must
        # not raise IndexError and discard everything parsed so far.
        heading = CATEGORY_PATTERN.search(section)
        data_s.append(
            {
                'category': heading.group(1) if heading else '',
                'course_s': COURSE_PATTERN.findall(section),
            }
        )
    return data_s


def main():
    """Read ``index.html`` and print each category followed by its courses."""
    # Only the read needs the file handle; close it before parsing/printing.
    with open('index.html', 'r', encoding='utf-8') as f:
        html = f.read()

    for data in extract_categories(html):
        print(data['category'])
        for course in data['course_s']:
            print('    ', course)


if __name__ == '__main__':
    main()
posted @ 2019-11-19 14:09  5572  阅读(115)  评论(0)  编辑  收藏  举报