import re
# Regular expressions for the fragments of index.html this script extracts.
# They rely on "." not matching newlines, so the document is flattened to a
# single line before they are applied (see below).
section_pattern = r'<section class="main_section">(.*?)</section>'
category_pattern = r'<h1>(.*?)</h1>'
course_pattern = r'<span class="course_name">(.*?)</span>'


def _parse_section(section):
    """Return ``{'category': str, 'course_s': list[str]}`` for one section.

    *section* is the inner HTML captured by ``section_pattern``.  The
    category is the text of the first ``<h1>`` in the section; the courses
    are the texts of every ``<span class="course_name">`` in it, in
    document order.
    """
    heading = re.search(category_pattern, section)
    return {
        # A section without an <h1> previously raised IndexError and aborted
        # the whole run; keep its courses under an empty category instead.
        'category': heading.group(1) if heading else '',
        'course_s': re.findall(course_pattern, section),
    }


with open('index.html', 'r', encoding='utf-8') as f:
    # Drop newlines so a tag pair split across lines can still be matched
    # by the non-DOTALL patterns above.
    html = f.read().replace('\n', '')

section_s = re.findall(section_pattern, html)
data_s = [_parse_section(section) for section in section_s]

# Print each category followed by its courses.
for data in data_s:
    print(data['category'])
    for course in data['course_s']:
        print(' ', course)