Web 信息收集：获取所有 URL

from urllib.request import urlopen
from lxml.html import parse

# Fetch the blog's front page and parse it into an lxml HTML tree.
# The `with` block closes the HTTP response as soon as parsing is done,
# instead of leaving the connection open until garbage collection.
with urlopen("https://www.cnblogs.com/nicole-zhang/") as response:
    parsed = parse(response)
doc = parsed.getroot()

# Collect every URL that contains "nicole-zhang".
# List comprehension form: name = [expression for item in iterable if condition]
# Anchors without an href yield None from .get(); the `href and ...` guard
# skips them explicitly rather than relying on str(None) not matching.
hrefs = (lnk.get('href') for lnk in doc.findall('.//a'))
links = [href for href in hrefs if href and "nicole-zhang" in href]
print(links)

 

posted @ 2021-02-20 15:58  OTAKU_nicole  阅读(192)  评论(0)  编辑  收藏  举报