从 txt 文件中提取子域名（以 baidu.com 为例）
"""Extract subdomains of a base domain from a text file.

Reads ``test.txt`` (UTF-8), collects every subdomain of ``baidu.com`` that is
one to three labels deep, and writes the unique results to ``domain.txt``,
one per line, in first-seen order.
"""
import re

INPUT_PATH = 'test.txt'
OUTPUT_PATH = 'domain.txt'
BASE_DOMAIN = 'baidu.com'
MAX_DEPTH = 3


def build_patterns(base_domain=BASE_DOMAIN, max_depth=MAX_DEPTH):
    """Return compiled patterns for 1..max_depth labels before *base_domain*.

    Each label is ``\\w+`` followed by one or more dots. ``\\w`` does not match
    a hyphen, so a hyphenated label is only matched from the character after
    its last hyphen.
    """
    # re.escape makes the dots of the base domain literal; an unescaped "."
    # would also accept strings such as "www.baiduxcom".
    suffix = re.escape(base_domain)
    return [
        re.compile(r"\w+\.+" * depth + suffix)
        for depth in range(1, max_depth + 1)
    ]


def extract_domains(lines, base_domain=BASE_DOMAIN, max_depth=MAX_DEPTH):
    """Return the unique subdomains found in *lines*, in first-seen order.

    Args:
        lines: Iterable of strings to scan (for example an open text file).
        base_domain: Domain whose subdomains are collected.
        max_depth: Maximum number of labels matched in front of base_domain.

    Returns:
        A list of matched strings without duplicates. For every line the
        shallowest pattern is applied first, so "a.b.baidu.com" contributes
        "b.baidu.com" and then "a.b.baidu.com".
    """
    patterns = build_patterns(base_domain, max_depth)
    seen = set()   # O(1) duplicate check
    ordered = []   # keeps discovery order for the output file
    for line in lines:
        for pattern in patterns:
            for match in pattern.findall(line):
                if match not in seen:
                    seen.add(match)
                    ordered.append(match)
    return ordered


def main():
    """Read INPUT_PATH, extract the subdomains and write them to OUTPUT_PATH."""
    # Context managers close both files even if reading or writing fails.
    with open(INPUT_PATH, 'r', encoding='UTF-8') as src:
        domains = extract_domains(src)
    # Write with the same encoding the input is read with, so non-ASCII
    # matches do not depend on the platform's default encoding.
    with open(OUTPUT_PATH, 'w', encoding='UTF-8') as dst:
        dst.writelines(domain + '\n' for domain in domains)


if __name__ == '__main__':
    main()
代码丑是丑了点，懒得改了。