用正则表达式做替换

import os
import re

# Remove every <script>...</script> element from the page source.
# NOTE(review): cur_path is not defined in this snippet; it is presumably the
# directory that contains growth.html -- confirm against the full script.
with open(os.path.join(cur_path, 'growth.html'), 'r') as f:
    text = f.read()  # the with-block closes the file even if read() raises

# re.S (DOTALL) lets "." match newline characters, so a script body that
# spans several lines is matched and removed. re.M only changes how ^ and $
# behave and would leave multi-line <script> elements untouched.
script = re.compile("<script>.*?</script>", re.S)
b = script.sub('', text)

对于这样的文本,要取到¥168这个值: 

<td class="total"><span class="money">¥</span>168</td>

# Capture the inner HTML of the <td class="total"> cell; the non-greedy group
# stops at the first closing </td>.
# A plain r"" literal is used because the ur"" prefix is a syntax error on
# Python 3; print(...) with a single argument works on Python 2 and 3 alike.
money = re.findall(r'<td class="total">(.*?)</td>', text)
if money:
    print(money[0])  # prints <span class="money">¥</span>168
    # Delete the opening <span class="money"> tag and the closing </span>,
    # keeping only the text that was between and after them.
    m = re.sub(r'<span class="money">|</span>', "", money[0])
    print(m)  # prints ¥168
else:
    # No matching cell in text: indexing money[0] would raise IndexError,
    # so leave the result empty instead.
    m = None

注意:re.sub 返回的是替换后的字符串,而不是像 re.findall 那样返回 list。

 

posted @ 2017-07-18 09:41  桃乐丝  阅读(264)  评论(0)  编辑  收藏  举报