辣鸡

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::

'功能:使用正则表达式对字符串进行替换
复制内容到剪贴板 程序代码

  ' Replaces every match of a regular expression inside a string.
  '
  ' Str        - input text; a Null input produces an empty string.
  ' PatternStr - regular expression pattern (VBScript RegExp syntax).
  ' RepStr     - replacement text; may use $1, $2, ... backreferences.
  '
  ' Matching is case-insensitive and applies to all occurrences.
  ' Returns the text with all matches replaced.
  Function RegReplace(Str, PatternStr, RepStr)
      Dim oMatcher
      If IsNull(Str) Then
          ' Null cannot be searched; hand back an empty string instead.
          RegReplace = ""
      Else
          Set oMatcher = New RegExp
          With oMatcher
              .Pattern = PatternStr
              .Global = True
              .IgnoreCase = True
              RegReplace = .Replace(Str, RepStr)
          End With
      End If
  End Function
复制代码

'过滤HTML各种标签样式脚本

  ' Strips selected kinds of HTML markup from a string.
  '
  ' sHTML    - HTML text to clean. Null or empty input returns "".
  ' sFilters - comma-separated, case-insensitive list of filter names.
  '            Defaults to "SCRIPT,OBJECT" when empty. Recognised names:
  '              JORKIN    collapse runs of the 5-character whitespace literal
  '              SCRIPT    remove <script> blocks, on* event attributes, and
  '                        neutralise javascript:/vbscript: style schemes
  '              FIXIMG    rewrite <img> tags to just "src" plus border=0
  '              TABLE     remove table markup (<tr> becomes <p>)
  '              CLASS     remove class attributes
  '              STYLE     remove style attributes
  '              XML       remove <?xml ...> / <xml ...> tags
  '              NAMESPACE remove namespaced tags such as <o:p>
  '              FONT, MARQUEE, OBJECT (with PARAM/EMBED), COMMENT
  '            Any other name is treated as a tag name whose opening and
  '            closing tags are removed. Such names are inserted into a
  '            regular expression unescaped, so pass plain tag names only.
  '
  ' Returns the filtered text. The caller's arguments are not modified.
  '
  ' NOTE: this is a best-effort regex filter, not a full HTML sanitizer; do not
  ' rely on it alone to make untrusted HTML safe.
  Function HTMLFilter(sHTML, sFilters)
      Dim sResult, sFilterList, aFilters, sName, sPrev, i

      HTMLFilter = ""
      If sHTML & "" = "" Then Exit Function

      ' VBScript passes arguments ByRef by default, so work on local copies
      ' instead of overwriting the caller's variables.
      sResult = sHTML
      sFilterList = sFilters & ""
      If sFilterList = "" Then sFilterList = "SCRIPT,OBJECT"

      aFilters = Split(UCase(sFilterList), ",")
      For i = 0 To UBound(aFilters)
          sName = Trim(aFilters(i))
          Select Case sName
              Case ""
                  ' Empty entry (e.g. trailing comma): skip it. Falling through
                  ' to Case Else would build a pattern that strips every tag.
              Case "JORKIN"
                  Do While InStr(sResult, "     ") >0
                      sResult = Replace(sResult, "     ", "    ")
                  Loop
              Case "SCRIPT"
                  ' Remove <script>...</script> blocks.
                  sResult = RegReplace(sResult, "<SCRIPT\b[\s\S]*?</SCRIPT\s*>", "")
                  ' Remove on* event-handler attributes (quoted or unquoted)
                  ' inside tags. Each pass removes at most one handler per
                  ' tag, so repeat until nothing changes.
                  Do
                      sPrev = sResult
                      sResult = RegReplace(sResult, "(<[^>]*?)\son\w+\s*=\s*(?:([""'])[\s\S]*?\2|[^\s>]+)", "$1")
                  Loop Until sResult = sPrev
                  ' Neutralise script URL schemes by swapping the ASCII colon
                  ' for a full-width colon (U+FF1A). The previous replacement
                  ' text was identical to the match and therefore did nothing.
                  sResult = RegReplace(sResult, "(JAVASCRIPT|JSCRIPT|VBSCRIPT|VBS):", "$1" & ChrW(&HFF1A))
              Case "FIXIMG"
                  ' Unquoted src value: keep only the src attribute.
                  sResult = RegReplace(sResult, "<IMG\b[^>]*?\sSRC\s*=\s*([^\s""'>]+)[^>]*>", "<img src=$1 border=0>")
                  ' Quoted src value: keep the src attribute with its quotes.
                  sResult = RegReplace(sResult, "<IMG\b[^>]*?\sSRC\s*=\s*([""'])([^>]*?)\1[^>]*>", "<img src=$1$2$1 border=0>")
              Case "TABLE"
                  ' Remove table markup <table><tbody><tr><td><th>.
                  ' \b keeps TR/TH/TD from matching longer tag names
                  ' such as <track> or <thead>.
                  sResult = RegReplace(sResult, "</?TABLE\b[^>]*>", "")
                  sResult = RegReplace(sResult, "</?TBODY\b[^>]*>", "")
                  sResult = RegReplace(sResult, "<(/?)TR\b[^>]*>", "<$1p>")
                  sResult = RegReplace(sResult, "</?TH\b[^>]*>", " ")
                  sResult = RegReplace(sResult, "</?TD\b[^>]*>", " ")
              Case "CLASS"
                  ' Remove class="..." attributes. Quoted values go first so a
                  ' value containing spaces is not cut in half by the unquoted
                  ' pattern.
                  sResult = RegReplace(sResult, "\sCLASS\s*=\s*([""'])[\s\S]*?\1", "")
                  sResult = RegReplace(sResult, "(<[^>]+?)\sCLASS\s*=\s*[^\s""'>]+([^>]*>)", "$1$2")
              Case "STYLE"
                  ' Remove style="..." attributes (quoted first, as for CLASS).
                  sResult = RegReplace(sResult, "\sSTYLE\s*=\s*([""'])[\s\S]*?\1", "")
                  sResult = RegReplace(sResult, "(<[^>]+?)\sSTYLE\s*=\s*[^\s""'>]+([^>]*>)", "$1$2")
              Case "XML"
                  ' Remove <?xml ...> declarations and <xml ...> tags.
                  ' A backslash is not an escape character inside a VBScript
                  ' string literal, so the regex escape for "?" is one "\".
                  sResult = RegReplace(sResult, "<\??XML\b[^>]*>", "")
              Case "NAMESPACE"
                  ' Remove namespaced tags such as <o:p></o:p>.
                  sResult = RegReplace(sResult, "</?[a-z]+:[^>]*>", "")
              Case "FONT"
                  ' Remove <font></font> tags.
                  sResult = RegReplace(sResult, "</?FONT\b[^>]*>", "")
              Case "MARQUEE"
                  ' Remove <marquee></marquee> tags.
                  sResult = RegReplace(sResult, "</?MARQUEE\b[^>]*>", "")
              Case "OBJECT"
                  ' Remove <object>, <param> and <embed> tags.
                  sResult = RegReplace(sResult, "</?OBJECT\b[^>]*>", "")
                  sResult = RegReplace(sResult, "</?PARAM\b[^>]*>", "")
                  sResult = RegReplace(sResult, "</?EMBED\b[^>]*>", "")
              Case "COMMENT"
                  ' Remove HTML comments. This also removes comments inside
                  ' <script> and <style> blocks, so use with care.
                  sResult = RegReplace(sResult, "<!--[\s\S]*?-->", "")
              Case Else
                  ' Remove any other tag by name. The trimmed name is used and
                  ' \b stops "B" from also matching <br> or <body>.
                  sResult = RegReplace(sResult, "</?" & sName & "\b[^>]*>", "")
          End Select
      Next
      HTMLFilter = sResult
  End Function
复制代码

过滤全部html

复制内容到剪贴板 程序代码

<\/*[^<>]*>


过滤 style

复制内容到剪贴板 程序代码

(<style)+[^<>]*>[^\0]*(<\/style>)+


过滤 层 div

复制内容到剪贴板 程序代码

<(\/){0,1}div[^<>]*>


过滤 链接 a :

复制内容到剪贴板 程序代码

<(\/){0,1}a[^<>]*>


过滤 字体 font

复制内容到剪贴板 程序代码

<(\/){0,1}font[^<>]*>


过滤 span 

复制内容到剪贴板 程序代码

<(\/){0,1}span[^<>]*>


过滤 object 

复制内容到剪贴板 程序代码

<object.*?/object>

过滤 iframe

复制内容到剪贴板 程序代码

(<iframe){1,}[^<>]*>[^\0]*(<\/iframe>){1,}

过滤 script:

复制内容到剪贴板 程序代码

(<script){1,}[^<>]*>[^\0]*(<\/script>){1,}


过滤 Class 

复制内容到剪贴板 程序代码

(class=){1,}(""|\'){0,1}\S+(""|\'|>|\s){0,1}

过滤 style 和 strong

复制内容到剪贴板 程序代码

<(style|strong)[^>]*>|<\/(style|strong)>

过滤 img

复制内容到剪贴板 程序代码

<(img)[^>]*>|<\/(img)>


过滤 table tr td 等

复制内容到剪贴板 程序代码

<(table|tbody|tr|td|th)[^>]*>|<\/(table|tbody|tr|td|th)>


过滤 div blockquote fieldset legend

复制内容到剪贴板 程序代码

<(div|blockquote|fieldset|legend)[^>]*>|<\/(div|blockquote|fieldset|legend)>


过滤 font i u h1-h9 s


复制内容到剪贴板 程序代码

<(font|i|u|h[1-9]|s)[^>]*>|<\/(font|i|u|h[1-9]|s)>


过滤 style 和 strong


复制内容到剪贴板 程序代码

<(style|strong)[^>]*>|<\/(style|strong)>


过滤 链接 a

复制内容到剪贴板 程序代码

<a[^>]*>|<\/a>


过滤 meta iframe frame span tbody layer

复制内容到剪贴板 程序代码

<(meta|iframe|frame|span|tbody|layer)[^>]*>|<\/(iframe|frame|meta|span|tbody|layer)>


过滤 换行 br

复制内容到剪贴板 程序代码

<br[^>]*>

posted on 2011-12-23 17:23  辣鸡  阅读(222)  评论(0)  编辑  收藏  举报