// 清除word格式的技巧 (Tips for stripping Word formatting from HTML)
/**
 * Removes one category of markup from an HTML string.
 *
 * @param {string} html   The HTML text to filter.
 * @param {string} filter Name of the single category to strip (case-insensitive):
 *                        SCRIPT, TABLE, CLASS, STYLE, XML, NAMESPACE, FONT,
 *                        MARQUEE or OBJECT.
 * @returns {string} The filtered HTML. An unrecognised (or non-string) filter
 *                   leaves the input unchanged.
 *
 * NOTE(review): these are plain regex substitutions, not an HTML parser; do not
 * rely on the SCRIPT filter as a security sanitizer for untrusted input.
 */
function DecodeFilter(html, filter)
{
    // String() keeps a null/undefined/non-string filter from throwing; such
    // values simply fall through to the default branch.
    switch (String(filter).toUpperCase())
    {
    case "SCRIPT": // client-side script: <script> tags, script URL schemes, event handlers
        // Only the tags themselves are removed; text between them is kept.
        html = html.replace(/<\/?script[^>]*>/gi, "");
        // Swap the ASCII colon for a full-width colon (U+FF1A) so the scheme is
        // no longer recognised as a URL protocol. (Replacing with "$1:" would
        // just reproduce the match and change nothing.)
        html = html.replace(/(javascript|jscript|vbscript|vbs):/gi, "$1\uFF1A");
        // Wrap event-handler prefixes and numeric-entity prefixes in <I> so they
        // no longer form a valid attribute name / character reference.
        html = html.replace(/on(mouse|exit|error|click|key)/gi, "<I>on$1</I>");
        html = html.replace(/&#/gi, "<I>&#</I>");
        break;
    case "TABLE": // table markup: <table>, <tr>, <th>, <td>
        html = html.replace(/<\/?table[^>]*>/gi, "");
        html = html.replace(/<\/?tr[^>]*>/gi, "");
        html = html.replace(/<\/?th[^>]*>/gi, "");
        html = html.replace(/<\/?td[^>]*>/gi, "");
        break;
    case "CLASS": // class attribute: class=x, class="x y", class='x y'
        // Quoted values are consumed whole so a value containing spaces is not
        // cut in half; an unquoted value runs to the next whitespace or '>'.
        html = html.replace(/(<[^>]+)\sclass=(?:"[^"]*"|'[^']*'|[^\s>]*)([^>]*>)/gi, "$1 $2");
        break;
    case "STYLE": // inline style attribute: style="..." or style='...'
        html = html.replace(/(<[^>]+)\sstyle=(?:"[^"]*"|'[^']*')([^>]*>)/gi, "$1 $2");
        break;
    case "XML": // XML declarations / processing instructions: <?xml ...>
        html = html.replace(/<\?xml[^>]*>/gi, "");
        break;
    case "NAMESPACE": // namespace-prefixed tags, e.g. <o:p></o:p>, <st1:place>
        // The prefix may contain digits (st1), so allow them after the first letter.
        html = html.replace(/<\/?[a-z][a-z0-9]*:[^>]*>/gi, "");
        break;
    case "FONT": // <font></font>
        html = html.replace(/<\/?font[^>]*>/gi, "");
        break;
    case "MARQUEE": // <marquee></marquee>
        html = html.replace(/<\/?marquee[^>]*>/gi, "");
        break;
    case "OBJECT": // embedded objects: <object>, <param>, <embed>
        html = html.replace(/<\/?object[^>]*>/gi, "");
        html = html.replace(/<\/?param[^>]*>/gi, "");
        html = html.replace(/<\/?embed[^>]*>/gi, "");
        break;
    default:
        // Unknown filter: nothing to strip.
        break;
    }
    return html;
}
/**
 * Performs a global, case-insensitive regular-expression replacement.
 *
 * @param {string} re      Regular-expression source handed to the RegExp constructor.
 * @param {string} rp      Replacement passed to String.prototype.replace
 *                         (may use $1, $2, ... group references).
 * @param {string} content Text to run the replacement on.
 * @returns {string} A copy of content with every match replaced.
 */
function execRE(re, rp, content)
{
    return content.replace(new RegExp(re, "ig"), rp);
}