textContent 与 innerText
innerText是IE的私有实现,但也被除FF之外的浏览器所实现,textContent 则是w3c的标准API,现在IE9也实现了。
它们的区别主要有两点:
- innerText不能返回script标签里面的源码,textContent则可以,在不支持textContent的浏览器,我们可以使用text与innerHTML代替。
- textContent会保留空行、空格与换行符,innerText则只会保留换行符。
为了屏蔽两者的差异,外国人写了以下脚本:
// Adapted from:
//   http://clubajax.org/plain-text-vs-innertext-vs-textcontent/
//   http://d.hatena.ne.jp/cou929_la/20110517/1305644081

/**
 * Builds a plain-text rendering of an element that approximates innerText:
 * block-level elements are separated by line breaks, inline elements by a
 * single space, hidden/script content is skipped and paragraphs are
 * followed by a blank line.
 *
 * The element itself is never modified; all work happens on a deep clone.
 *
 * @param {Element} node - Root element to read. A missing node yields "".
 * @returns {string} The normalized plain text.
 */
var getPlainText = function (node) {
    if (!node) {
        return "";
    }

    var hasOwn = Object.prototype.hasOwnProperty;

    // Tags assumed to be block-level when no display value can be resolved.
    // Looked up by exact name so that e.g. <i> is not mistaken for <li>.
    var BLOCK_TAGS = { LI: true, P: true, TR: true };

    // Display values that put an element on its own line. "table" is listed
    // explicitly because the previous substring-based check also accepted it.
    var BLOCK_DISPLAYS = {
        "table-row": true,
        "block": true,
        "list-item": true,
        "table": true
    };

    // Accumulates the raw text while the tree is walked.
    var text = "";

    // Collapses runs of spaces and line breaks, trims each line, and turns
    // the NEWLINE paragraph placeholders into blank lines.
    var normalize = function (raw) {
        if (!raw) {
            return "";
        }
        return raw.replace(/ +/g, " ")
            .replace(/[\t]+/gm, "")
            .replace(/[ ]+$/gm, "")
            .replace(/^[ ]+/gm, "")
            .replace(/\n+/g, "\n")
            .replace(/\n+$/, "")
            .replace(/^\n+/, "")
            .replace(/\nNEWLINE\n/g, "\n\n")
            .replace(/NEWLINE\n/g, "\n\n") // IE
            // The trailing line break was stripped above, so the placeholder
            // of a final paragraph has nothing after it; drop it explicitly.
            .replace(/[ \n]*NEWLINE$/, "");
    };

    // Removes text nodes that contain nothing but whitespace.
    var removeWhiteSpace = function (root) {
        var blanks = [];
        var isBlank = function (textNode) {
            return !(/[^\t\n\r ]/.test(textNode.nodeValue));
        };
        var findBlanks = function (parent) {
            for (var i = 0; i < parent.childNodes.length; i++) {
                var child = parent.childNodes[i];
                if (child.nodeType == 3 && isBlank(child)) {
                    blanks.push(child);
                } else if (child.hasChildNodes()) {
                    findBlanks(child);
                }
            }
        };
        findBlanks(root);
        // Removal is deferred so the child lists are not mutated mid-walk.
        for (var j = 0; j < blanks.length; j++) {
            blanks[j].parentNode.removeChild(blanks[j]);
        }
    };

    // Resolves a style property: inline style first, then currentStyle or
    // the computed style, then a guess based on the tag name.
    var sty = function (n, prop) {
        if (n.style[prop]) {
            return n.style[prop];
        }
        if (n.tagName == "SCRIPT") {
            return "none";
        }
        var view = n.ownerDocument && n.ownerDocument.defaultView;
        var resolved = n.currentStyle ||
            (view && view.getComputedStyle ? view.getComputedStyle(n, null) : null);
        if (!resolved || !resolved[prop]) {
            // No usable value (presumably because the clone is detached from
            // the document); fall back to the tag name.
            return hasOwn.call(BLOCK_TAGS, n.tagName) ? "block" : n.style[prop];
        }
        if (resolved[prop] == "block" && n.tagName == "TD") {
            return "feaux-inline";
        }
        return resolved[prop];
    };

    // True when the element's display value puts it on its own line.
    var isBlock = function (n) {
        var display = sty(n, "display") || "feaux-inline";
        return hasOwn.call(BLOCK_DISPLAYS, display);
    };

    // Walks the tree and appends text to `text`, surrounding every element
    // with a line break (block) or a space (inline).
    var collect = function (n) {
        if (/pre/.test(sty(n, "whiteSpace"))) {
            // NOTE(review): innerHTML is serialized markup, so entities and
            // nested tags inside preformatted content are emitted verbatim.
            text += n.innerHTML
                .replace(/\t/g, " ")
                .replace(/\n/g, " "); // to match IE
            return;
        }
        if (sty(n, "display") == "none") {
            return;
        }
        var gap = isBlock(n) ? "\n" : " ";
        text += gap;
        for (var i = 0; i < n.childNodes.length; i++) {
            var child = n.childNodes[i];
            if (child.nodeType == 3) {
                text += child.nodeValue;
            }
            if (child.childNodes.length) {
                collect(child);
            }
        }
        text += gap;
    };

    // Use a copy because the tree gets changed below.
    node = node.cloneNode(true);

    // Line breaks aren't picked up by textContent. Match every spelling of
    // the tag: <br>, <BR>, <br/> and <br />.
    node.innerHTML = node.innerHTML.replace(/<br\s*\/?>/gi, "\n");

    // Double line breaks after P tags are desired, but would get stripped
    // while newlines are collapsed, so mark them with placeholder text.
    var paras = node.getElementsByTagName("p");
    for (var i = 0; i < paras.length; i++) {
        paras[i].innerHTML += "NEWLINE";
    }

    removeWhiteSpace(node);
    collect(node);

    // Normalize the accumulated text rather than the walker's return value,
    // so a root that is itself preformatted still yields its content.
    return normalize(text);
};
但对于拥有多层嵌套关系的父元素来说,对其进行如此复杂的操作无疑是吃力不讨好,因此许多框架都无视之!
下面是我的框架元素文本的操作函数
//by 司徒正美 text:function(value){ var node = this[0]; if(value === void 0){ if(!node){ return "" }else if(node.tagName == "OPTION" || node.tagName === "SCRIPT"){ return node.text; }else{ return node.textContent || node.innerText || dom.getText([ node ]); } }else{ return this.empty().append( (node && node.ownerDocument || DOC).createTextNode( value )); } },
机器瞎学/数据掩埋/模式混淆/人工智障/深度遗忘/神经掉线/计算机幻觉/专注单身二十五年