[php-dom] php-dom使用注意事项
/* 注意事项: 1. 在loadHTML之前,应该先将内容转换为UTF-8编码,这样可以避免出现entity等报错; 2. 已经使用php函数htmlspecialchars()转换过的html实体,经过DOM解析转换后,会被直接还原为标签,如: &lt;br/&gt; 在dom解析之后,会被直接还原为 "<br/>"; */
// Parse the HTML fragment held in $content and build one entry per <div>.
// Each entry contains:
//   time        - raw value of the div's "hnb-time" attribute
//   ftime       - that value formatted as "H:i", or false when it is not numeric
//   nation      - raw value of the div's "hnb-nation" attribute
//   nation_info - result of Hnb_Model_Tag::getCndNationalInfoByID() for nation
//   content     - serialized markup of the div's first <p> ('' when there is none)
//   raw_content - text content of the div's first <p> ('' when there is none)
//   img_list    - serialized markup of every later <p> that mentions an image extension
// NOTE(review): $content is supplied by code outside this snippet; it is
// presumably a UTF-8 HTML string - confirm against the caller.

$arr_return = [];

// A blank document cannot contain any <div>, and DOMDocument::loadHTML()
// complains about an empty string (a warning on older PHP, a ValueError on
// PHP 8), so there is nothing to parse.
if (trim((string) $content) === '') {
    return $arr_return;
}

$doc = new DOMDocument('1.0', 'UTF-8');

// Keep libxml's complaints about sloppy HTML out of the PHP error output.
libxml_use_internal_errors(true);

// libxml's HTML parser does not assume UTF-8 for markup that carries no
// charset declaration, so every non-ASCII character is turned into a numeric
// entity first. This replaces mb_convert_encoding(..., 'HTML-ENTITIES', ...),
// which is deprecated as of PHP 8.2.
$doc->loadHTML(
    mb_encode_numericentity((string) $content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8')
);

// Internal error collection stays enabled (as before), but the buffer is
// emptied so it does not keep growing on every call.
libxml_clear_errors();

// TODO (from the original author): the case where only a single post exists
// has not been thought through yet.
foreach ($doc->getElementsByTagName('div') as $div) {
    $time = $div->getAttribute('hnb-time');
    $nation = $div->getAttribute('hnb-nation');

    // By convention the first <p> is the post body; any further <p> may hold images.
    $p_nodes = $div->getElementsByTagName('p');
    $first_p = $p_nodes->item(0);

    $arr_img_list = [];
    for ($p = 1; $p < $p_nodes->length; $p++) {
        $img = $doc->saveXML($p_nodes->item($p));
        if (preg_match('/jpg|png|gif|jpeg/i', $img)) {
            $arr_img_list[] = $img;
        }
    }

    $arr_return[] = [
        'time' => $time,
        // getAttribute() returns a string ('' when the attribute is absent);
        // only format it when it really is a number.
        'ftime' => is_numeric($time) ? date('H:i', (int) $time) : false,
        'nation' => $nation,
        'nation_info' => Hnb_Model_Tag::getInstance()->getCndNationalInfoByID($nation),
        // saveXML(null) would dump the whole document, so a missing <p> must
        // be handled explicitly.
        'content' => $first_p === null ? '' : $doc->saveXML($first_p),
        'raw_content' => $first_p === null ? '' : $first_p->textContent,
        'img_list' => $arr_img_list,
    ];
}

return $arr_return;
posted on 2015-12-28 16:39 smelikecat 阅读(301) 评论(0) 编辑 收藏 举报