博客园 首页 私信博主 显示目录 隐藏目录 管理 动画

使用 PHPWord 获取 Word 文档内容

use PhpOffice\PhpWord\IOFactory;


   /**
    * Return the file name of a path without its directory and without its
    * last extension.
    *
    * Only the final ".ext" is removed ("a.tar.gz" gives "a.tar"). A name with
    * no dot at all is returned unchanged instead of being reduced to an empty
    * string.
    *
    * @param string $file_path Path or bare file name.
    * @return string Base name with the trailing extension stripped.
    */
   function get_fileName($file_path){
       // 1. Drop the directory part.
       $base_name = basename($file_path);

       // 2. Cut at the last dot. strrpos() yields false when there is no dot;
       //    passing that to substr() as a length would produce ''.
       $dot_pos = strrpos($base_name, '.');
       if ($dot_pos === false) {
           return $base_name;
       }

       return substr($base_name, 0, $dot_pos);
   }

   /**
    * Parse a Word document and return its content as an HTML fragment.
    *
    * Every top-level element of every section is wrapped in a <p>...</p>
    * pair. Text runs, preserved-text fields and tables are rendered here;
    * leaf elements (text, links, images) are delegated to elementHandler().
    *
    * @param string $source Path of the document passed to IOFactory::load().
    * @return string HTML for all sections; '' when the document has none.
    */
   function wordParsing($source)
   {
       // Load the document and fetch all of its sections.
       $sections = IOFactory::load($source)->getSections();

       // Accumulates the generated markup.
       $html = '';

       foreach ($sections as $section) {
           foreach ($section->getElements() as $element) {
               $html .= '<p>';

               if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
                   // A paragraph made of runs: render each run in order.
                   foreach ($element->getElements() as $run) {
                       $html .= elementHandler($run, $element);
                   }

               } elseif ($element instanceof \PhpOffice\PhpWord\Element\PreserveText) {
                   // Field code such as an auto-generated hyperlink: strip the
                   // field syntax and keep the first remaining piece.
                   // NOTE(review): 'f' and 'g' in this list remove every such
                   // letter from the text, which looks like it mangles URLs;
                   // kept as-is pending confirmation of the original intent.
                   $find = array('{', 'HYPERLINK', '}', ' ', '"', 'f', 'g');
                   // getText() may hand back a string or an array of pieces;
                   // the cast makes both cases indexable the same way.
                   $pieces = (array) str_replace($find, '', $element->getText());
                   if (isset($pieces[0])) {
                       $html .= htmlspecialchars((string) $pieces[0], ENT_QUOTES, 'UTF-8');
                   }

               } elseif ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                   $html .= '<table style="margin:0;padding:0;border-collapse:collapse;border-spacing:0;" >';
                   foreach ($element->getRows() as $row) {
                       $html .= '<tr style="padding:0">';
                       foreach ($row->getCells() as $cell) {
                           // Cell width is reported in twips; 15 twips = 1px at 96 dpi.
                           // Emit it only when set, and with a unit so the CSS is valid.
                           $width_twips = $cell->getWidth();
                           $width_css = $width_twips ? 'width:' . round($width_twips / 15, 0) . 'px' : '';

                           $html .= '<td style="border: 1px solid #777777;padding:2px 5px;' . $width_css . '">';
                           foreach ($cell->getElements() as $cell_child) {
                               if (method_exists($cell_child, 'getElements')) {
                                   // Container (e.g. a text run): render its children.
                                   foreach ($cell_child->getElements() as $run) {
                                       $html .= elementHandler($run, $cell_child);
                                   }
                               } else {
                                   // Leaf placed directly in the cell; elementHandler()
                                   // returns '' for types it does not know.
                                   $html .= elementHandler($cell_child, $cell);
                               }
                           }
                           $html .= '</td>';
                       }
                       $html .= '</tr>';
                   }
                   $html .= '</table>';

               } else {
                   // Leaf placed directly in the section (not wrapped in a text
                   // run); unsupported types render as ''.
                   $html .= elementHandler($element, $section);
               }

               $html .= '</p>';
           }
       }

       // Returned after ALL sections have been processed.
       return $html;
   }

   /**
    * Render one leaf element of a Word document as an HTML fragment.
    *
    * - Text  becomes <span style="...">text</span>
    * - Link  becomes <a href="..." style="...">text</a>
    * - Image is written to uploads/docx/image/ and referenced by an <img>
    * Any other element type yields an empty string.
    *
    * @param object $end_element    Leaf element to render.
    * @param mixed  $parent_element Container owning $end_element (not used here).
    * @return string HTML fragment; '' when nothing could be rendered.
    */
   function elementHandler($end_element, $parent_element)
   {
       $html = '';
       $is_text = $end_element instanceof \PhpOffice\PhpWord\Element\Text;
       $is_link = $end_element instanceof \PhpOffice\PhpWord\Element\Link;

       if ($is_text || $is_link) {
           // Text and links share the same inline style handling.
           $styleString = '';
           $style = $end_element->getFontStyle();

           // The font style may be a style name (string) or null rather than
           // a style object; only an object exposes the getters used below.
           if (is_object($style)) {
               $fontFamily = $style->getName();
               // Size is in points; convert to CSS pixels (72pt = 96px).
               $fontSize = $style->getSize() ? ($style->getSize() / 72) * 96 : '';
               $fontColor = $style->getColor();
               // Word stores colours as bare hex ("FF0000"); CSS needs the '#'.
               if (is_string($fontColor) && preg_match('/^[0-9a-fA-F]{6}$/', $fontColor)) {
                   $fontColor = '#' . $fontColor;
               }

               $fontFamily && $styleString .= "font-family:{$fontFamily};";
               $fontSize && $styleString .= "font-size:{$fontSize}px;";
               $style->isBold() && $styleString .= "font-weight:bold;";
               $fontColor && $styleString .= "color:{$fontColor};";
           }

           // Document content is untrusted: escape everything placed in markup.
           $styleAttr = htmlspecialchars($styleString, ENT_QUOTES, 'UTF-8');
           $text = htmlspecialchars((string) $end_element->getText(), ENT_QUOTES, 'UTF-8');

           if ($is_text) {
               $html .= sprintf('<span style="%s">%s</span>', $styleAttr, $text);
           } else {
               $href = htmlspecialchars((string) $end_element->getSource(), ENT_QUOTES, 'UTF-8');
               $html .= sprintf('<a href="%s" style="%s">%s</a>', $href, $styleAttr, $text);
           }

       } elseif ($end_element instanceof \PhpOffice\PhpWord\Element\Image) {
           // The image could instead be uploaded to OSS / an image server here.
           $imageDir = 'uploads/docx/image/';

           // Image bytes arrive base64-encoded, possibly split over lines.
           $encoded = $end_element->getImageStringData(true);
           $binary = is_string($encoded)
               ? base64_decode(str_replace(array("\r\n", "\r", "\n"), '', $encoded))
               : false;
           if ($binary === false || $binary === '') {
               return $html; // nothing to save, so no <img> is emitted
           }

           // Create the target directory; the second is_dir() covers the case
           // where another request created it in the meantime.
           if (!is_dir($imageDir) && !mkdir($imageDir, 0777, true) && !is_dir($imageDir)) {
               return $html;
           }

           // File name is derived from the image source so repeated runs reuse it.
           $imageSrc = $imageDir . md5($end_element->getSource()) . '.' . $end_element->getImageExtension();
           if (file_put_contents($imageSrc, $binary) === false) {
               return $html; // avoid pointing at a file that was never written
           }

           $html .= '<img src="' . \think\Config::get('default_url') . $imageSrc . '" style="width:100%;height:auto">';
       }

       return $html;
   }


安装 PHPWord(通过 Composer):
composer require phpoffice/phpword

posted @   八月情  阅读(349)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 推荐几款开源且免费的 .NET MAUI 组件库
· 实操Deepseek接入个人知识库
· 易语言 —— 开山篇
· Trae初体验
点击右上角即可分享
微信分享提示