PhpSpreadsheet 读取Excel 中XLS / XLSX测试代码

<?php

namespace App\Http\Controllers\Home;

use Illuminate\Http\Request;
use App\Http\Controllers\Controller;
use Log;
use Illuminate\Support\Facades\Redis;
use PhpOffice\PhpSpreadsheet\IOFactory;


use App\Services\ReadRowFilter;
use App\Services\ChunkReadFilter;

class ExcelController extends Controller
{
//
public function read_row()
{
// ini_set('max_execution_time','0');
// ini_set('memory_limit', '1G');
$start = '====开始执行内存:'.(memory_get_usage()/1024/1024)."MB\r\n";
$start .= '==开始峰值内存:'.(memory_get_peak_usage()/1024/1024)."MB\r\n";
Log::debug($start);
$file = storage_path('excel_file') . '/' . '1w.xls';
if (!file_exists($file)) {
dd('文件不存在!');
}
$deadrowfilter = new ReadRowFilter();
// $reader = new \PhpOffice\PhpSpreadsheet\Reader\Xlsx();
$reader = new \PhpOffice\PhpSpreadsheet\Reader\Xls();
$reader->setLoadSheetsOnly('Sheet1');
$reader->setReadFilter($deadrowfilter);
$spreadsheet = $reader->load("$file");
$sheet = $spreadsheet->getSheet(0); // 读取第一個工作表
$highestRow = $sheet->getHighestRow(); // 取得总行数
$highestColumm = $sheet->getHighestColumn(); // 取得总列数
dd($highestRow);

$end = '===结束执行内存:'.(memory_get_usage()/1024/1024)."MB\r\n"; // 返回分配给 PHP 的内存量
$end .= '==结束峰值内存:'.(memory_get_peak_usage()/1024/1024)."MB\r\n"; // 返回分配给 PHP 内存的峰值
Log::debug($end);
dd($spreadsheet);
}

public function read_chunk()
{
ini_set('max_execution_time','0');
ini_set('memory_limit', '1G');

$memory = '====开始执行内存:'.(memory_get_usage()/1024/1024)."MB ";
$memory .= '开始峰值内存====:'.(memory_get_peak_usage()/1024/1024)."MB\r\n";
Log::debug($memory);

$inputFileName = storage_path('excel_file') . '/' . '1w.xlsx';
Log::debug('文件路径:' . $inputFileName);
if (!file_exists($inputFileName)) {
Log::debug('文件不存在!');
}
$file_size = (filesize($inputFileName) / 1024 / 1024) . "MB";
if ($file_size) {
Log::debug('Excel file size: '.$file_size);
}

/** Identify the type of $inputFileName **/
$inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($inputFileName);
/** Create a new Reader of the type defined in $inputFileType **/
$reader = \PhpOffice\PhpSpreadsheet\IOFactory::createReader($inputFileType);
$worksheetNames = $reader->listWorksheetNames($inputFileName);

$memory = '当前内存-1:'.(memory_get_usage()/1024/1024)."MB";
$memory .= ' 当前峰值内存-1:'.(memory_get_peak_usage()/1024/1024)."MB";
Log::debug($memory);

/** Define how many rows we want to read for each "chunk" **/
$chunkSize = 10;
Log::debug('[chunk size]: '.$chunkSize);
/** Create a new Instance of our Read Filter **/
$chunkFilter = new ChunkReadFilter();
/** Tell the Reader that we want to use the Read Filter **/
$reader->setLoadSheetsOnly('Sheet1');

/** Loop to read our worksheet in "chunk size" blocks **/
$startRow = 2;
$excel_sheet_field = [];
$excel_sheet_data = [];
$excel_highest_row = 0;

do{
$end = $startRow + $chunkSize;
Log::debug('第[' . $startRow . '--' . $end .'] 开始:');
/** Tell the Read Filter which rows we want this iteration **/
$chunkFilter->setRows($startRow, $chunkSize);
$reader->setReadFilter($chunkFilter);
/** Load only the rows that match our filter **/
$spreadsheet = $reader->load($inputFileName);

$memory = '[当前内存]:'.(memory_get_usage()/1024/1024)."MB";
$memory .= ' [当前峰值内存]:'.(memory_get_peak_usage()/1024/1024)."MB";
Log::debug($memory);

$sheetData = $spreadsheet->getActiveSheet()->toArray();

if ($startRow == 2) {
// get all row count.
$excel_highest_row = $chunkFilter->lastRow;
// get sheet field
$excel_sheet_field = $sheetData[0];
}
$spreadsheet->disconnectWorksheets();
unset($spreadsheet);
if (strtolower($inputFileType) == 'xls') {
$sheetData = array_slice($sheetData,($startRow - 1),$chunkSize);
}else{
array_shift($sheetData);
$sheetData = array_slice($sheetData, -$chunkSize, $chunkSize);
// dd($sheetData);
}
Log::debug('本次共遍历行数:'. count($sheetData));
$excel_sheet_data[] = $sheetData;
unset($sheetData);

Log::debug('第[' .$startRow . '--' . $end .'] 结束, Excel总行数:' . $excel_highest_row);
$memory = '[结束执行内存]:'.(memory_get_usage()/1024/1024)."MB";
$memory .= ' [结束峰值内存]:'.(memory_get_peak_usage()/1024/1024)."MB";
Log::debug($memory);
unset($memory);

$startRow = $end;
// } while ($end <= $excel_highest_row);
} while ($end <= 50);

$memory = '【全部结束执行内存:'.(memory_get_usage()/1024/1024)."MB";
$memory .= ' 全部结束峰值内存】:'.(memory_get_peak_usage()/1024/1024)."MB";
Log::debug($memory);

dd($excel_sheet_data);
$row_index = 0;
foreach ($excel_sheet_data as $rows)
{
foreach ($rows as $row)
{
// $row = array_combine($excel_sheet_field ,$row);
Log::debug('row_read_parames : $row ' . ',' . $row_index);
$row_index ++;
}
}

// dd($excel_sheet_data);
// $acount_row = count($excel_sheet_data);
// Log::debug($excel_sheet_data);
// Log::debug('All rows:' . $acount_row);
Log::debug('All end!');







/*
for ($startRow = 2; $startRow <= 65536; $startRow += $chunkSize) {
Log::debug(date('Y-m-d H:i:s') . '_' .$startRow . ' 开始:');
// Tell the Read Filter which rows we want this iteration
$chunkFilter->setRows($startRow,$chunkSize);
// Load only the rows that match our filter
$spreadsheet = $reader->load($inputFileName);
// Do some processing here
$sheetData = $spreadsheet->setActiveSheetIndexByName('Sheet1')->toArray();

$start = '【结束执行内存:'.(memory_get_usage()/1024/1024)."MB\r\n";
$start .= '结束峰值内存】:'.(memory_get_peak_usage()/1024/1024)."MB\r\n";
Log::debug($start);

$write_json = storage_path('json') . '/' . date('Ymd_His_') . $startRow . '.json';
file_put_contents("$write_json", json_encode($sheetData));
$spreadsheet->disconnectWorksheets();
unset($spreadsheet);

Log::debug( date('Y-m-d H:i:s') . '_' .$startRow . ' 结束:');
unset($sheetData);
}
*/
}






// 普通正常按单元格读取
public function index()
{
ini_set('memory_limit', '1G');
ini_set('max_execution_time','0');

$file_path = storage_path('excel_file') . '/' . '1w.xls';
Log::debug($file_path);
if (!file_exists($file_path)) {
Log::debug('Excel file not found: '.$file_path);
}
$file_size = (filesize($file_path) / 1024 / 1024) . "MB";
if ($file_size) {
Log::debug('Excel file size: '.$file_size);
}

$monery = '=== 【开始内存:'.(memory_get_usage()/1024/1024)." MB";
$monery .= '峰值内存:'.(memory_get_peak_usage()/1024/1024)." MB】 ===";
Log::debug($monery);

$spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::createReaderForFile($file_path);
$spreadsheet->setReadDataOnly(TRUE);
$excel_data = $spreadsheet->load($file_path);

$str11 = '执行内存:'.(memory_get_usage()/1024/1024)."MB";
$str11 .= '峰值内存:'.(memory_get_peak_usage()/1024/1024)."MB\r\n";
Log::debug($str11);

// 打印所有Sheet表
$sheet_list = $spreadsheet->listWorksheetNames($file_path);
$sheet = $excel_data->getSheet(0);
// $sheet = $excel_data->getSheetByName('Sheet1');
$highestRow = $sheet->getHighestRow(); // e.g. 10
$highestColumm = $sheet->getHighestColumn(); // e.g 'F'

$excel_lines = $highestRow - 1;
if ($excel_lines <= 0) {
Log::debug('Excel file data empty.');
}

$excel_sheet_result = [];
$highestColumnIndex = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::columnIndexFromString($highestColumm);
$excel_sheet_field = [];
for ($row = 1; $row <= $highestRow; ++$row) {
// set each row data array.
$excel_sheet_row_data = [];
Log::debug('Start looping excel row: '. $row);
for ($col = 1; $col <= $highestColumnIndex; ++$col) {
// get each cell by Column And Row index.
$value = $sheet->getCellByColumnAndRow($col, $row)->getValue();
$excel_sheet_row_data[] = ($row == 1 ? trim($value) : $value);
}

if ($row == 1) {
$excel_sheet_field = $excel_sheet_row_data;
}
$excel_sheet_result[] = array_combine($excel_sheet_field, $excel_sheet_row_data);
}
array_shift($excel_sheet_result);
$rows = $excel_sheet_result;
foreach ($rows as $row_index => $row) {
Log::debug("Start read row :".$row_index);
}
dd($rows);
Log::debug('end!');


}




/*
NULL, // Value that should be returned for empty cells
TRUE, // Should formulas be calculated (the equivalent of getCalculatedValue() for each cell)
TRUE, // Should values be formatted (the equivalent of getFormattedValue() for each cell)
TRUE // Should the array be indexed by cell row and cell column
*/


}

--------------------

<?php

/**
* Created by PhpStorm.
* Date: 2019/7/1
* Time: 11:56
*/

namespace App\Services;
use Log;
use PhpOffice\PhpSpreadsheet\Reader\IReadFilter;

class ChunkReadFilter implements IReadFilter
{
private $startRow = 0;
private $endRow = 0;
// self set add $lastRow
public $lastRow = '';

/** Set the list of rows that we want to read */
public function setRows($startRow, $chunkSize) {
$this->startRow = $startRow;
$this->endRow = $startRow + $chunkSize;
}

public function readCell($column, $row, $worksheetName = '') {
// Only read the heading row, and the configured rows
$this->lastRow = $row;
if (($row == 1) || ($row >= $this->startRow && $row < $this->endRow) ) {
return true;
}
return false;
}
}

posted on   春分夏至  阅读(54)  评论(0编辑  收藏  举报

(评论功能已被禁用)
编辑推荐:
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示