diff --git a/composer.json b/composer.json index 775763f58a..c53bbebfff 100644 --- a/composer.json +++ b/composer.json @@ -120,7 +120,8 @@ "autoload-dev": { "psr-4": { "PhpOffice\\PhpSpreadsheetTests\\": "tests/PhpSpreadsheetTests", - "PhpOffice\\PhpSpreadsheetInfra\\": "infra" + "PhpOffice\\PhpSpreadsheetInfra\\": "infra", + "PhpOffice\\PhpSpreadsheetBenchmarks\\": "tests/Benchmark" } } } diff --git a/phpunit.xml.dist b/phpunit.xml.dist index d3e845c1ea..5c9ef847dc 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,13 +1,18 @@ - + - - ./tests/PhpSpreadsheetTests - + + + ./tests/PhpSpreadsheetTests + + + ./tests/Benchmark + + ./src diff --git a/src/PhpSpreadsheet/Cell/Cell.php b/src/PhpSpreadsheet/Cell/Cell.php index fd0a608b04..8c39d81a7d 100644 --- a/src/PhpSpreadsheet/Cell/Cell.php +++ b/src/PhpSpreadsheet/Cell/Cell.php @@ -1010,6 +1010,19 @@ public function setXfIndex(int $indexValue): self return $this->updateInCollection(); } + /** + * Set the XF index without triggering updateInCollection(). + * + * This is intended for use by readers that will immediately follow with + * setValueExplicit(), avoiding a redundant cache write. + * + * @internal + */ + public function setXfIndexNoUpdate(int $indexValue): void + { + $this->xfIndex = $indexValue; + } + /** * Set the formula attributes. * diff --git a/src/PhpSpreadsheet/Reader/Xls.php b/src/PhpSpreadsheet/Reader/Xls.php index fb639e16d0..74bd4ebfdd 100644 --- a/src/PhpSpreadsheet/Reader/Xls.php +++ b/src/PhpSpreadsheet/Reader/Xls.php @@ -99,6 +99,18 @@ class Xls extends XlsBase */ protected Worksheet $phpSheet; + /** + * Cached sheet title for the current sheet being parsed. + * Avoids repeated getTitle() calls in per-cell read filter checks. + */ + protected string $phpSheetTitle = ''; + + /** + * Cached read filter reference. + * Avoids repeated getReadFilter() calls in per-cell read filter checks. + */ + protected IReadFilter $cachedReadFilter; + /** * BIFF version. 
*/ @@ -1944,12 +1956,11 @@ protected function readSst(): void $nm = self::getInt4d($recordData, 4); $pos += 4; - // look up limit position - foreach ($spliceOffsets as $spliceOffset) { - // it can happen that the string is empty, therefore we need - // <= and not just < - if ($pos <= $spliceOffset) { - $limitposSST = $spliceOffset; + // look up limit position (last splice offset where pos fits) + $spliceCount = count($spliceOffsets); + for ($si = 0; $si < $spliceCount; ++$si) { + if ($pos <= $spliceOffsets[$si]) { + $limitposSST = $spliceOffsets[$si]; } } @@ -1992,13 +2003,11 @@ protected function readSst(): void // expected byte length of character array if not split $len = ($isCompressed) ? $numChars : $numChars * 2; - // look up limit position - Check it again to be sure that no error occurs when parsing SST structure + // look up limit position - find the first splice offset at or beyond current pos $limitpos = null; - foreach ($spliceOffsets as $spliceOffset) { - // it can happen that the string is empty, therefore we need - // <= and not just < - if ($pos <= $spliceOffset) { - $limitpos = $spliceOffset; + for ($si = 0; $si < $spliceCount; ++$si) { + if ($pos <= $spliceOffsets[$si]) { + $limitpos = $spliceOffsets[$si]; break; } @@ -2025,10 +2034,10 @@ protected function readSst(): void // keep reading the characters while ($charsLeft > 0) { - // look up next limit position, in case the string span more than one continue record - foreach ($spliceOffsets as $spliceOffset) { - if ($pos < $spliceOffset) { - $limitpos = $spliceOffset; + // look up next limit position, in case the string spans more than one continue record + for ($si = 0; $si < $spliceCount; ++$si) { + if ($pos < $spliceOffsets[$si]) { + $limitpos = $spliceOffsets[$si]; break; } @@ -2060,22 +2069,16 @@ protected function readSst(): void } elseif (!$isCompressed && ($option == 0)) { // 1st fragment uncompressed // this fragment compressed - $len = min($charsLeft, $limitpos - $pos); - for ($j = 0; $j 
< $len; ++$j) { - $retstr .= $recordData[$pos + $j] - . chr(0); - } + $len = (int) min($charsLeft, $limitpos - $pos); + // Pad each byte with a null byte to expand to UTF-16LE; skip empty fragments because chunk_split('') still returns the separator + $retstr .= ($len > 0) ? chunk_split(substr($recordData, $pos, $len), 1, "\x00") : ''; $charsLeft -= $len; $isCompressed = false; } else { // 1st fragment compressed // this fragment uncompressed - $newstr = ''; - $jMax = strlen($retstr); - for ($j = 0; $j < $jMax; ++$j) { - $newstr .= $retstr[$j] . chr(0); - } - $retstr = $newstr; + // Pad existing compressed bytes with null bytes; an empty first fragment must stay empty (chunk_split('') would yield "\x00") + $retstr = ($retstr === '') ? '' : chunk_split($retstr, 1, "\x00"); /** @var int */ $len = min($charsLeft * 2, $limitpos - $pos); $retstr .= substr($recordData, $pos, $len); @@ -2726,9 +2729,10 @@ protected function readRk(): void // offset: 2; size: 2; index to column $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . ($row + 1); // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { // offset: 4; size: 2; index to XF record $xfIndex = self::getUInt2d($recordData, 4); @@ -2736,10 +2740,10 @@ protected function readRk(): void $rknum = self::getInt4d($recordData, 6); $numValue = self::getIEEE754($rknum); - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); + $cell = $this->phpSheet->getCell($cellCoordinate); if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { // add style information - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); } // add cell @@ -2770,22 +2774,28 @@ protected function readLabelSst(): void // offset: 2; size: 2; index to column $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . 
($row + 1); - $cell = null; // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { // offset: 4; size: 2; index to XF record $xfIndex = self::getUInt2d($recordData, 4); // offset: 6; size: 4; index to SST record $index = self::getInt4d($recordData, 6); + // cache SST entry locally to avoid repeated array lookups + $sstValue = $this->sst[$index]['value']; + $fmtRuns = $this->sst[$index]['fmtRuns']; + // add cell - if (($fmtRuns = $this->sst[$index]['fmtRuns']) && !$this->readDataOnly) { + if ($fmtRuns && !$this->readDataOnly) { // then we should treat as rich text $richText = new RichText(); $charPos = 0; - $sstCount = count($this->sst[$index]['fmtRuns']); + $sstCount = count($fmtRuns); + $sstValueLength = StringHelper::countCharacters($sstValue); + $lastFontIndex = count($this->objFonts) - 1; for ($i = 0; $i <= $sstCount; ++$i) { /** @var mixed[][] $fmtRuns */ if (isset($fmtRuns[$i])) { @@ -2793,10 +2803,10 @@ protected function readLabelSst(): void $temp = $fmtRuns[$i]; $temp = $temp['charPos']; /** @var int $charPos */ - $text = StringHelper::substring($this->sst[$index]['value'], $charPos, $temp - $charPos); + $text = StringHelper::substring($sstValue, $charPos, $temp - $charPos); $charPos = $temp; } else { - $text = StringHelper::substring($this->sst[$index]['value'], $charPos, StringHelper::countCharacters($this->sst[$index]['value'])); + $text = StringHelper::substring($sstValue, $charPos, $sstValueLength); } if (StringHelper::countCharacters($text) > 0) { @@ -2815,8 +2825,8 @@ protected function readLabelSst(): void $temp = $fmtRuns[$i - 1]['fontIndex']; $fontIndex = $temp - 1; } - if (array_key_exists($fontIndex, $this->objFonts) === false) { - $fontIndex = count($this->objFonts) - 1; + if ($fontIndex > $lastFontIndex) { + $fontIndex = $lastFontIndex; } $textRun->setFont(clone $this->objFonts[$fontIndex]); } @@ 
-2824,20 +2834,21 @@ protected function readLabelSst(): void } } if ($this->readEmptyCells || trim($richText->getPlainText()) !== '') { - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); + $cell = $this->phpSheet->getCell($cellCoordinate); + if (isset($this->mapCellXfIndex[$xfIndex])) { + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); + } $cell->setValueExplicit($richText, DataType::TYPE_STRING); } } else { - if ($this->readEmptyCells || trim($this->sst[$index]['value']) !== '') { - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); - $cell->setValueExplicit($this->sst[$index]['value'], DataType::TYPE_STRING); + if ($this->readEmptyCells || trim($sstValue) !== '') { + $cell = $this->phpSheet->getCell($cellCoordinate); + if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); + } + $cell->setValueExplicit($sstValue, DataType::TYPE_STRING); } } - - if (!$this->readDataOnly && $cell !== null && isset($this->mapCellXfIndex[$xfIndex])) { - // add style information - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); - } } } @@ -2870,20 +2881,21 @@ protected function readMulRk(): void // offset within record data $offset = 4; + $rowIndex = $row + 1; for ($i = 1; $i <= $columns; ++$i) { $columnString = Coordinate::stringFromColumnIndex($colFirst + $i); // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $rowIndex, $this->phpSheetTitle)) { // offset: var; size: 2; index to XF record $xfIndex = self::getUInt2d($recordData, $offset); // offset: var; size: 4; RK value $numValue = self::getIEEE754(self::getInt4d($recordData, $offset + 2)); - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); + $cell = $this->phpSheet->getCell($columnString . 
$rowIndex); if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { // add style - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); } // add cell value @@ -2916,18 +2928,19 @@ protected function readNumber(): void // offset: 2; size 2; index to column $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . ($row + 1); // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { // offset 4; size: 2; index to XF record $xfIndex = self::getUInt2d($recordData, 4); $numValue = self::extractNumber(substr($recordData, 6, 8)); - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); + $cell = $this->phpSheet->getCell($cellCoordinate); if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { // add cell style - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); } // add cell value @@ -2957,6 +2970,7 @@ protected function readFormula(): void // offset: 2; size: 2; col index $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . ($row + 1); // offset: 20: size: variable; formula structure $formulaStructure = substr($recordData, 20); @@ -2984,10 +2998,10 @@ protected function readFormula(): void } // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { if ($isPartOfSharedFormula) { // formula is added to this cell after the sheet has been read - $this->sharedFormulaParts[$columnString . 
($row + 1)] = $this->baseCell; + $this->sharedFormulaParts[$cellCoordinate] = $this->baseCell; } // offset: 16: size: 4; not used @@ -2996,7 +3010,9 @@ protected function readFormula(): void $xfIndex = self::getUInt2d($recordData, 4); // offset: 6; size: 8; result of the formula - if ((ord($recordData[6]) == 0) && (ord($recordData[12]) == 255) && (ord($recordData[13]) == 255)) { + $resultType = ord($recordData[6]); + $isSpecialResult = (ord($recordData[12]) == 255) && (ord($recordData[13]) == 255); + if (($resultType == 0) && $isSpecialResult) { // String formula. Result follows in appended STRING record $dataType = DataType::TYPE_STRING; @@ -3008,27 +3024,15 @@ protected function readFormula(): void // read STRING record $value = $this->readString(); - } elseif ( - (ord($recordData[6]) == 1) - && (ord($recordData[12]) == 255) - && (ord($recordData[13]) == 255) - ) { + } elseif (($resultType == 1) && $isSpecialResult) { // Boolean formula. Result is in +2; 0=false, 1=true $dataType = DataType::TYPE_BOOL; $value = (bool) ord($recordData[8]); - } elseif ( - (ord($recordData[6]) == 2) - && (ord($recordData[12]) == 255) - && (ord($recordData[13]) == 255) - ) { + } elseif (($resultType == 2) && $isSpecialResult) { // Error formula. Error code is in +2 $dataType = DataType::TYPE_ERROR; $value = Xls\ErrorCode::lookup(ord($recordData[8])); - } elseif ( - (ord($recordData[6]) == 3) - && (ord($recordData[12]) == 255) - && (ord($recordData[13]) == 255) - ) { + } elseif (($resultType == 3) && $isSpecialResult) { // Formula result is a null string $dataType = DataType::TYPE_NULL; $value = ''; @@ -3038,10 +3042,10 @@ protected function readFormula(): void $value = self::extractNumber(substr($recordData, 6, 8)); } - $cell = $this->phpSheet->getCell($columnString . 
($row + 1)); + $cell = $this->phpSheet->getCell($cellCoordinate); if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { // add cell style - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); } // store the formula @@ -3148,9 +3152,10 @@ protected function readBoolErr(): void // offset: 2; size: 2; column index $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . ($row + 1); // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { // offset: 4; size: 2; index to XF record $xfIndex = self::getUInt2d($recordData, 4); @@ -3160,7 +3165,11 @@ protected function readBoolErr(): void // offset: 7; size: 1; 0=boolean; 1=error $isError = ord($recordData[7]); - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); + $cell = $this->phpSheet->getCell($cellCoordinate); + if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { + // add cell style + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); + } switch ($isError) { case 0: // boolean $value = (bool) $boolErr; @@ -3177,11 +3186,6 @@ protected function readBoolErr(): void break; } - - if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { - // add cell style - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); - } } } @@ -3210,14 +3214,15 @@ protected function readMulBlank(): void // offset: 4; size: 2 x nc; list of indexes to XF records // add style information if (!$this->readDataOnly && $this->readEmptyCells) { + $rowIndex = $row + 1; for ($i = 0; $i < $length / 2 - 3; ++$i) { $columnString = Coordinate::stringFromColumnIndex($fc + $i + 1); // Read cell? 
- if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $rowIndex, $this->phpSheetTitle)) { $xfIndex = self::getUInt2d($recordData, 4 + 2 * $i); if (isset($this->mapCellXfIndex[$xfIndex])) { - $this->phpSheet->getCell($columnString . ($row + 1))->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $this->phpSheet->getCell($columnString . $rowIndex)->setXfIndex($this->mapCellXfIndex[$xfIndex]); // blank cell: no setValueExplicit() follows, so keep the updating setter } } } @@ -3250,9 +3255,10 @@ protected function readLabel(): void // offset: 2; size: 2; index to column $column = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($column + 1); + $cellCoordinate = $columnString . ($row + 1); // Read cell? - if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $row + 1, $this->phpSheetTitle)) { // offset: 4; size: 2; XF index $xfIndex = self::getUInt2d($recordData, 4); @@ -3267,13 +3273,12 @@ protected function readLabel(): void } /** @var string $value */ if ($this->readEmptyCells || trim($value) !== '') { - $cell = $this->phpSheet->getCell($columnString . ($row + 1)); - $cell->setValueExplicit($value, DataType::TYPE_STRING); - + $cell = $this->phpSheet->getCell($cellCoordinate); if (!$this->readDataOnly && isset($this->mapCellXfIndex[$xfIndex])) { // add cell style - $cell->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $cell->setXfIndexNoUpdate($this->mapCellXfIndex[$xfIndex]); } + $cell->setValueExplicit($value, DataType::TYPE_STRING); } } } @@ -3296,14 +3301,16 @@ protected function readBlank(): void $col = self::getUInt2d($recordData, 2); $columnString = Coordinate::stringFromColumnIndex($col + 1); + $rowIndex = $row + 1; + // Read cell? 
- if ($this->getReadFilter()->readCell($columnString, $row + 1, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($columnString, $rowIndex, $this->phpSheetTitle)) { // offset: 4; size: 2; XF index $xfIndex = self::getUInt2d($recordData, 4); // add style information if (!$this->readDataOnly && $this->readEmptyCells && isset($this->mapCellXfIndex[$xfIndex])) { - $this->phpSheet->getCell($columnString . ($row + 1))->setXfIndex($this->mapCellXfIndex[$xfIndex]); + $this->phpSheet->getCell($columnString . $rowIndex)->setXfIndex($this->mapCellXfIndex[$xfIndex]); // blank cell: no setValueExplicit() follows, so keep the updating setter } } } @@ -3605,7 +3612,7 @@ private function includeCellRangeFiltered(string $cellRangeAddress): bool StringHelper::stringIncrement($rangeBoundaries[1][0]); for ($row = $rangeBoundaries[0][1]; $row <= $rangeBoundaries[1][1]; ++$row) { for ($column = $rangeBoundaries[0][0]; $column != $rangeBoundaries[1][0]; StringHelper::stringIncrement($column)) { - if ($this->getReadFilter()->readCell($column, $row, $this->phpSheet->getTitle())) { + if ($this->cachedReadFilter->readCell($column, $row, $this->phpSheetTitle)) { $includeCellRange = true; break 2; diff --git a/src/PhpSpreadsheet/Reader/Xls/LoadSpreadsheet.php b/src/PhpSpreadsheet/Reader/Xls/LoadSpreadsheet.php index 0cfe9b1a72..9640c30bd4 100644 --- a/src/PhpSpreadsheet/Reader/Xls/LoadSpreadsheet.php +++ b/src/PhpSpreadsheet/Reader/Xls/LoadSpreadsheet.php @@ -175,6 +175,8 @@ protected function loadSpreadsheetFromFile2(string $filename, Xls $xls): Spreads // name in line with the formula, not the reverse $xls->phpSheet->setTitle($sheet['name'], false, false); $xls->phpSheet->setSheetState($sheet['sheetState']); + $xls->phpSheetTitle = $sheet['name']; + $xls->cachedReadFilter = $xls->getReadFilter(); $xls->pos = $sheet['offset']; diff --git a/tests/Benchmark/XlsReaderBenchmarkTest.php b/tests/Benchmark/XlsReaderBenchmarkTest.php new file mode 100644 index 0000000000..f03f90e604 --- /dev/null +++ 
b/tests/Benchmark/XlsReaderBenchmarkTest.php @@ -0,0 +1,310 @@ +tempFiles as $file) { + if (file_exists($file)) { + unlink($file); + } + } + $this->tempFiles = []; + } + + /** + * Benchmark reading a set of real XLS test fixtures. + * + * Exercises the full reader path: SST parsing, cell reads with styles, + * formulas, rich text, conditional formatting, and data validation. + */ + public function testRealXlsFixtures(): void + { + $fixtures = [ + self::XLS_FIXTURES_DIR . '/biff8cover.xls', + self::XLS_FIXTURES_DIR . '/formulas.xls', + self::XLS_FIXTURES_DIR . '/RichTextFontSize.xls', + self::XLS_FIXTURES_DIR . '/Colours.xls', + self::XLS_FIXTURES_DIR . '/PageSetup.xls', + self::XLS_FIXTURES_DIR . '/DataValidation.xls', + ]; + + foreach ($fixtures as $fixture) { + self::assertFileExists($fixture, "Fixture missing: {$fixture}"); + } + + $iterations = 5; + $reader = new XlsReader(); + + // Warm up + $warmup = $reader->load($fixtures[0]); + $warmup->disconnectWorksheets(); + unset($warmup); + + gc_collect_cycles(); + $memBefore = memory_get_usage(true); + $start = hrtime(true); + + for ($iter = 0; $iter < $iterations; ++$iter) { + foreach ($fixtures as $fixture) { + $spreadsheet = $reader->load($fixture); + $spreadsheet->disconnectWorksheets(); + unset($spreadsheet); + } + } + + $end = hrtime(true); + $memAfter = memory_get_usage(true); + gc_collect_cycles(); + + $totalMs = ($end - $start) / 1_000_000; + $avgMs = $totalMs / $iterations; + $memDeltaMb = ($memAfter - $memBefore) / 1024 / 1024; + + fwrite(STDERR, "\n"); + fwrite(STDERR, sprintf("=== XLS Real Fixtures Benchmark (%d files, %d iterations) ===\n", count($fixtures), $iterations)); + fwrite(STDERR, sprintf(" PHP version: %s (%s)\n", PHP_VERSION, PHP_OS)); + fwrite(STDERR, sprintf(" Total: %.2f ms (%.2f ms avg per iteration)\n", $totalMs, $avgMs)); + fwrite(STDERR, sprintf(" Per file avg: %.2f ms\n", $avgMs / count($fixtures))); + fwrite(STDERR, sprintf(" Memory delta: %.2f MB\n", $memDeltaMb)); + fwrite(STDERR, 
"\n"); + + // Verify the reader still produces valid output + $spreadsheet = $reader->load($fixtures[0]); + self::assertGreaterThan(0, $spreadsheet->getSheetCount()); + $spreadsheet->disconnectWorksheets(); + } + + /** + * Benchmark reading a synthetic XLS file with many cells. + * + * Targets per-cell overhead: setXfIndexNoUpdate(), cached read filter, + * pre-computed cell coordinates. + */ + public function testSyntheticManyCells(): void + { + $xlsFile = $this->createSyntheticXls( + sheets: 3, + rowsPerSheet: 500, + columnsPerSheet: 8, + withStyles: true, + withFormulas: true + ); + + $iterations = 5; + $reader = new XlsReader(); + + // Warm up + $warmup = $reader->load($xlsFile); + $totalCells = 0; + foreach ($warmup->getWorksheetIterator() as $sheet) { + $totalCells += count($sheet->getCellCollection()->getCoordinates()); + } + $warmup->disconnectWorksheets(); + unset($warmup); + + gc_collect_cycles(); + $memBefore = memory_get_usage(true); + $start = hrtime(true); + + for ($iter = 0; $iter < $iterations; ++$iter) { + $spreadsheet = $reader->load($xlsFile); + $spreadsheet->disconnectWorksheets(); + unset($spreadsheet); + } + + $end = hrtime(true); + $memAfter = memory_get_usage(true); + gc_collect_cycles(); + + $totalMs = ($end - $start) / 1_000_000; + $avgMs = $totalMs / $iterations; + $memDeltaMb = ($memAfter - $memBefore) / 1024 / 1024; + + fwrite(STDERR, "\n"); + fwrite(STDERR, sprintf("=== XLS Synthetic Many-Cell Benchmark (%d cells, %d iterations) ===\n", $totalCells, $iterations)); + fwrite(STDERR, sprintf(" PHP version: %s (%s)\n", PHP_VERSION, PHP_OS)); + fwrite(STDERR, sprintf(" Total: %.2f ms (%.2f ms avg per iteration)\n", $totalMs, $avgMs)); + fwrite(STDERR, sprintf(" Memory delta: %.2f MB\n", $memDeltaMb)); + fwrite(STDERR, "\n"); + + // Verify correctness + $spreadsheet = $reader->load($xlsFile); + self::assertSame(3, $spreadsheet->getSheetCount()); + $firstSheet = $spreadsheet->getSheet(0); + 
self::assertNotNull($firstSheet->getCell('A1')->getValue()); + $spreadsheet->disconnectWorksheets(); + } + + /** + * Benchmark reading a synthetic XLS file heavy on shared strings (SST). + * + * Targets SST parsing: chunk_split optimization for compressed-to-uncompressed + * expansion and CONTINUE record handling. + */ + public function testSyntheticSstHeavy(): void + { + $xlsFile = $this->createSstHeavyXls(rows: 1000, columns: 10); + + $iterations = 5; + $reader = new XlsReader(); + + // Warm up + $warmup = $reader->load($xlsFile); + $totalCells = 0; + foreach ($warmup->getWorksheetIterator() as $sheet) { + $totalCells += count($sheet->getCellCollection()->getCoordinates()); + } + $warmup->disconnectWorksheets(); + unset($warmup); + + gc_collect_cycles(); + $memBefore = memory_get_usage(true); + $start = hrtime(true); + + for ($iter = 0; $iter < $iterations; ++$iter) { + $spreadsheet = $reader->load($xlsFile); + $spreadsheet->disconnectWorksheets(); + unset($spreadsheet); + } + + $end = hrtime(true); + $memAfter = memory_get_usage(true); + gc_collect_cycles(); + + $totalMs = ($end - $start) / 1_000_000; + $avgMs = $totalMs / $iterations; + $memDeltaMb = ($memAfter - $memBefore) / 1024 / 1024; + + fwrite(STDERR, "\n"); + fwrite(STDERR, sprintf("=== XLS SST-Heavy Benchmark (%d string cells, %d iterations) ===\n", $totalCells, $iterations)); + fwrite(STDERR, sprintf(" PHP version: %s (%s)\n", PHP_VERSION, PHP_OS)); + fwrite(STDERR, sprintf(" Total: %.2f ms (%.2f ms avg per iteration)\n", $totalMs, $avgMs)); + fwrite(STDERR, sprintf(" Memory delta: %.2f MB\n", $memDeltaMb)); + fwrite(STDERR, "\n"); + + // Verify correctness + $spreadsheet = $reader->load($xlsFile); + $sheet = $spreadsheet->getActiveSheet(); + self::assertIsString($sheet->getCell('A1')->getValue()); + self::assertGreaterThan(0, strlen((string) $sheet->getCell('A1')->getValue())); + $spreadsheet->disconnectWorksheets(); + } + + /** + * Create a synthetic XLS file with mixed data, styles, and formulas. 
+ */ + private function createSyntheticXls( + int $sheets, + int $rowsPerSheet, + int $columnsPerSheet, + bool $withStyles, + bool $withFormulas, + ): string { + $spreadsheet = new Spreadsheet(); + $columns = array_map( + fn (int $i) => \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($i), + range(1, $columnsPerSheet) + ); + + for ($s = 0; $s < $sheets; ++$s) { + $sheet = ($s === 0) ? $spreadsheet->getActiveSheet() : $spreadsheet->createSheet(); + $sheet->setTitle("Sheet{$s}"); + + for ($row = 1; $row <= $rowsPerSheet; ++$row) { + foreach ($columns as $colIdx => $col) { + if ($withFormulas && $colIdx === $columnsPerSheet - 1 && $row > 1) { + // Last column: SUM formula + $firstCol = $columns[0]; + $lastCol = $columns[$columnsPerSheet - 2]; + $sheet->getCell("{$col}{$row}") + ->setValue("=SUM({$firstCol}{$row}:{$lastCol}{$row})"); + } else { + $sheet->getCell("{$col}{$row}") + ->setValue($row * ($colIdx + 1) + $s * 1000); + } + } + + if ($withStyles && $row % 5 === 0) { + $sheet->getStyle("A{$row}")->applyFromArray([ + 'font' => ['bold' => true, 'color' => ['argb' => 'FF003366']], + 'fill' => [ + 'fillType' => Fill::FILL_SOLID, + 'startColor' => ['argb' => 'FFD9E2F3'], + ], + 'borders' => [ + 'bottom' => [ + 'borderStyle' => Border::BORDER_THIN, + 'color' => ['argb' => Color::COLOR_BLACK], + ], + ], + ]); + } + } + } + + $filename = File::temporaryFilename(); + $this->tempFiles[] = $filename; + $writer = new XlsWriter($spreadsheet); + $writer->save($filename); + $spreadsheet->disconnectWorksheets(); + + return $filename; + } + + /** + * Create a synthetic XLS file with many distinct string values + * to exercise SST (Shared String Table) parsing. 
+ */ + private function createSstHeavyXls(int $rows, int $columns): string + { + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + + $colLetters = array_map( + fn (int $i) => \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($i), + range(1, $columns) + ); + + for ($row = 1; $row <= $rows; ++$row) { + foreach ($colLetters as $colIdx => $col) { + // Use varied-length strings with enough uniqueness to populate the SST + $sheet->getCell("{$col}{$row}") + ->setValue("StringValue_R{$row}_C{$colIdx}_" . str_repeat('x', ($row + $colIdx) % 20 + 5)); + } + } + + $filename = File::temporaryFilename(); + $this->tempFiles[] = $filename; + $writer = new XlsWriter($spreadsheet); + $writer->save($filename); + $spreadsheet->disconnectWorksheets(); + + return $filename; + } +}