From f55520661e5b85a5c9b83f60c3058085ced25c9f Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Thu, 12 Nov 2015 13:55:25 -0800 Subject: [PATCH] Various speed improvements --- src/Spout/Reader/XLSX/Helper/CellHelper.php | 56 ++++++++++--------- .../FileBasedStrategy.php | 4 +- src/Spout/Reader/XLSX/Helper/StyleHelper.php | 6 +- src/Spout/Reader/XLSX/SheetIterator.php | 3 +- .../Common/Helper/AbstractStyleHelper.php | 4 +- src/Spout/Writer/Common/Helper/CellHelper.php | 29 +++++++--- src/Spout/Writer/ODS/Internal/Worksheet.php | 3 +- 7 files changed, 65 insertions(+), 40 deletions(-) diff --git a/src/Spout/Reader/XLSX/Helper/CellHelper.php b/src/Spout/Reader/XLSX/Helper/CellHelper.php index b3ac982..c2f21f7 100644 --- a/src/Spout/Reader/XLSX/Helper/CellHelper.php +++ b/src/Spout/Reader/XLSX/Helper/CellHelper.php @@ -12,6 +12,14 @@ use Box\Spout\Common\Exception\InvalidArgumentException; */ class CellHelper { + // Using ord() is super slow... Using a pre-computed hash table instead. + private static $columnLetterToIndexMapping = [ + 'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6, + 'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13, + 'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20, + 'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25, + ]; + /** * Fills the missing indexes of an array with a given value. 
* For instance, $dataArray = []; $a[1] = 1; $a[3] = 3; @@ -50,34 +58,31 @@ class CellHelper } $columnIndex = 0; - $capitalAAsciiValue = ord('A'); - $capitalZAsciiValue = ord('Z'); - $step = $capitalZAsciiValue - $capitalAAsciiValue + 1; // Remove row information - $column = preg_replace('/\d/', '', $cellIndex); - $columnLength = strlen($column); + $columnLetters = preg_replace('/\d/', '', $cellIndex); - /* - * This is how the following loop will process the data: - * A => 0 - * Z => 25 - * AA => 26 : (26^(2-1) * (0+1)) + 0 - * AB => 27 : (26^(2-1) * (0+1)) + 1 - * BC => 54 : (26^(2-1) * (1+1)) + 2 - * BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25 - */ - foreach (str_split($column) as $single_cell_index) - { - $currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue; + // strlen() is super slow too... Using isset() is way faster and not too unreadable, + // since we checked before that there are between 1 and 3 letters. + $columnLength = isset($columnLetters[1]) ? (isset($columnLetters[2]) ? 3 : 2) : 1; - if ($columnLength === 1) { - $columnIndex += $currentColumnIndex; - } else { - $columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); - } - - $columnLength--; + // Looping over the different letters of the column is slower than this method. + // Also, not using the pow() function because it's slooooow... 
+ switch ($columnLength) { + case 1: + $columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]); + break; + case 2: + $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26; + $secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]]; + $columnIndex = $firstLetterIndex + $secondLetterIndex; + break; + case 3: + $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676; + $secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26; + $thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]]; + $columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex; + break; } return $columnIndex; @@ -86,12 +91,13 @@ class CellHelper /** * Returns whether a cell index is valid, in an Excel world. * To be valid, the cell index should start with capital letters and be followed by numbers. + * There can only be 3 letters, as there can only be 16,384 columns, which is equivalent to 'XFD'. * * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) * @return bool */ protected static function isValidCellIndex($cellIndex) { - return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); + return (preg_match('/^[A-Z]{1,3}\d+$/', $cellIndex) === 1); } } diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php index 9f1f19f..b4e699f 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php @@ -143,7 +143,9 @@ class FileBasedStrategy implements CachingStrategyInterface } $sharedString = null; - if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { + + // Using isset here because it is way faster than array_key_exists... 
+ if (isset($this->inMemoryTempFileContents[$indexInFile])) { $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; $sharedString = $this->unescapeLineFeed($escapedSharedString); } diff --git a/src/Spout/Reader/XLSX/Helper/StyleHelper.php b/src/Spout/Reader/XLSX/Helper/StyleHelper.php index 52d1844..403d647 100644 --- a/src/Spout/Reader/XLSX/Helper/StyleHelper.php +++ b/src/Spout/Reader/XLSX/Helper/StyleHelper.php @@ -148,7 +148,8 @@ class StyleHelper // Default style (0) does not format numeric values as timestamps. Only custom styles do. // Also if the style ID does not exist in the styles.xml file, format as numeric value. - if ($styleId === self::DEFAULT_STYLE_ID || !array_key_exists($styleId, $stylesAttributes)) { + // Using isset here because it is way faster than array_key_exists... + if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) { return false; } @@ -193,7 +194,8 @@ class StyleHelper { $customNumberFormats = $this->getCustomNumberFormats(); - if (!array_key_exists($numFmtId, $customNumberFormats)) { + // Using isset here because it is way faster than array_key_exists... + if (!isset($customNumberFormats[$numFmtId])) { return false; } diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php index db034f0..7b3d3dd 100644 --- a/src/Spout/Reader/XLSX/SheetIterator.php +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -67,7 +67,8 @@ class SheetIterator implements IteratorInterface */ public function next() { - if (array_key_exists($this->currentSheetIndex, $this->sheets)) { + // Using isset here because it is way faster than array_key_exists... 
+ if (isset($this->sheets[$this->currentSheetIndex])) { $currentSheet = $this->sheets[$this->currentSheetIndex]; $currentSheet->getRowIterator()->end(); diff --git a/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php b/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php index 70ee8c9..7cf0eed 100644 --- a/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php +++ b/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php @@ -56,7 +56,9 @@ abstract class AbstractStyleHelper protected function hasStyleAlreadyBeenRegistered($style) { $serializedStyle = $style->serialize(); - return array_key_exists($serializedStyle, $this->serializedStyleToStyleIdMappingTable); + + // Using isset here because it is way faster than array_key_exists... + return isset($this->serializedStyleToStyleIdMappingTable[$serializedStyle]); } /** diff --git a/src/Spout/Writer/Common/Helper/CellHelper.php b/src/Spout/Writer/Common/Helper/CellHelper.php index 2349437..b32e9ac 100644 --- a/src/Spout/Writer/Common/Helper/CellHelper.php +++ b/src/Spout/Writer/Common/Helper/CellHelper.php @@ -10,6 +10,9 @@ namespace Box\Spout\Writer\Common\Helper; */ class CellHelper { + /** @var array Cache containing the mapping column index => cell index */ + private static $columnIndexToCellIndexCache = []; + /** * Returns the cell index (base 26) associated to the base 10 column index. * Excel uses A to Z letters for column indexing, where A is the 1st column, @@ -21,18 +24,26 @@ class CellHelper */ public static function getCellIndexFromColumnIndex($columnIndex) { - $cellIndex = ''; - $capitalAAsciiValue = ord('A'); + $originalColumnIndex = $columnIndex; - do { - $modulus = $columnIndex % 26; - $cellIndex = chr($capitalAAsciiValue + $modulus) . $cellIndex; + // Using isset here because it is way faster than array_key_exists... 
+ if (!isset(self::$columnIndexToCellIndexCache[$originalColumnIndex])) { + $cellIndex = ''; + $capitalAAsciiValue = ord('A'); - // substracting 1 because it's zero-based - $columnIndex = intval($columnIndex / 26) - 1; - } while ($columnIndex >= 0); + do { + $modulus = $columnIndex % 26; + $cellIndex = chr($capitalAAsciiValue + $modulus) . $cellIndex; - return $cellIndex; + // subtracting 1 because it's zero-based + $columnIndex = intval($columnIndex / 26) - 1; + + } while ($columnIndex >= 0); + + self::$columnIndexToCellIndexCache[$originalColumnIndex] = $cellIndex; + } + + return self::$columnIndexToCellIndexCache[$originalColumnIndex]; } /** diff --git a/src/Spout/Writer/ODS/Internal/Worksheet.php b/src/Spout/Writer/ODS/Internal/Worksheet.php index 19305f0..3a65726 100644 --- a/src/Spout/Writer/ODS/Internal/Worksheet.php +++ b/src/Spout/Writer/ODS/Internal/Worksheet.php @@ -146,7 +146,8 @@ class Worksheet implements WorksheetInterface for ($i = 0; $i < $cellsCount; $i++) { $currentCellValue = $dataRow[$currentCellIndex]; - if (!array_key_exists($nextCellIndex, $dataRow) || $currentCellValue !== $dataRow[$nextCellIndex]) { + // Using isset here because it is way faster than array_key_exists... + if (!isset($dataRow[$nextCellIndex]) || $currentCellValue !== $dataRow[$nextCellIndex]) { $numTimesValueRepeated = ($nextCellIndex - $currentCellIndex); $data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated);