Merge pull request #145 from box/speed_up

Various speed improvements
This commit is contained in:
Adrien Loison 2015-11-12 14:09:52 -08:00
commit 90cbb7b5a6
7 changed files with 65 additions and 40 deletions

View File

@ -12,6 +12,14 @@ use Box\Spout\Common\Exception\InvalidArgumentException;
*/ */
class CellHelper class CellHelper
{ {
// Using ord() is super slow... Using a pre-computed hash table instead.
private static $columnLetterToIndexMapping = [
'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6,
'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13,
'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20,
'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25,
];
/** /**
* Fills the missing indexes of an array with a given value. * Fills the missing indexes of an array with a given value.
* For instance, $dataArray = []; $a[1] = 1; $a[3] = 3; * For instance, $dataArray = []; $a[1] = 1; $a[3] = 3;
@ -50,34 +58,31 @@ class CellHelper
} }
$columnIndex = 0; $columnIndex = 0;
$capitalAAsciiValue = ord('A');
$capitalZAsciiValue = ord('Z');
$step = $capitalZAsciiValue - $capitalAAsciiValue + 1;
// Remove row information // Remove row information
$column = preg_replace('/\d/', '', $cellIndex); $columnLetters = preg_replace('/\d/', '', $cellIndex);
$columnLength = strlen($column);
/* // strlen() is super slow too... Using isset() is way faster and not too unreadable,
* This is how the following loop will process the data: // since we checked before that there are between 1 and 3 letters.
* A => 0 $columnLength = isset($columnLetters[1]) ? (isset($columnLetters[2]) ? 3 : 2) : 1;
* Z => 25
* AA => 26 : (26^(2-1) * (0+1)) + 0
* AB => 27 : (26^(2-1) * (0+1)) + 1
* BC => 54 : (26^(2-1) * (1+1)) + 2
* BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25
*/
foreach (str_split($column) as $single_cell_index)
{
$currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue;
if ($columnLength === 1) { // Looping over the different letters of the column is slower than this method.
$columnIndex += $currentColumnIndex; // Also, not using the pow() function because it's slooooow...
} else { switch ($columnLength) {
$columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); case 1:
} $columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]);
break;
$columnLength--; case 2:
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26;
$secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]];
$columnIndex = $firstLetterIndex + $secondLetterIndex;
break;
case 3:
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676;
$secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26;
$thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]];
$columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex;
break;
} }
return $columnIndex; return $columnIndex;
@ -86,12 +91,13 @@ class CellHelper
/** /**
* Returns whether a cell index is valid, in an Excel world. * Returns whether a cell index is valid, in an Excel world.
* To be valid, the cell index should start with capital letters and be followed by numbers. * To be valid, the cell index should start with capital letters and be followed by numbers.
* There can only be 3 letters, as there can only be 16,384 columns, the last of which is 'XFD'.
* *
* @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
* @return bool * @return bool
*/ */
protected static function isValidCellIndex($cellIndex) protected static function isValidCellIndex($cellIndex)
{ {
return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); return (preg_match('/^[A-Z]{1,3}\d+$/', $cellIndex) === 1);
} }
} }

View File

@ -143,7 +143,9 @@ class FileBasedStrategy implements CachingStrategyInterface
} }
$sharedString = null; $sharedString = null;
if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) {
// Using isset here because it is way faster than array_key_exists...
if (isset($this->inMemoryTempFileContents[$indexInFile])) {
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile];
$sharedString = $this->unescapeLineFeed($escapedSharedString); $sharedString = $this->unescapeLineFeed($escapedSharedString);
} }

View File

@ -148,7 +148,8 @@ class StyleHelper
// Default style (0) does not format numeric values as timestamps. Only custom styles do. // Default style (0) does not format numeric values as timestamps. Only custom styles do.
// Also if the style ID does not exist in the styles.xml file, format as numeric value. // Also if the style ID does not exist in the styles.xml file, format as numeric value.
if ($styleId === self::DEFAULT_STYLE_ID || !array_key_exists($styleId, $stylesAttributes)) { // Using isset here because it is way faster than array_key_exists...
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
return false; return false;
} }
@ -193,7 +194,8 @@ class StyleHelper
{ {
$customNumberFormats = $this->getCustomNumberFormats(); $customNumberFormats = $this->getCustomNumberFormats();
if (!array_key_exists($numFmtId, $customNumberFormats)) { // Using isset here because it is way faster than array_key_exists...
if (!isset($customNumberFormats[$numFmtId])) {
return false; return false;
} }

View File

@ -67,7 +67,8 @@ class SheetIterator implements IteratorInterface
*/ */
public function next() public function next()
{ {
if (array_key_exists($this->currentSheetIndex, $this->sheets)) { // Using isset here because it is way faster than array_key_exists...
if (isset($this->sheets[$this->currentSheetIndex])) {
$currentSheet = $this->sheets[$this->currentSheetIndex]; $currentSheet = $this->sheets[$this->currentSheetIndex];
$currentSheet->getRowIterator()->end(); $currentSheet->getRowIterator()->end();

View File

@ -56,7 +56,9 @@ abstract class AbstractStyleHelper
protected function hasStyleAlreadyBeenRegistered($style) protected function hasStyleAlreadyBeenRegistered($style)
{ {
$serializedStyle = $style->serialize(); $serializedStyle = $style->serialize();
return array_key_exists($serializedStyle, $this->serializedStyleToStyleIdMappingTable);
// Using isset here because it is way faster than array_key_exists...
return isset($this->serializedStyleToStyleIdMappingTable[$serializedStyle]);
} }
/** /**

View File

@ -10,6 +10,9 @@ namespace Box\Spout\Writer\Common\Helper;
*/ */
class CellHelper class CellHelper
{ {
/** @var array Cache containing the mapping column index => cell index */
private static $columnIndexToCellIndexCache = [];
/** /**
* Returns the cell index (base 26) associated to the base 10 column index. * Returns the cell index (base 26) associated to the base 10 column index.
* Excel uses A to Z letters for column indexing, where A is the 1st column, * Excel uses A to Z letters for column indexing, where A is the 1st column,
@ -21,6 +24,10 @@ class CellHelper
*/ */
public static function getCellIndexFromColumnIndex($columnIndex) public static function getCellIndexFromColumnIndex($columnIndex)
{ {
$originalColumnIndex = $columnIndex;
// Using isset here because it is way faster than array_key_exists...
if (!isset(self::$columnIndexToCellIndexCache[$originalColumnIndex])) {
$cellIndex = ''; $cellIndex = '';
$capitalAAsciiValue = ord('A'); $capitalAAsciiValue = ord('A');
@ -30,9 +37,13 @@ class CellHelper
// subtracting 1 because it's zero-based // subtracting 1 because it's zero-based
$columnIndex = intval($columnIndex / 26) - 1; $columnIndex = intval($columnIndex / 26) - 1;
} while ($columnIndex >= 0); } while ($columnIndex >= 0);
return $cellIndex; self::$columnIndexToCellIndexCache[$originalColumnIndex] = $cellIndex;
}
return self::$columnIndexToCellIndexCache[$originalColumnIndex];
} }
/** /**

View File

@ -146,7 +146,8 @@ class Worksheet implements WorksheetInterface
for ($i = 0; $i < $cellsCount; $i++) { for ($i = 0; $i < $cellsCount; $i++) {
$currentCellValue = $dataRow[$currentCellIndex]; $currentCellValue = $dataRow[$currentCellIndex];
if (!array_key_exists($nextCellIndex, $dataRow) || $currentCellValue !== $dataRow[$nextCellIndex]) { // Using isset here because it is way faster than array_key_exists...
if (!isset($dataRow[$nextCellIndex]) || $currentCellValue !== $dataRow[$nextCellIndex]) {
$numTimesValueRepeated = ($nextCellIndex - $currentCellIndex); $numTimesValueRepeated = ($nextCellIndex - $currentCellIndex);
$data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated); $data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated);