Merge pull request #145 from box/speed_up

Various speed improvements
This commit is contained in:
Adrien Loison 2015-11-12 14:09:52 -08:00
commit 90cbb7b5a6
7 changed files with 65 additions and 40 deletions

View File

@ -12,6 +12,14 @@ use Box\Spout\Common\Exception\InvalidArgumentException;
*/
class CellHelper
{
// Using ord() is super slow... Using a pre-computed hash table instead.
private static $columnLetterToIndexMapping = [
'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6,
'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13,
'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20,
'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25,
];
/**
* Fills the missing indexes of an array with a given value.
* For instance, $dataArray = []; $dataArray[1] = 1; $dataArray[3] = 3;
@ -50,34 +58,31 @@ class CellHelper
}
$columnIndex = 0;
$capitalAAsciiValue = ord('A');
$capitalZAsciiValue = ord('Z');
$step = $capitalZAsciiValue - $capitalAAsciiValue + 1;
// Remove row information
$column = preg_replace('/\d/', '', $cellIndex);
$columnLength = strlen($column);
$columnLetters = preg_replace('/\d/', '', $cellIndex);
/*
* This is how the following loop will process the data:
* A => 0
* Z => 25
* AA => 26 : (26^(2-1) * (0+1)) + 0
* AB => 27 : (26^(2-1) * (0+1)) + 1
* BC => 54 : (26^(2-1) * (1+1)) + 2
* BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25
*/
foreach (str_split($column) as $single_cell_index)
{
$currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue;
// strlen() is super slow too... Using isset() is way faster and not too unreadable,
// since we checked before that there are between 1 and 3 letters.
$columnLength = isset($columnLetters[1]) ? (isset($columnLetters[2]) ? 3 : 2) : 1;
if ($columnLength === 1) {
$columnIndex += $currentColumnIndex;
} else {
$columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1);
}
$columnLength--;
// Looping over the different letters of the column is slower than this method.
// Also, not using the pow() function because it's slooooow...
switch ($columnLength) {
case 1:
$columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]);
break;
case 2:
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26;
$secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]];
$columnIndex = $firstLetterIndex + $secondLetterIndex;
break;
case 3:
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676;
$secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26;
$thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]];
$columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex;
break;
}
return $columnIndex;
@ -86,12 +91,13 @@ class CellHelper
/**
* Returns whether a cell index is valid, in an Excel world.
* To be valid, the cell index should start with capital letters and be followed by numbers.
* There can only be 3 letters, as there can only be 16,384 columns, the last of which is 'XFD'.
*
* @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
* @return bool
*/
protected static function isValidCellIndex($cellIndex)
{
return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1);
return (preg_match('/^[A-Z]{1,3}\d+$/', $cellIndex) === 1);
}
}

View File

@ -143,7 +143,9 @@ class FileBasedStrategy implements CachingStrategyInterface
}
$sharedString = null;
if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) {
// Using isset here because it is way faster than array_key_exists...
if (isset($this->inMemoryTempFileContents[$indexInFile])) {
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile];
$sharedString = $this->unescapeLineFeed($escapedSharedString);
}

View File

@ -148,7 +148,8 @@ class StyleHelper
// Default style (0) does not format numeric values as timestamps. Only custom styles do.
// Also if the style ID does not exist in the styles.xml file, format as numeric value.
if ($styleId === self::DEFAULT_STYLE_ID || !array_key_exists($styleId, $stylesAttributes)) {
// Using isset here because it is way faster than array_key_exists...
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
return false;
}
@ -193,7 +194,8 @@ class StyleHelper
{
$customNumberFormats = $this->getCustomNumberFormats();
if (!array_key_exists($numFmtId, $customNumberFormats)) {
// Using isset here because it is way faster than array_key_exists...
if (!isset($customNumberFormats[$numFmtId])) {
return false;
}

View File

@ -67,7 +67,8 @@ class SheetIterator implements IteratorInterface
*/
public function next()
{
if (array_key_exists($this->currentSheetIndex, $this->sheets)) {
// Using isset here because it is way faster than array_key_exists...
if (isset($this->sheets[$this->currentSheetIndex])) {
$currentSheet = $this->sheets[$this->currentSheetIndex];
$currentSheet->getRowIterator()->end();

View File

@ -56,7 +56,9 @@ abstract class AbstractStyleHelper
protected function hasStyleAlreadyBeenRegistered($style)
{
$serializedStyle = $style->serialize();
return array_key_exists($serializedStyle, $this->serializedStyleToStyleIdMappingTable);
// Using isset here because it is way faster than array_key_exists...
return isset($this->serializedStyleToStyleIdMappingTable[$serializedStyle]);
}
/**

View File

@ -10,6 +10,9 @@ namespace Box\Spout\Writer\Common\Helper;
*/
class CellHelper
{
/** @var array Cache containing the mapping column index => cell index */
private static $columnIndexToCellIndexCache = [];
/**
* Returns the cell index (base 26) associated to the base 10 column index.
* Excel uses A to Z letters for column indexing, where A is the 1st column,
@ -21,18 +24,26 @@ class CellHelper
*/
public static function getCellIndexFromColumnIndex($columnIndex)
{
$cellIndex = '';
$capitalAAsciiValue = ord('A');
$originalColumnIndex = $columnIndex;
do {
$modulus = $columnIndex % 26;
$cellIndex = chr($capitalAAsciiValue + $modulus) . $cellIndex;
// Using isset here because it is way faster than array_key_exists...
if (!isset(self::$columnIndexToCellIndexCache[$originalColumnIndex])) {
$cellIndex = '';
$capitalAAsciiValue = ord('A');
// subtracting 1 because it's zero-based
$columnIndex = intval($columnIndex / 26) - 1;
} while ($columnIndex >= 0);
do {
$modulus = $columnIndex % 26;
$cellIndex = chr($capitalAAsciiValue + $modulus) . $cellIndex;
return $cellIndex;
// subtracting 1 because it's zero-based
$columnIndex = intval($columnIndex / 26) - 1;
} while ($columnIndex >= 0);
self::$columnIndexToCellIndexCache[$originalColumnIndex] = $cellIndex;
}
return self::$columnIndexToCellIndexCache[$originalColumnIndex];
}
/**

View File

@ -146,7 +146,8 @@ class Worksheet implements WorksheetInterface
for ($i = 0; $i < $cellsCount; $i++) {
$currentCellValue = $dataRow[$currentCellIndex];
if (!array_key_exists($nextCellIndex, $dataRow) || $currentCellValue !== $dataRow[$nextCellIndex]) {
// Using isset here because it is way faster than array_key_exists...
// Note: unlike array_key_exists(), isset() is also false when the stored value is null.
if (!isset($dataRow[$nextCellIndex]) || $currentCellValue !== $dataRow[$nextCellIndex]) {
$numTimesValueRepeated = ($nextCellIndex - $currentCellIndex);
$data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated);