diff --git a/src/Spout/Reader/Common/XMLProcessor.php b/src/Spout/Reader/Common/XMLProcessor.php new file mode 100644 index 0000000..d8a1da8 --- /dev/null +++ b/src/Spout/Reader/Common/XMLProcessor.php @@ -0,0 +1,152 @@ +xmlReader = $xmlReader; + } + + /** + * @param string $nodeName A callback may be triggered when a node with this name is read + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @param callable $callback Callback to execute when the read node has the given name and type + * @return XMLProcessor + */ + public function registerCallback($nodeName, $nodeType, $callback) + { + $callbackKey = $this->getCallbackKey($nodeName, $nodeType); + $this->callbacks[$callbackKey] = $this->getInvokableCallbackData($callback); + + return $this; + } + + /** + * @param string $nodeName Name of the node + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @return string Key used to store the associated callback: node name and type joined by "|", a character that cannot appear in an XML name, so distinct name/type pairs never share a key + */ + private function getCallbackKey($nodeName, $nodeType) + { + return "{$nodeName}|{$nodeType}"; + } + + /** + * Because the callback can be a "protected" function, we don't want to use call_user_func() directly + * but instead invoke the callback using Reflection. This allows the invocation of "protected" functions. + * Since some functions can be called a lot, we pre-process the callback to only return the elements that + * will be needed to invoke the callback later.
+ * + * @param callable $callback Array reference to a callback: [OBJECT, METHOD_NAME] + * @return array Associative array containing the elements needed to invoke the callback using Reflection + */ + private function getInvokableCallbackData($callback) + { + $callbackObject = $callback[0]; + $callbackMethodName = $callback[1]; + $reflectionMethod = new \ReflectionMethod(get_class($callbackObject), $callbackMethodName); + $reflectionMethod->setAccessible(true); + + return [ + self::CALLBACK_REFLECTION_METHOD => $reflectionMethod, + self::CALLBACK_REFLECTION_OBJECT => $callbackObject, + ]; + } + + /** + * Resumes the reading of the XML file where it was left off. + * Stops whenever a callback indicates that reading should stop or at the end of the file. + * + * @return void + * @throws \Box\Spout\Reader\Exception\XMLProcessingException + */ + public function readUntilStopped() + { + while ($this->xmlReader->read()) { + $nodeType = $this->xmlReader->nodeType; + $nodeNamePossiblyWithPrefix = $this->xmlReader->name; + $nodeNameWithoutPrefix = $this->xmlReader->localName; + + $callbackData = $this->getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType); + + if ($callbackData !== null) { + $callbackResponse = $this->invokeCallback($callbackData, [$this->xmlReader]); + + if ($callbackResponse === self::PROCESSING_STOP) { + // stop reading + break; + } + } + } + } + + /** + * @param string $nodeNamePossiblyWithPrefix Name of the node, possibly prefixed + * @param string $nodeNameWithoutPrefix Name of the same node, un-prefixed + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @return array|null Callback data to be used for execution when a node of the given name/type is read or NULL if none found + */ + private function getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType) + { + // With prefixed nodes, we should match if (by order of preference): + // 1. 
the callback was registered with the prefixed node name (e.g. "x:worksheet") + // 2. the callback was registered with the un-prefixed node name (e.g. "worksheet") + $callbackKeyForPossiblyPrefixedName = $this->getCallbackKey($nodeNamePossiblyWithPrefix, $nodeType); + $callbackKeyForUnPrefixedName = $this->getCallbackKey($nodeNameWithoutPrefix, $nodeType); + $hasPrefix = ($nodeNamePossiblyWithPrefix !== $nodeNameWithoutPrefix); + + $callbackKeyToUse = $callbackKeyForUnPrefixedName; + if ($hasPrefix && isset($this->callbacks[$callbackKeyForPossiblyPrefixedName])) { + $callbackKeyToUse = $callbackKeyForPossiblyPrefixedName; + } + + // Using isset here because it is way faster than array_key_exists... + return isset($this->callbacks[$callbackKeyToUse]) ? $this->callbacks[$callbackKeyToUse] : null; + } + + /** + * @param array $callbackData Associative array containing data to invoke the callback using Reflection + * @param array $args Arguments to pass to the callback + * @return int Callback response + */ + private function invokeCallback($callbackData, $args) + { + $reflectionMethod = $callbackData[self::CALLBACK_REFLECTION_METHOD]; + $callbackObject = $callbackData[self::CALLBACK_REFLECTION_OBJECT]; + + return $reflectionMethod->invokeArgs($callbackObject, $args); + } +} diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index cdda704..ca38e05 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -8,6 +8,7 @@ use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\ODS\Helper\CellValueFormatter; use Box\Spout\Reader\Wrapper\XMLReader; +use Box\Spout\Reader\Common\XMLProcessor; /** * Class RowIterator @@ -29,6 +30,9 @@ class RowIterator implements IteratorInterface /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var 
\Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ + protected $xmlProcessor; + /** @var bool Whether empty rows should be returned or skipped */ protected $shouldPreserveEmptyRows; @@ -38,6 +42,9 @@ class RowIterator implements IteratorInterface /** @var bool Whether the iterator has already been rewound once */ protected $hasAlreadyBeenRewound = false; + /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ + protected $currentlyProcessedRowData = []; + /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; @@ -72,6 +79,13 @@ class RowIterator implements IteratorInterface $this->xmlReader = $xmlReader; $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); $this->cellValueFormatter = new CellValueFormatter($options->shouldFormatDates()); + + // Register all callbacks to process different nodes when reading the XML file + $this->xmlProcessor = new XMLProcessor($this->xmlReader); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']); } /** @@ -122,7 +136,7 @@ class RowIterator implements IteratorInterface public function next() { if ($this->doesNeedDataForNextRowToBeProcessed()) { - $this->readDataForNextRow($this->xmlReader); + $this->readDataForNextRow(); } $this->lastRowIndexProcessed++; @@ -148,54 +162,26 @@ class RowIterator implements IteratorInterface } /** - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object * @return 
void * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ - protected function readDataForNextRow($xmlReader) + protected function readDataForNextRow() { - $rowData = []; + $this->currentlyProcessedRowData = []; try { - while ($xmlReader->read()) { - if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { - $this->processRowStartingNode($xmlReader); - - } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - $rowData = $this->processCellStartingNode($xmlReader, $rowData); - - } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue); - - // if the fetched row is empty and we don't want to preserve it... - if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { - // ... skip it - continue; - } - - $rowData = $this->processRowEndingNode($rowData, $isEmptyRow); - - // at this point, we have all the data we need for the row - // so that we can populate the buffer - break; - - } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { - $this->processTableEndingNode(); - break; - } - } - + $this->xmlProcessor->readUntilStopped(); } catch (XMLProcessingException $exception) { throw new IOException("The sheet's data cannot be read. 
[{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + $this->rowDataBuffer = $this->currentlyProcessedRowData; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node - * @return void + * @return int A return code that indicates what action should the processor take next */ protected function processRowStartingNode($xmlReader) { @@ -204,14 +190,15 @@ class RowIterator implements IteratorInterface $this->lastProcessedCellValue = null; $this->numColumnsRepeated = 1; $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); + + return XMLProcessor::PROCESSING_CONTINUE; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node - * @param array $rowData Data of all cells read so far - * @return array Original row data + data for the cell that was just read + * @return int A return code that indicates what action should the processor take next */ - protected function processCellStartingNode($xmlReader, $rowData) + protected function processCellStartingNode($xmlReader) { $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); @@ -221,7 +208,7 @@ class RowIterator implements IteratorInterface // process cell N only after having read cell N+1 (see below why) if ($this->hasAlreadyReadOneCellInCurrentRow) { for ($i = 0; $i < $this->numColumnsRepeated; $i++) { - $rowData[] = $this->lastProcessedCellValue; + $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue; } } @@ -229,28 +216,34 @@ class RowIterator implements IteratorInterface $this->lastProcessedCellValue = $currentCellValue; $this->numColumnsRepeated = $currentNumColumnsRepeated; - return $rowData; + return XMLProcessor::PROCESSING_CONTINUE; } /** - * @param array $rowData Data of all cells read so far - * @param bool $isEmptyRow Whether the given row is empty - * @return array + * @return int A return code that indicates what 
action should the processor take next */ - protected function processRowEndingNode($rowData, $isEmptyRow) + protected function processRowEndingNode() { + $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRowData, $this->lastProcessedCellValue); + + // if the fetched row is empty and we don't want to preserve it... + if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { + // ... skip it + return XMLProcessor::PROCESSING_CONTINUE; + } + // if the row is empty, we don't want to return more than one cell $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. + // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData". // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" // with a number-columns-repeated value equals to the number of (supported columns - used columns). // In Excel, the number of supported columns is 16384, but we don't want to returns rows with // always 16384 cells. - if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + if ((count($this->currentlyProcessedRowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { - $rowData[] = $this->lastProcessedCellValue; + $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue; } } @@ -258,16 +251,20 @@ class RowIterator implements IteratorInterface // then the next row to be processed will be row (N+M). 
$this->nextRowIndexToBeProcessed += $this->numRowsRepeated; - return $rowData; + // at this point, we have all the data we need for the row + // so that we can populate the buffer + return XMLProcessor::PROCESSING_STOP; } /** - * @return void + * @return int A return code that indicates what action should the processor take next */ protected function processTableEndingNode() { // The closing "" marks the end of the file $this->hasReachedEndOfFile = true; + + return XMLProcessor::PROCESSING_STOP; } /** diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index 50c4402..4c88ba6 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Helper\CellHelper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; use Box\Spout\Reader\XLSX\Helper\StyleHelper; +use Box\Spout\Reader\Common\XMLProcessor; /** * Class RowIterator @@ -38,6 +39,9 @@ class RowIterator implements IteratorInterface /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ + protected $xmlProcessor; + /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; @@ -50,6 +54,9 @@ class RowIterator implements IteratorInterface */ protected $numReadRows = 0; + /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ + protected $currentlyProcessedRowData = []; + /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; @@ -88,6 +95,14 @@ class RowIterator implements IteratorInterface $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates()); 
$this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); + + // Register all callbacks to process different nodes when reading the XML file + $this->xmlProcessor = new XMLProcessor($this->xmlReader); + $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']); } /** @@ -152,7 +167,7 @@ class RowIterator implements IteratorInterface $this->nextRowIndexToBeProcessed++; if ($this->doesNeedDataForNextRowToBeProcessed()) { - $this->readDataForNextRow($this->xmlReader); + $this->readDataForNextRow(); } } @@ -180,55 +195,26 @@ class RowIterator implements IteratorInterface } /** - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object * @return void * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ - protected function readDataForNextRow($xmlReader) + protected function readDataForNextRow() { - $rowData = []; + $this->currentlyProcessedRowData = []; try { - while ($xmlReader->read()) { - if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { - $this->processDimensionStartingNode($xmlReader); - - } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { - $rowData = $this->processRowStartingNode($xmlReader); - - } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - 
$rowData = $this->processCellStartingNode($xmlReader, $rowData); - - } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // if the fetched row is empty and we don't want to preserve it.., - if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($rowData)) { - // ... skip it - continue; - } - - $rowData = $this->processRowEndingNode($rowData); - - // at this point, we have all the data we need for the row - // so that we can populate the buffer - break; - - } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { - $this->processWorksheetEndingNode(); - break; - } - } - + $this->xmlProcessor->readUntilStopped(); } catch (XMLProcessingException $exception) { throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + $this->rowDataBuffer = $this->currentlyProcessedRowData; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node - * @return void + * @return int A return code that indicates what action should the processor take next */ protected function processDimensionStartingNode($xmlReader) { @@ -238,11 +224,13 @@ class RowIterator implements IteratorInterface $lastCellIndex = $matches[1]; $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; } + + return XMLProcessor::PROCESSING_CONTINUE; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node - * @return array + * @return int A return code that indicates what action should the processor take next */ protected function processRowStartingNode($xmlReader) { @@ -260,45 +248,58 @@ class RowIterator implements IteratorInterface $numberOfColumnsForRow = intval($numberOfColumnsForRow); } - return ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; + $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? 
array_fill(0, $numberOfColumnsForRow, '') : []; + + return XMLProcessor::PROCESSING_CONTINUE; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node - * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) - * @return array Original row data + data for the cell that was just read (key = cell index, value = cell value) + * @return int A return code that indicates what action should the processor take next */ - protected function processCellStartingNode($xmlReader, $rowData) + protected function processCellStartingNode($xmlReader) { $currentColumnIndex = $this->getColumnIndex($xmlReader); $node = $xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node); - + $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node); $this->lastColumnIndexProcessed = $currentColumnIndex; - return $rowData; + return XMLProcessor::PROCESSING_CONTINUE; } /** - * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) - * @return array + * @return int A return code that indicates what action should the processor take next */ - protected function processRowEndingNode($rowData) + protected function processRowEndingNode() { + // if the fetched row is empty and we don't want to preserve it.., + if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) { + // ... skip it + return XMLProcessor::PROCESSING_CONTINUE; + } + $this->numReadRows++; // If needed, we fill the empty cells - return ($this->numColumns !== 0) ? 
$rowData : CellHelper::fillMissingArrayIndexes($rowData); + if ($this->numColumns === 0) { + $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData); + } + + // at this point, we have all the data we need for the row + // so that we can populate the buffer + return XMLProcessor::PROCESSING_STOP; } /** - * @return void + * @return int A return code that indicates what action should the processor take next */ protected function processWorksheetEndingNode() { // The closing "" marks the end of the file $this->hasReachedEndOfFile = true; + + return XMLProcessor::PROCESSING_STOP; } /**