" element
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
     */
    public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows)
    {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // The XML can only be read forward, so the row iterator cannot be
        // rewound a second time: any further attempt is rejected.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that current()/valid() are usable right away.
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element.
     * Empty rows are skipped unless the iterator was created with $shouldPreserveEmptyRows.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        // When a repeated row is being replayed, the buffer already holds the
        // right data and nothing new must be read from the XML.
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow($this->xmlReader);
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Reads the XML until a complete row (or the end of the table) has been found
     * and stores the resulting row data in the buffer returned by current().
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow($xmlReader)
    {
        $rowData = [];

        try {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    $this->processRowStartingNode($xmlReader);
                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    $rowData = $this->processCellStartingNode($xmlReader, $rowData);
                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue);

                    // if the fetched row is empty and we don't want to preserve it...
                    if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
                        // ... skip it
                        continue;
                    }

                    $rowData = $this->processRowEndingNode($rowData, $isEmptyRow);

                    // at this point, we have all the data we need for the row
                    // so that we can populate the buffer
                    break;
                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    $this->processTableEndingNode();
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Resets the per-row state and records how many times the row is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a row (self::XML_NODE_ROW) starting node
     * @return void
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCellValue = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
    }

    /**
     * Reads the value of the current cell and appends the previously read cell
     * (repeated as many times as requested) to the row data.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a cell (self::XML_NODE_CELL) starting node
     * @param array $rowData Data of all cells read so far
     * @return array Original row data + data for the cell that was read before the current one
     */
    protected function processCellStartingNode($xmlReader, $rowData)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        $node = $xmlReader->expand();
        $currentCellValue = $this->getCellValue($node);

        // Cell N is only added after cell N+1 has been read, so that the last
        // cell of the row can be handled separately in processRowEndingNode().
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCellValue = $currentCellValue;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return $rowData;
    }

    /**
     * Adds the last read cell to the row data (when appropriate) and updates
     * the index of the next row to be processed.
     *
     * @param array $rowData Data of all cells read so far
     * @param bool $isEmptyRow Whether the given row is empty
     * @return array Row data, including the last read cell unless it is a trailing cell repeater
     */
    protected function processRowEndingNode($rowData, $isEmptyRow)
    {
        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the values in $rowData.
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty cell node
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
        // always 16384 cells.
        if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        return $rowData;
    }

    /**
     * Flags the iterator as finished once the end of the table has been reached.
     *
     * @return void
     */
    protected function processTableEndingNode()
    {
        // The closing table node (self::XML_NODE_TABLE) marks the end of the file
        $this->hasReachedEndOfFile = true;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a row (self::XML_NODE_ROW) starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

        return ($numRowsRepeated !== null) ? (int) $numRowsRepeated : 1;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a cell (self::XML_NODE_CELL) starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * Once all the cells of a row have been read, the row is considered empty if
     * no cell has been added to the row data AND the last read cell has no value
     * (null) or is a string containing only whitespace.
     * At that point, the last read cell is not part of the row data yet
     * (as we still need to apply the "num-columns-repeated" attribute).
     *
     * Non-string values (numbers, booleans, dates, intervals) are actual values:
     * they never make a row empty and must not be passed to trim(), which would
     * turn "false" into an empty string and fail on objects.
     *
     * @param array $rowData
     * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($rowData, $lastReadCellValue)
    {
        if (count($rowData) !== 0) {
            return false;
        }

        if ($lastReadCellValue === null) {
            return true;
        }

        return is_string($lastReadCellValue) && trim($lastReadCellValue) === '';
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}