diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php index a52bafa..d040f90 100644 --- a/src/Spout/Reader/ODS/Reader.php +++ b/src/Spout/Reader/ODS/Reader.php @@ -42,7 +42,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates, $this->shouldPreserveEmptyRows); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index 48a78e6..4051583 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -23,33 +23,55 @@ class RowIterator implements IteratorInterface const MAX_COLUMNS_EXCEL = 16384; /** Definition of XML attribute used to parse data */ + const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated'; const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated'; /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var bool Whether the iterator has already been rewound once */ protected $hasAlreadyBeenRewound = false; - /** @var int Number of read rows */ - protected $numReadRows = 0; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ 
+ protected $nextRowIndexToBeProcessed = 1; + + /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */ + protected $lastProcessedCellValue = null; + + /** @var int Number of times the last processed row should be repeated */ + protected $numRowsRepeated = 1; + + /** @var int Number of times the last cell value should be copied to the cells on its right */ + protected $numColumnsRepeated = 1; + + /** @var bool Whether at least one cell has been read for the row currently being processed */ + protected $hasAlreadyReadOneCellInCurrentRow = false; + + /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($xmlReader, $shouldFormatDates) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->xmlReader = $xmlReader; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); } @@ -71,7 +93,8 @@ class RowIterator implements IteratorInterface } $this->hasAlreadyBeenRewound = true; - $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 1; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; @@ -98,61 +121,72 @@ class RowIterator implements IteratorInterface * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() + { + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow($this->xmlReader); + } + + $this->lastRowIndexProcessed++; + } + + /** + * Returns whether we need data for the next row to be processed. 
+ * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1 + ); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow($xmlReader) { $rowData = []; - $cellValue = null; - $numColumnsRepeated = 1; - $numCellsRead = 0; - $hasAlreadyReadOneCell = false; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); + while ($xmlReader->read()) { + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + $this->processRowStartingNode($xmlReader); - $node = $this->xmlReader->expand(); - $currentCellValue = $this->getCellValue($node); + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + $rowData = $this->processCellStartingNode($xmlReader, $rowData); - // process cell N only after having read cell N+1 (see below why) - if ($hasAlreadyReadOneCell) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + $isEmptyRow = $this->isEmptyRow($rowData, 
$this->lastProcessedCellValue); + + // if the fetched row is empty and we don't want to preserve it... + if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { + // ... skip it + continue; } - $cellValue = $currentCellValue; - $numColumnsRepeated = $currentNumColumnsRepeated; + $rowData = $this->processRowEndingNode($rowData, $isEmptyRow); - $numCellsRead++; - $hasAlreadyReadOneCell = true; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue)); - if ($isEmptyRow) { - // skip empty rows - $this->next(); - return; - } - - // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. - // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" - // with a number-columns-repeated value equals to the number of (supported columns - used columns). - // In Excel, the number of supported columns is 16384, but we don't want to returns rows with - // always 16384 cells. 
- if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - $this->numReadRows++; - } + // at this point, we have all the data we need for the row + // so that we can populate the buffer break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { + $this->processTableEndingNode(); break; } } @@ -165,11 +199,99 @@ class RowIterator implements IteratorInterface } /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return void + */ + protected function processRowStartingNode($xmlReader) + { + // Reset data from current row + $this->hasAlreadyReadOneCellInCurrentRow = false; + $this->lastProcessedCellValue = null; + $this->numColumnsRepeated = 1; + $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @param array $rowData Data of all cells read so far + * @return array Original row data + data for the cell that was just read + */ + protected function processCellStartingNode($xmlReader, $rowData) + { + $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); + + $node = $xmlReader->expand(); + $currentCellValue = $this->getCellValue($node); + + // process cell N only after having read cell N+1 (see below why) + if ($this->hasAlreadyReadOneCellInCurrentRow) { + for ($i = 0; $i < $this->numColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + $this->hasAlreadyReadOneCellInCurrentRow = true; + $this->lastProcessedCellValue = $currentCellValue; + $this->numColumnsRepeated = $currentNumColumnsRepeated; + + return $rowData; + } 
+ + /** + * @param array $rowData Data of all cells read so far + * @param bool $isEmptyRow Whether the given row is empty + * @return array Row data, with the last read cell appended unless it is the trailing Excel filler cell + */ + protected function processRowEndingNode($rowData, $isEmptyRow) + { + // if the row is empty, we don't want to return more than one cell + $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; + + // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. + // The current count of read columns is determined by counting the values in $rowData. + // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equal to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to return rows with + // always 16384 cells. + if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + // If we are processing row N and the row is repeated M times, + // then the next row to be processed will be row (N+M). + $this->nextRowIndexToBeProcessed += $this->numRowsRepeated; + + return $rowData; + } + + /** + * @return void + */ + protected function processTableEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumRowsRepeatedForCurrentNode($xmlReader) + { + $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); + return ($numRowsRepeated !== null) ? 
intval($numRowsRepeated) : 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing */ - protected function getNumColumnsRepeatedForCurrentNode() + protected function getNumColumnsRepeatedForCurrentNode($xmlReader) { - $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); + $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1; } @@ -185,14 +307,21 @@ class RowIterator implements IteratorInterface } /** - * empty() replacement that honours 0 as a valid value + * A row is considered empty when no cell has been added to its row data yet + * and the last read cell is either unset or blank once trimmed. + * The last read cell is checked separately because, at that point, it is not part of the + * row data yet (as we still need to apply the "num-columns-repeated" attribute). 
* - * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value - * @return bool + * @param array $rowData Data of the cells added to the row so far + * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell + * @return bool Whether the row is empty */ - protected function isEmptyCellValue($value) + protected function isEmptyRow($rowData, $lastReadCellValue) { - return (!isset($value) || trim($value) === ''); + return ( + count($rowData) === 0 && + (!isset($lastReadCellValue) || trim($lastReadCellValue) === '') + ); } /** @@ -214,7 +343,7 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->lastRowIndexProcessed; } diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php index 98d00b1..91669e0 100644 --- a/src/Spout/Reader/ODS/Sheet.php +++ b/src/Spout/Reader/ODS/Sheet.php @@ -28,12 +28,13 @@ class Sheet implements SheetInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); + $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index 50224c1..2c1cafa 100644 --- 
a/src/Spout/Reader/ODS/SheetIterator.php +++ 
b/src/Spout/Reader/ODS/SheetIterator.php @@ -27,6 +27,9 @@ class SheetIterator implements IteratorInterface /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -42,12 +45,14 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $shouldFormatDates) + public function __construct($filePath, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->shouldFormatDates = $shouldFormatDates; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -116,7 +121,7 @@ class SheetIterator implements IteratorInterface $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); - return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex); + return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $this->currentSheetIndex, $sheetName); } /** diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index dee4164..d8ec39b 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -211,15 +211,39 @@ class ReaderTest extends 
\PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRow() + public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods'); - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods'); + + $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped'); $expectedRows = [ - ['ods--11', 'ods--12', 'ods--13'], - // row skipped here + // skipped row here ['ods--21', 'ods--22', 'ods--23'], + // skipped row here + // skipped row here + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods', false, true); + + $this->assertEquals(6, count($allRows), 'There should be 6 rows'); + + $expectedRows = [ + [''], + ['ods--21', 'ods--22', 'ods--23'], + [''], + [''], + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -485,15 +509,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName * @param bool|void $shouldFormatDates + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Reader\ODS\Reader $reader */ $reader = ReaderFactory::create(Type::ODS); $reader->setShouldFormatDates($shouldFormatDates); + 
$reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/resources/ods/sheet_with_empty_row.ods b/tests/resources/ods/sheet_with_empty_row.ods deleted file mode 100644 index 4763df0..0000000 Binary files a/tests/resources/ods/sheet_with_empty_row.ods and /dev/null differ diff --git a/tests/resources/ods/sheet_with_empty_rows.ods b/tests/resources/ods/sheet_with_empty_rows.ods new file mode 100644 index 0000000..e7ad29a Binary files /dev/null and b/tests/resources/ods/sheet_with_empty_rows.ods differ