diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php
index a52bafa..d040f90 100644
--- a/src/Spout/Reader/ODS/Reader.php
+++ b/src/Spout/Reader/ODS/Reader.php
@@ -42,7 +42,7 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
- $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
+ $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates, $this->shouldPreserveEmptyRows);
} else {
throw new IOException("Could not open $filePath for reading.");
}
diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php
index 48a78e6..4051583 100644
--- a/src/Spout/Reader/ODS/RowIterator.php
+++ b/src/Spout/Reader/ODS/RowIterator.php
@@ -23,33 +23,55 @@ class RowIterator implements IteratorInterface
const MAX_COLUMNS_EXCEL = 16384;
/** Definition of XML attribute used to parse data */
+ const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
+ /** @var bool Whether empty rows should be returned or skipped */
+ protected $shouldPreserveEmptyRows;
+
/** @var Helper\CellValueFormatter Helper to format cell values */
protected $cellValueFormatter;
/** @var bool Whether the iterator has already been rewound once */
protected $hasAlreadyBeenRewound = false;
- /** @var int Number of read rows */
- protected $numReadRows = 0;
-
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = null;
/** @var bool Indicates whether all rows have been read */
protected $hasReachedEndOfFile = false;
+ /** @var int Last row index processed (one-based) */
+ protected $lastRowIndexProcessed = 0;
+
+ /** @var int Row index to be processed next (one-based) */
+ protected $nextRowIndexToBeProcessed = 1;
+
+ /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
+ protected $lastProcessedCellValue = null;
+
+ /** @var int Number of times the last processed row should be repeated */
+ protected $numRowsRepeated = 1;
+
+ /** @var int Number of times the last cell value should be copied to the cells on its right */
+ protected $numColumnsRepeated = 1;
+
+ /** @var bool Whether at least one cell has been read for the row currently being processed */
+ protected $hasAlreadyReadOneCellInCurrentRow = false;
+
+
/**
+ * Stores the XML reader and the empty-row policy, and creates the cell value formatter.
+ *
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
+ * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
*/
- public function __construct($xmlReader, $shouldFormatDates)
+ public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows)
{
$this->xmlReader = $xmlReader;
+ $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
}
@@ -71,7 +93,8 @@ class RowIterator implements IteratorInterface
}
$this->hasAlreadyBeenRewound = true;
- $this->numReadRows = 0;
+ $this->lastRowIndexProcessed = 0;
+ $this->nextRowIndexToBeProcessed = 1;
$this->rowDataBuffer = null;
$this->hasReachedEndOfFile = false;
@@ -98,61 +121,72 @@ class RowIterator implements IteratorInterface
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
*/
public function next()
+ {
+ // Only read from the XML when new data is required; otherwise the data read
+ // previously is left untouched (case of a repeated row whose repetitions are preserved).
+ if ($this->doesNeedDataForNextRowToBeProcessed()) {
+ $this->readDataForNextRow($this->xmlReader);
+ }
+
+ // The index moves forward on every call, whether or not data was read
+ $this->lastRowIndexProcessed++;
+ }
+
+ /**
+ * Returns whether we need data for the next row to be processed.
+ * We don't need to read data if:
+ * we have already read at least one row
+ * AND
+ * we need to preserve empty rows
+ * AND
+ * the last row that was read is not the row that needs to be processed
+ * (i.e. the last read row is repeated and some of its repetitions have not been processed yet)
+ *
+ * @return bool Whether we need data for the next row to be processed.
+ */
+ protected function doesNeedDataForNextRowToBeProcessed()
+ {
+ $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);
+
+ return (
+ !$hasReadAtLeastOneRow ||
+ !$this->shouldPreserveEmptyRows ||
+ $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
+ );
+ }
+
+ /**
+ * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
+ * @return void
+ * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
+ * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
+ */
+ protected function readDataForNextRow($xmlReader)
{
$rowData = [];
- $cellValue = null;
- $numColumnsRepeated = 1;
- $numCellsRead = 0;
- $hasAlreadyReadOneCell = false;
try {
- while ($this->xmlReader->read()) {
- if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
- // Start of a cell description
- $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
+ while ($xmlReader->read()) {
+ if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
+ $this->processRowStartingNode($xmlReader);
- $node = $this->xmlReader->expand();
- $currentCellValue = $this->getCellValue($node);
+ } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
+ $rowData = $this->processCellStartingNode($xmlReader, $rowData);
- // process cell N only after having read cell N+1 (see below why)
- if ($hasAlreadyReadOneCell) {
- for ($i = 0; $i < $numColumnsRepeated; $i++) {
- $rowData[] = $cellValue;
- }
+ } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
+ $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue);
+
+ // if the fetched row is empty and we don't want to preserve it...
+ if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
+ // ... skip it
+ continue;
}
- $cellValue = $currentCellValue;
- $numColumnsRepeated = $currentNumColumnsRepeated;
+ $rowData = $this->processRowEndingNode($rowData, $isEmptyRow);
- $numCellsRead++;
- $hasAlreadyReadOneCell = true;
-
- } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
- // End of the row description
- $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
- if ($isEmptyRow) {
- // skip empty rows
- $this->next();
- return;
- }
-
- // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
- // The current count of read columns is determined by counting the values in $rowData.
- // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
- // with a number-columns-repeated value equals to the number of (supported columns - used columns).
- // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
- // always 16384 cells.
- if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
- for ($i = 0; $i < $numColumnsRepeated; $i++) {
- $rowData[] = $cellValue;
- }
- $this->numReadRows++;
- }
+ // at this point, we have all the data we need for the row
+ // so that we can populate the buffer
break;
- } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
- // The closing "</table:table>" marks the end of the file
- $this->hasReachedEndOfFile = true;
+ } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
+ $this->processTableEndingNode();
break;
}
}
@@ -165,11 +199,99 @@ class RowIterator implements IteratorInterface
}
/**
+ * Resets the per-row state and records how many times the row starting at the current node is repeated.
+ *
+ * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
+ * @return void
+ */
+ protected function processRowStartingNode($xmlReader)
+ {
+ // Reset the state left over from the previously read row
+ $this->hasAlreadyReadOneCellInCurrentRow = false;
+ $this->lastProcessedCellValue = null;
+ $this->numColumnsRepeated = 1;
+ $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
+ }
+
+ /**
+ * Reads the value of the cell starting at the current node and keeps it aside, after having
+ * added the previously read cell (repeated as many times as needed) to the row data.
+ *
+ * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
+ * @param array $rowData Data of all cells read so far
+ * @return array Original row data + data for the previously read cell (the cell that was just read is only kept aside)
+ */
+ protected function processCellStartingNode($xmlReader, $rowData)
+ {
+ $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
+
+ $node = $xmlReader->expand();
+ $currentCellValue = $this->getCellValue($node);
+
+ // process cell N only after having read cell N+1: the last cell of a row needs a special treatment (see processRowEndingNode())
+ if ($this->hasAlreadyReadOneCellInCurrentRow) {
+ for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
+ $rowData[] = $this->lastProcessedCellValue;
+ }
+ }
+
+ $this->hasAlreadyReadOneCellInCurrentRow = true;
+ $this->lastProcessedCellValue = $currentCellValue;
+ $this->numColumnsRepeated = $currentNumColumnsRepeated;
+
+ return $rowData;
+ }
+
+ /**
+ * Adds the last read cell to the row data (unless it is the trailing empty cell repeater added by Excel)
+ * and moves the index of the next row to be processed past the repetitions of the current row.
+ *
+ * @param array $rowData Data of all cells read so far
+ * @param bool $isEmptyRow Whether the given row is empty
+ * @return array Row data, including the last read cell when it was kept
+ */
+ protected function processRowEndingNode($rowData, $isEmptyRow)
+ {
+ // if the row is empty, we don't want to return more than one cell
+ $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
+
+ // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
+ // The current count of read columns is determined by counting the values in $rowData.
+ // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
+ // with a number-columns-repeated value equal to the number of (supported columns - used columns).
+ // In Excel, the number of supported columns is 16384, but we don't want to return rows with
+ // always 16384 cells.
+ if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
+ for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
+ $rowData[] = $this->lastProcessedCellValue;
+ }
+ }
+
+ // If we are processing row N and the row is repeated M times,
+ // then the next row to be processed will be row (N+M).
+ $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;
+
+ return $rowData;
+ }
+
+ /**
+ * Records that the end of the file has been reached.
+ *
+ * @return void
+ */
+ protected function processTableEndingNode()
+ {
+ // The closing "</table:table>" marks the end of the file
+ $this->hasReachedEndOfFile = true;
+ }
+
+ /**
+ * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
+ * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
+ */
+ protected function getNumRowsRepeatedForCurrentNode($xmlReader)
+ {
+ $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
+ return ($numRowsRepeated !== null) ? intval($numRowsRepeated) : 1;
+ }
+
+ /**
+ * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
* @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
*/
- protected function getNumColumnsRepeatedForCurrentNode()
+ protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
{
- $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
+ $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
}
@@ -185,14 +307,21 @@ class RowIterator implements IteratorInterface
}
/**
- * empty() replacement that honours 0 as a valid value
+ * Tells whether the row that has just been read is empty.
+ * When this is called, the last read cell is not part of the row data yet
+ * (as we still need to apply the "table:number-columns-repeated" attribute).
+ * The row is therefore empty only if the row data contains no cells AND
+ * the last read cell has no value (null) or is a blank string.
+ * Non-string values (numbers, booleans, dates, intervals) always count as content
+ * and are never passed to trim(), which only accepts strings.
*
- * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
- * @return bool
+ * @param array $rowData Values of the cells already added to the row
+ * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
+ * @return bool Whether the row is empty
*/
- protected function isEmptyCellValue($value)
+ protected function isEmptyRow($rowData, $lastReadCellValue)
{
- return (!isset($value) || trim($value) === '');
+ return (
+ count($rowData) === 0 &&
+ ($lastReadCellValue === null || (is_string($lastReadCellValue) && trim($lastReadCellValue) === ''))
+ );
}
/**
@@ -214,7 +343,7 @@ class RowIterator implements IteratorInterface
*/
public function key()
{
- return $this->numReadRows;
+ // Index (one-based) of the last processed row
+ return $this->lastRowIndexProcessed;
}
diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php
index 98d00b1..91669e0 100644
--- a/src/Spout/Reader/ODS/Sheet.php
+++ b/src/Spout/Reader/ODS/Sheet.php
@@ -28,12 +28,13 @@ class Sheet implements SheetInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
+ * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
- public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
+ public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName)
{
- $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
+ $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows);
$this->index = $sheetIndex;
$this->name = $sheetName;
}
diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php
index 50224c1..2c1cafa 100644
--- a/src/Spout/Reader/ODS/SheetIterator.php
+++ b/src/Spout/Reader/ODS/SheetIterator.php
@@ -27,6 +27,9 @@ class SheetIterator implements IteratorInterface
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
+ /** @var bool Whether empty rows should be returned or skipped */
+ protected $shouldPreserveEmptyRows;
+
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@@ -42,12 +45,14 @@ class SheetIterator implements IteratorInterface
/**
* @param string $filePath Path of the file to be read
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
+ * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
- public function __construct($filePath, $shouldFormatDates)
+ public function __construct($filePath, $shouldFormatDates, $shouldPreserveEmptyRows)
{
$this->filePath = $filePath;
$this->shouldFormatDates = $shouldFormatDates;
+ $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
@@ -116,7 +121,7 @@ class SheetIterator implements IteratorInterface
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
$sheetName = $this->escaper->unescape($escapedSheetName);
- return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
+ // The constructor of Sheet expects the sheet index before the sheet name
+ return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $this->currentSheetIndex, $sheetName);
}
/**
diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php
index dee4164..d8ec39b 100644
--- a/tests/Spout/Reader/ODS/ReaderTest.php
+++ b/tests/Spout/Reader/ODS/ReaderTest.php
@@ -211,15 +211,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
+ * Empty rows must be left out of the result when empty rows are not preserved.
+ *
* @return void
*/
- public function testReadShouldSkipEmptyRow()
+ public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet()
{
- $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods');
- $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
+ $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods');
+
+ $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped');
$expectedRows = [
- ['ods--11', 'ods--12', 'ods--13'],
- // row skipped here
+ // skipped row here
['ods--21', 'ods--22', 'ods--23'],
+ // skipped row here
+ // skipped row here
+ ['ods--51', 'ods--52', 'ods--53'],
+ ['ods--61', 'ods--62', 'ods--63'],
+ ];
+ $this->assertEquals($expectedRows, $allRows);
+ }
+
+ /**
+ * When empty rows are preserved, each empty row must be returned as a row holding a single empty cell.
+ *
+ * @return void
+ */
+ public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
+ {
+ $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods', false, true);
+
+ $this->assertEquals(6, count($allRows), 'There should be 6 rows');
+
+ $expectedRows = [
+ [''],
+ ['ods--21', 'ods--22', 'ods--23'],
+ [''],
+ [''],
+ ['ods--51', 'ods--52', 'ods--53'],
+ ['ods--61', 'ods--62', 'ods--63'],
];
$this->assertEquals($expectedRows, $allRows);
}
@@ -485,15 +509,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
* @param string $fileName
* @param bool|void $shouldFormatDates
+ * @param bool|void $shouldPreserveEmptyRows
* @return array All the read rows the given file
*/
- private function getAllRowsForFile($fileName, $shouldFormatDates = false)
+ private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
+ /** @var \Box\Spout\Reader\ODS\Reader $reader */
$reader = ReaderFactory::create(Type::ODS);
$reader->setShouldFormatDates($shouldFormatDates);
+ $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
diff --git a/tests/resources/ods/sheet_with_empty_row.ods b/tests/resources/ods/sheet_with_empty_row.ods
deleted file mode 100644
index 4763df0..0000000
Binary files a/tests/resources/ods/sheet_with_empty_row.ods and /dev/null differ
diff --git a/tests/resources/ods/sheet_with_empty_rows.ods b/tests/resources/ods/sheet_with_empty_rows.ods
new file mode 100644
index 0000000..e7ad29a
Binary files /dev/null and b/tests/resources/ods/sheet_with_empty_rows.ods differ