diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index cb476ab..9089828 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -19,8 +19,16 @@ abstract class AbstractReader implements ReaderInterface /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates = false; + /** @var \Box\Spout\Reader\ReaderOptions */ + protected $readerOptions; + + /** + * The constructor. + */ + public function __construct() + { + $this->readerOptions = new ReaderOptions(); + } /** * Returns whether stream wrappers are supported @@ -69,7 +77,19 @@ abstract class AbstractReader implements ReaderInterface */ public function setShouldFormatDates($shouldFormatDates) { - $this->shouldFormatDates = $shouldFormatDates; + $this->readerOptions->setShouldFormatDates($shouldFormatDates); + return $this; + } + + /** + * Sets whether to skip or return "empty" rows. 
+ * + * @param bool $shouldPreserveEmptyRows + * @return AbstractReader + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->readerOptions->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); return $this; } diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php index a52bafa..a713729 100644 --- a/src/Spout/Reader/ODS/Reader.php +++ b/src/Spout/Reader/ODS/Reader.php @@ -42,7 +42,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->readerOptions); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index e91ad90..08a400b 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -8,6 +8,7 @@ use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\ODS\Helper\CellValueFormatter; use Box\Spout\Reader\Wrapper\XMLReader; +use Box\Spout\Reader\ReaderOptions; /** * Class RowIterator @@ -21,10 +22,14 @@ class RowIterator implements IteratorInterface const XML_NODE_ROW = 'table:table-row'; const XML_NODE_CELL = 'table:table-cell'; const MAX_COLUMNS_EXCEL = 16384; + const MAX_ROWS_EXCEL = 1048576; /** Definition of XML attribute used to parse data */ const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated'; + /** Definition of XML attribute used to parse data */ + const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated'; + /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -34,23 +39,27 @@ class RowIterator implements IteratorInterface /** @var bool Whether the iterator has already been rewound once 
*/ protected $hasAlreadyBeenRewound = false; - /** @var int Number of read rows */ - protected $numReadRows = 0; + /** @var int Key for iterator */ + protected $rowIndex = 0; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer = null; + /** @var array Buffer used to store the row data, while checking if there are more rows to read */ + protected $rowDataBuffer = []; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; + /** @var \Box\Spout\Reader\ReaderOptions */ + protected $readerOptions; + /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions */ - public function __construct($xmlReader, $shouldFormatDates) + public function __construct($xmlReader, ReaderOptions $readerOptions) { $this->xmlReader = $xmlReader; - $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); + $this->readerOptions = $readerOptions; + $this->cellValueFormatter = new CellValueFormatter($readerOptions->shouldFormatDates()); } /** @@ -71,8 +80,8 @@ class RowIterator implements IteratorInterface } $this->hasAlreadyBeenRewound = true; - $this->numReadRows = 0; - $this->rowDataBuffer = null; + $this->rowIndex = 0; + $this->rowDataBuffer = []; $this->hasReachedEndOfFile = false; $this->next(); @@ -90,7 +99,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows will be skipped. + * Move forward to next element. Empty rows can be skipped. 
* @link http://php.net/manual/en/iterator.next.php * * @return void @@ -99,15 +108,34 @@ class RowIterator implements IteratorInterface */ public function next() { + $prevRow = null; + + if (count($this->rowDataBuffer) > 1) { + array_shift($this->rowDataBuffer); + $this->rowIndex++; + + return; + } else { + $prevRow = $this->current(); + $this->rowDataBuffer = []; + } + $rowData = []; $cellValue = null; + $numRowsRepeated = 0; $numColumnsRepeated = 1; $numCellsRead = 0; $hasAlreadyReadOneCell = false; try { while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + // Start of a row description + $this->rowIndex++; + + $numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode(); + + } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { // Start of a cell description $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); @@ -127,30 +155,37 @@ class RowIterator implements IteratorInterface $numCellsRead++; $hasAlreadyReadOneCell = true; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { // End of the row description $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue)); - if ($isEmptyRow) { - // skip empty rows - $this->next(); - return; + + if (!$isEmptyRow) { + // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. + // The current count of read columns is determined by counting the values in $rowData. + // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equals to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to returns rows with + // always 16384 cells. 
+ if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + for ($i = 0; $i < $numColumnsRepeated; $i++) { + $rowData[] = $cellValue; + } + } + } elseif ($this->readerOptions->shouldPreserveEmptyRows()) { + // Take number of cells from the previously read line. + $rowData = empty($prevRow) ? [] : array_fill(0, count($prevRow), ''); + } else { + return $this->next(); } - // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. - // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" - // with a number-columns-repeated value equals to the number of (supported columns - used columns). - // In Excel, the number of supported columns is 16384, but we don't want to returns rows with - // always 16384 cells. - if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - $this->numReadRows++; + // see above, now check number of rows... + if ($this->rowIndex - 1 + $numRowsRepeated >= self::MAX_ROWS_EXCEL) { + $numRowsRepeated = 0; + $this->hasReachedEndOfFile = true; } break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { + } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { // The closing "" marks the end of the file $this->hasReachedEndOfFile = true; break; @@ -161,7 +196,9 @@ class RowIterator implements IteratorInterface throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + for ($i = 0; $i < $numRowsRepeated; ++$i) { + $this->rowDataBuffer[] = $rowData; + } } /** @@ -173,6 +210,15 @@ class RowIterator implements IteratorInterface return ($numColumnsRepeated !== null) ? 
intval($numColumnsRepeated) : 1; } + /** + * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumRowsRepeatedForCurrentNode() + { + $numRowsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); + return ($numRowsRepeated !== null) ? intval($numRowsRepeated) : 1; + } + /** * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. * @@ -203,7 +249,7 @@ class RowIterator implements IteratorInterface */ public function current() { - return $this->rowDataBuffer; + return isset($this->rowDataBuffer[0]) ? $this->rowDataBuffer[0] : null; } /** @@ -214,10 +260,9 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->rowIndex; } - /** * Cleans up what was created to iterate over the object. * diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php index 98d00b1..f2869a4 100644 --- a/src/Spout/Reader/ODS/Sheet.php +++ b/src/Spout/Reader/ODS/Sheet.php @@ -4,6 +4,7 @@ namespace Box\Spout\Reader\ODS; use Box\Spout\Reader\SheetInterface; use Box\Spout\Reader\Wrapper\XMLReader; +use Box\Spout\Reader\ReaderOptions; /** * Class Sheet @@ -27,13 +28,13 @@ class Sheet implements SheetInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($xmlReader, ReaderOptions $readerOptions, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); + 
$this->rowIterator = new RowIterator($xmlReader, $readerOptions); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index f6cfdbe..dc41e7a 100644 --- a/src/Spout/Reader/ODS/SheetIterator.php +++ b/src/Spout/Reader/ODS/SheetIterator.php @@ -6,6 +6,7 @@ use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\Wrapper\XMLReader; +use Box\Spout\Reader\ReaderOptions; /** * Class SheetIterator @@ -24,8 +25,8 @@ class SheetIterator implements IteratorInterface /** @var string $filePath Path of the file to be read */ protected $filePath; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates; + /** @var \Box\Spout\Reader\ReaderOptions */ + protected $readerOptions; /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -41,13 +42,13 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $shouldFormatDates) + public function __construct($filePath, ReaderOptions $readerOptions) { $this->filePath = $filePath; - $this->shouldFormatDates = $shouldFormatDates; + $this->readerOptions = $readerOptions; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -116,7 +117,7 @@ class SheetIterator implements IteratorInterface $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = 
$this->escaper->unescape($escapedSheetName); - return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex); + return new Sheet($this->xmlReader, $this->readerOptions, $this->currentSheetIndex, $sheetName); } /** diff --git a/src/Spout/Reader/ReaderOptions.php b/src/Spout/Reader/ReaderOptions.php new file mode 100644 index 0000000..782947d --- /dev/null +++ b/src/Spout/Reader/ReaderOptions.php @@ -0,0 +1,62 @@ +shouldFormatDates = (bool)$shouldFormatDates; + return $this; + } + + /** + * Sets whether to skip or return "empty" rows. + * + * @param bool $shouldPreserveEmptyRows + * @return ReaderOptions + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->shouldPreserveEmptyRows = (bool)$shouldPreserveEmptyRows; + return $this; + } + + /** + * @see setShouldFormatDates + * @return bool + */ + public function shouldFormatDates() + { + return $this->shouldFormatDates; + } + + /** + * @see setShouldPreserveEmptyRows + * @return bool + */ + public function shouldPreserveEmptyRows() + { + return $this->shouldPreserveEmptyRows; + } + +} diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index a6ff909..3886e4d 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -4,6 +4,7 @@ namespace Box\Spout\Reader\XLSX\Helper; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Sheet; +use Box\Spout\Reader\ReaderOptions; /** * Class SheetHelper @@ -26,21 +27,21 @@ class SheetHelper /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates; + /** @var \Box\Spout\Reader\ReaderOptions */ + protected $readerOptions; /** * @param string $filePath Path of the XLSX file being read * @param 
\Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions) { $this->filePath = $filePath; $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; - $this->shouldFormatDates = $shouldFormatDates; + $this->readerOptions = $readerOptions; } /** @@ -92,7 +93,7 @@ class SheetHelper $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId); - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->readerOptions, $sheetIndexZeroBased, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index bcf02cc..1d6f7e8 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -69,7 +69,7 @@ class Reader extends AbstractReader $this->sharedStringsHelper->extractSharedStrings(); } - $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->readerOptions); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index c7491ac..666178c 100644 --- 
a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Helper\CellHelper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; use Box\Spout\Reader\XLSX\Helper\StyleHelper; +use Box\Spout\Reader\ReaderOptions; /** * Class RowIterator @@ -43,11 +44,11 @@ class RowIterator implements IteratorInterface /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ protected $styleHelper; - /** @var int Number of read rows */ - protected $numReadRows = 0; + /** @var int Key for iterator */ + protected $rowIndex = 0; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer = null; + /** @var array Buffer used to store the row data, while checking if there are more rows to read */ + protected $rowDataBuffer = []; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; @@ -55,13 +56,16 @@ class RowIterator implements IteratorInterface /** @var int The number of columns the sheet has (0 meaning undefined) */ protected $numColumns = 0; + /** @var \Box\Spout\Reader\ReaderOptions */ + protected $readerOptions; + /** * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = 
$this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); @@ -69,7 +73,8 @@ class RowIterator implements IteratorInterface $this->xmlReader = new XMLReader(); $this->styleHelper = new StyleHelper($filePath); - $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates); + $this->readerOptions = $readerOptions; + $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $readerOptions->shouldFormatDates()); } /** @@ -101,7 +106,7 @@ class RowIterator implements IteratorInterface } - $this->numReadRows = 0; - $this->rowDataBuffer = null; + $this->rowIndex = 0; + $this->rowDataBuffer = []; $this->hasReachedEndOfFile = false; $this->numColumns = 0; @@ -131,6 +136,15 @@ class RowIterator implements IteratorInterface { $rowData = []; + if (count($this->rowDataBuffer) > 1) { + array_shift($this->rowDataBuffer); + $this->rowIndex++; + + return; + } else { + $this->rowDataBuffer = []; + } + try { while ($this->xmlReader->read()) { if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { @@ -141,8 +155,10 @@ class RowIterator implements IteratorInterface $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; } - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { // Start of the row description + $prevRowIndex = $this->rowIndex; + $newRowIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); // Read spans info if present $numberOfColumnsForRow = $this->numColumns; @@ -153,7 +169,15 @@ class RowIterator implements IteratorInterface } $rowData = ($numberOfColumnsForRow !== 0) ? 
array_fill(0, $numberOfColumnsForRow, '') : []; - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + if ($this->readerOptions->shouldPreserveEmptyRows()) { + for ($i = $prevRowIndex + 1; $i < $newRowIndex; ++$i) { + $this->rowDataBuffer[] = $rowData; // fake empty rows + } + } + + $this->rowIndex = $newRowIndex - count($this->rowDataBuffer); + + } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { // Start of a cell description $currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); @@ -161,14 +185,14 @@ class RowIterator implements IteratorInterface $node = $this->xmlReader->expand(); $rowData[$currentColumnIndex] = $this->getCellValue($node); - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { // End of the row description // If needed, we fill the empty cells $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); - $this->numReadRows++; + break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { + } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { // The closing "" marks the end of the file $this->hasReachedEndOfFile = true; break; @@ -179,7 +203,7 @@ class RowIterator implements IteratorInterface throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + $this->rowDataBuffer[] = $rowData; } /** @@ -201,7 +225,7 @@ class RowIterator implements IteratorInterface */ public function current() { - return $this->rowDataBuffer; + return isset($this->rowDataBuffer[0]) ? 
$this->rowDataBuffer[0] : null; } /** @@ -212,7 +236,7 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->rowIndex; } diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index a1c7d95..e60c5bc 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -3,6 +3,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Reader\SheetInterface; +use Box\Spout\Reader\ReaderOptions; /** * Class Sheet @@ -25,13 +26,13 @@ class Sheet implements SheetInterface * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper Helper to work with shared strings - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates); + $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $readerOptions); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php index f7a3f59..8685f2d 100644 --- a/src/Spout/Reader/XLSX/SheetIterator.php +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -5,6 +5,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Reader\IteratorInterface; use 
Box\Spout\Reader\XLSX\Helper\SheetHelper; use Box\Spout\Reader\Exception\NoSheetsFoundException; +use Box\Spout\Reader\ReaderOptions; /** * Class SheetIterator @@ -24,13 +25,13 @@ class SheetIterator implements IteratorInterface * @param string $filePath Path of the file to be read * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ReaderOptions $readerOptions * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions) { // Fetch all available sheets - $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates); + $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $readerOptions); $this->sheets = $sheetHelper->getSheets(); if (count($this->sheets) === 0) { diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index dee4164..5b5f709 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -137,6 +137,24 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @dataProvider dataProviderForTestReadWithFilesGeneratedByExternalSoftwares + * + * @param bool $skipLastEmptyValues + * @param string $fileName + * @return void + */ + public function testReadWithFilesGeneratedByExternalSoftwareAndEmptyRowsPreserved($fileName, $skipLastEmptyValues) + { + $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldPreserveEmptyRows(true); + 
$allRows = $this->getAllRowsForFile($fileName, $reader); + + foreach ($allRows as $index => $row) { +// :TODO: write useful test +// $this->assertCount($expectedColumns, $row); + } + } /** * @return void @@ -169,8 +187,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSupportFormatDatesAndTimesIfSpecified() { - $shouldFormatDates = true; - $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $shouldFormatDates); + $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldFormatDates(true); + + $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $reader); $expectedRows = [ ['05/19/2016', '5/19/16', '05/19/2016 16:39:00', '05/19/16 04:39 PM', '5/19/2016'], @@ -213,13 +233,48 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSkipEmptyRow() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods'); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods'); $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); $expectedRows = [ - ['ods--11', 'ods--12', 'ods--13'], + 1 => ['ods--11', 'ods--12', 'ods--13'], // row skipped here - ['ods--21', 'ods--22', 'ods--23'], + 3 => ['ods--21', 'ods--22', 'ods--23'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveEmptyRow() + { + $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldPreserveEmptyRows(true); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods', $reader); + + $expectedRows = [ + 1 => ['ods--11', 'ods--12', 'ods--13'], + 2 => ['', '', ''], + 3 => ['ods--21', 'ods--22', 'ods--23'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveConsecutiveEmptyRows() + { + $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldPreserveEmptyRows(true); 
+ $allRows = $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.ods', $reader); + + $expectedRows = [ + 1 => ['First'], + 2 => [''], + 3 => [''], + 4 => ['Second'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -241,6 +296,29 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals([$expectedRow], $allRows); } + /** + * @return void + */ + public function testReadShouldHandleRepeatedRows() + { + $expectedRows = [ + 1 => ['First'], + 2 => ['First'], + 3 => ['First'], + 4 => ['Second'], + 5 => ['Third'], + 6 => ['Third'], + ]; + + $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldPreserveEmptyRows(false); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_repeated_rows.ods', $reader); + $this->assertEquals($expectedRows, $allRows); + + $reader->setShouldPreserveEmptyRows(true); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_repeated_rows.ods', $reader); + $this->assertEquals($expectedRows, $allRows); + } /** * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) @@ -484,22 +562,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName - * @param bool|void $shouldFormatDates - * @return array All the read rows the given file + * @param Reader $reader + * @return array */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function getAllRowsForFile($fileName, Reader $reader = null) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory::create(Type::ODS); - $reader->setShouldFormatDates($shouldFormatDates); + if (!$reader) { + $reader = ReaderFactory::create(Type::ODS); + } + $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { - foreach ($sheet->getRowIterator() as $rowIndex => $row) { - $allRows[] = $row; - } + $allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false)); + } + + 
$reader->close(); + + return $allRows; + } + + + /** + * @param string $fileName + * @param Reader $reader + * @return array + */ + private function getAllRowsForFirstSheet($fileName, Reader $reader = null) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + if (!$reader) { + $reader = ReaderFactory::create(Type::ODS); + } + + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + $allRows = iterator_to_array($sheet->getRowIterator(), true); + break; } $reader->close(); diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index c705f2f..21e619d 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -239,8 +239,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSupportFormatDatesAndTimesIfSpecified() { - $shouldFormatDates = true; - $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $shouldFormatDates); + $reader = ReaderFactory::create(Type::XLSX); + $reader->setShouldFormatDates(true); + + $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $reader); $expectedRows = [ ['1/13/2016', '01/13/2016', '13-Jan-16', 'Wednesday January 13, 16', 'Today is 1/13/2016'], @@ -307,16 +309,53 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRows() + public function testReadShouldSkipEmptyRow() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx'); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx'); $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + 1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], // skipped row here - ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + 3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 
's1--E3'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveEmptyRow() + { + $reader = ReaderFactory::create(Type::XLSX); + $reader->setShouldPreserveEmptyRows(true); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx', $reader); + + $expectedRows = [ + 1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + 2 => ['', '', '', '', ''], + 3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveConsecutiveEmptyRows() + { + $reader = ReaderFactory::create(Type::XLSX); + $reader->setShouldPreserveEmptyRows(true); + $allRows = $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.xlsx', $reader); + + $expectedRows = [ + 1 => ['First'], + 2 => [''], + 3 => [''], + 4 => [''], + 5 => ['Second'], + 6 => ['Third'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -549,22 +588,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName - * @param bool|void $shouldFormatDates - * @return array All the read rows the given file + * @param Reader $reader + * @return array */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function getAllRowsForFile($fileName, Reader $reader = null) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory::create(Type::XLSX); - $reader->setShouldFormatDates($shouldFormatDates); + if (!$reader) { + $reader = ReaderFactory::create(Type::XLSX); + } + $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { - foreach ($sheet->getRowIterator() as $rowIndex => $row) { - $allRows[] = $row; - } + $allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false)); + } + + $reader->close(); + + return $allRows; + } + + + /** + * @param string $fileName + * 
@param Reader $reader + * @return array + */ + private function getAllRowsForFirstSheet($fileName, Reader $reader = null) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + if (!$reader) { + $reader = ReaderFactory::create(Type::XLSX); + } + + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + $allRows = iterator_to_array($sheet->getRowIterator(), true); + break; } $reader->close(); diff --git a/tests/resources/ods/sheet_with_consecutive_empty_rows.ods b/tests/resources/ods/sheet_with_consecutive_empty_rows.ods new file mode 100644 index 0000000..558e24a Binary files /dev/null and b/tests/resources/ods/sheet_with_consecutive_empty_rows.ods differ diff --git a/tests/resources/ods/sheet_with_repeated_rows.ods b/tests/resources/ods/sheet_with_repeated_rows.ods new file mode 100644 index 0000000..54c1f4e Binary files /dev/null and b/tests/resources/ods/sheet_with_repeated_rows.ods differ diff --git a/tests/resources/xlsx/sheet_with_consecutive_empty_rows.xlsx b/tests/resources/xlsx/sheet_with_consecutive_empty_rows.xlsx new file mode 100644 index 0000000..7e3bfb4 Binary files /dev/null and b/tests/resources/xlsx/sheet_with_consecutive_empty_rows.xlsx differ