diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index cb476ab..0c5849d 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -22,6 +22,9 @@ abstract class AbstractReader implements ReaderInterface /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates = false; + /** @var bool Whether empty rows should be returned (true) or skipped (false, the default) */ + protected $shouldPreserveEmptyRows = false; + /** * Returns whether stream wrappers are supported * @@ -64,6 +67,7 @@ abstract class AbstractReader implements ReaderInterface /** * Sets whether date/time values should be returned as PHP objects or be formatted as strings. * + * @api * @param bool $shouldFormatDates * @return AbstractReader */ @@ -73,6 +77,19 @@ abstract class AbstractReader implements ReaderInterface return $this; } + /** + * Sets whether empty rows should be returned (true) or skipped (false, the default). + * + * @api + * @param bool $shouldPreserveEmptyRows True to return empty rows, false to skip them + * @return AbstractReader This reader instance, to allow call chaining + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; + return $this; + } + /** * Prepares the reader to read the given file. It also makes sure * that the file exists and is readable. 
diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index ab887ef..688e4cd 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -119,8 +119,9 @@ class Reader extends AbstractReader $this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, - $this->encoding, $this->endOfLineCharacter, + $this->encoding, + $this->shouldPreserveEmptyRows, $this->globalFunctionsHelper ); } diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index 1ecbaf1..b805126 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -52,21 +52,26 @@ class RowIterator implements IteratorInterface /** @var string End of line delimiter, given by the user as input. */ protected $inputEOLDelimiter; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields - * @param string $encoding Encoding of the CSV file to be read * @param string $endOfLineDelimiter End of line delimiter + * @param string $encoding Encoding of the CSV file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper) { $this->filePointer = $filePointer; $this->fieldDelimiter = $fieldDelimiter; $this->fieldEnclosure = $fieldEnclosure; $this->encoding = $encoding; $this->inputEOLDelimiter = $endOfLineDelimiter; + $this->shouldPreserveEmptyRows = 
$shouldPreserveEmptyRows; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->encodingHelper = new EncodingHelper($globalFunctionsHelper); @@ -114,7 +119,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows are skipped. + * Move forward to next element. Reads data for the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -124,25 +129,48 @@ class RowIterator implements IteratorInterface { $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - if ($this->hasReachedEndOfFile) { - return; + if (!$this->hasReachedEndOfFile) { + $this->readDataForNextRow(); } + } + /** + * @return void + * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 + */ + protected function readDataForNextRow() + { do { $rowData = $this->getNextUTF8EncodedRow(); - $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData)); + } while ($this->shouldReadNextRow($rowData)); if ($rowData !== false) { - $this->rowDataBuffer = $rowData; + // Replace NULL cell values by empty strings (passing null to str_replace is deprecated since PHP 8.1) + $this->rowDataBuffer = array_map(function ($cellValue) { return ($cellValue === null) ? '' : $cellValue; }, $rowData); $this->numReadRows++; } else { // If we reach this point, it means end of file was reached. // This happens when the last lines are empty lines. 
- $this->hasReachedEndOfFile = $hasNowReachedEndOfFile; + $this->hasReachedEndOfFile = true; } } + /** + * @param array|bool $currentRowData + * @return bool Whether the data for the current row can be returned or if we need to keep reading + */ + protected function shouldReadNextRow($currentRowData) + { + $hasSuccessfullyFetchedRowData = ($currentRowData !== false); + $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); + $isEmptyLine = $this->isEmptyLine($currentRowData); + + return ( + (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) || + (!$this->shouldPreserveEmptyRows && $isEmptyLine) + ); + } + /** * Returns the next row, converted if necessary to UTF-8. * As fgetcsv() does not manage correctly encoding for non UTF-8 data, @@ -154,7 +182,7 @@ class RowIterator implements IteratorInterface protected function getNextUTF8EncodedRow() { $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure); - if (false === $encodedRowData) { + if ($encodedRowData === false) { return false; } @@ -195,7 +223,7 @@ class RowIterator implements IteratorInterface } /** - * @param array $lineData Array containing the cells value for the line + * @param array|bool $lineData Array containing the cells value for the line * @return bool Whether the given line is empty */ protected function isEmptyLine($lineData) diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php index b9c66c7..98dcc7c 100644 --- a/src/Spout/Reader/CSV/Sheet.php +++ b/src/Spout/Reader/CSV/Sheet.php @@ -18,12 +18,21 @@ class Sheet implements SheetInterface * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields + * @param string $endOfLineCharacter Character defining the end of a line * @param string $encoding Encoding of the CSV 
file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper) { - $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); + $this->rowIterator = new RowIterator( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper + ); } /** diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php index 0dfc16f..2003599 100644 --- a/src/Spout/Reader/CSV/SheetIterator.php +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -22,12 +22,21 @@ class SheetIterator implements IteratorInterface * @param resource $filePointer * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields + * @param string $endOfLineCharacter Character defining the end of a line * @param string $encoding Encoding of the CSV file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper) { - $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, 
$globalFunctionsHelper); + $this->sheet = new Sheet( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper + ); } /** diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php index a52bafa..d040f90 100644 --- a/src/Spout/Reader/ODS/Reader.php +++ b/src/Spout/Reader/ODS/Reader.php @@ -42,7 +42,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates, $this->shouldPreserveEmptyRows); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index 48a78e6..4051583 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -23,33 +23,55 @@ class RowIterator implements IteratorInterface const MAX_COLUMNS_EXCEL = 16384; /** Definition of XML attribute used to parse data */ + const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated'; const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated'; /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var bool Whether the iterator has already been rewound once */ protected $hasAlreadyBeenRewound = false; - /** @var int Number of read rows */ - protected $numReadRows = 0; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; /** @var bool Indicates whether all rows have been read */ 
protected $hasReachedEndOfFile = false; + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ + protected $nextRowIndexToBeProcessed = 1; + + /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */ + protected $lastProcessedCellValue = null; + + /** @var int Number of times the last processed row should be repeated */ + protected $numRowsRepeated = 1; + + /** @var int Number of times the last cell value should be copied to the cells on its right */ + protected $numColumnsRepeated = 1; + + /** @var bool Whether at least one cell has been read for the row currently being processed */ + protected $hasAlreadyReadOneCellInCurrentRow = false; + + /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($xmlReader, $shouldFormatDates) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->xmlReader = $xmlReader; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); } @@ -71,7 +93,8 @@ class RowIterator implements IteratorInterface } $this->hasAlreadyBeenRewound = true; - $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 1; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; @@ -98,61 +121,72 @@ class RowIterator implements IteratorInterface * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() + { + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow($this->xmlReader); + } + + 
$this->lastRowIndexProcessed++; + } + + /** + * Returns whether we need data for the next row to be processed. + * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1 + ); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow($xmlReader) { $rowData = []; - $cellValue = null; - $numColumnsRepeated = 1; - $numCellsRead = 0; - $hasAlreadyReadOneCell = false; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); + while ($xmlReader->read()) { + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + $this->processRowStartingNode($xmlReader); - $node = $this->xmlReader->expand(); - $currentCellValue = $this->getCellValue($node); + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + $rowData = $this->processCellStartingNode($xmlReader, $rowData); - // process cell N only after having read cell N+1 (see below why) - if ($hasAlreadyReadOneCell) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } + } else if 
($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue); + + // if the fetched row is empty and we don't want to preserve it... + if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { + // ... skip it + continue; } - $cellValue = $currentCellValue; - $numColumnsRepeated = $currentNumColumnsRepeated; + $rowData = $this->processRowEndingNode($rowData, $isEmptyRow); - $numCellsRead++; - $hasAlreadyReadOneCell = true; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue)); - if ($isEmptyRow) { - // skip empty rows - $this->next(); - return; - } - - // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. - // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" - // with a number-columns-repeated value equals to the number of (supported columns - used columns). - // In Excel, the number of supported columns is 16384, but we don't want to returns rows with - // always 16384 cells. 
- if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - $this->numReadRows++; - } + // at this point, we have all the data we need for the row + // so that we can populate the buffer break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { + $this->processTableEndingNode(); break; } } @@ -165,11 +199,99 @@ class RowIterator implements IteratorInterface } /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return void + */ + protected function processRowStartingNode($xmlReader) + { + // Reset data from current row + $this->hasAlreadyReadOneCellInCurrentRow = false; + $this->lastProcessedCellValue = null; + $this->numColumnsRepeated = 1; + $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @param array $rowData Data of all cells read so far + * @return array Original row data + data for the cell that was just read + */ + protected function processCellStartingNode($xmlReader, $rowData) + { + $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); + + $node = $xmlReader->expand(); + $currentCellValue = $this->getCellValue($node); + + // process cell N only after having read cell N+1 (see below why) + if ($this->hasAlreadyReadOneCellInCurrentRow) { + for ($i = 0; $i < $this->numColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + $this->hasAlreadyReadOneCellInCurrentRow = true; + $this->lastProcessedCellValue = $currentCellValue; + $this->numColumnsRepeated = $currentNumColumnsRepeated; + + return $rowData; + } 
+ + /** + * @param array $rowData Data of all cells read so far + * @param bool $isEmptyRow Whether the given row is empty + * @return array + */ + protected function processRowEndingNode($rowData, $isEmptyRow) + { + // if the row is empty, we don't want to return more than one cell + $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; + + // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. + // The current count of read columns is determined by counting the values in $rowData. + // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equals to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to returns rows with + // always 16384 cells. + if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + // If we are processing row N and the row is repeated M times, + // then the next row to be processed will be row (N+M). + $this->nextRowIndexToBeProcessed += $this->numRowsRepeated; + + return $rowData; + } + + /** + * @return void + */ + protected function processTableEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumRowsRepeatedForCurrentNode($xmlReader) + { + $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); + return ($numRowsRepeated !== null) ? 
intval($numRowsRepeated) : 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing */ - protected function getNumColumnsRepeatedForCurrentNode() + protected function getNumColumnsRepeatedForCurrentNode($xmlReader) { - $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); + $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1; } @@ -185,14 +307,21 @@ class RowIterator implements IteratorInterface } /** - * empty() replacement that honours 0 as a valid value + * After finishing processing each cell, a row is considered empty if it contains + * no cells or if the value of the last read cell is an empty string. + * After finishing processing each cell, the last read cell is not part of the + * row data yet (as we still need to apply the "num-columns-repeated" attribute). 
* - * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value - * @return bool + * @param array $rowData + * @param string|int|float|bool|\DateTime|\DateInterval|null The value of the last read cell + * @return bool Whether the row is empty */ - protected function isEmptyCellValue($value) + protected function isEmptyRow($rowData, $lastReadCellValue) { - return (!isset($value) || trim($value) === ''); + return ( + count($rowData) === 0 && + (!isset($lastReadCellValue) || trim($lastReadCellValue) === '') + ); } /** @@ -214,7 +343,7 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->lastRowIndexProcessed; } diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php index 98d00b1..91669e0 100644 --- a/src/Spout/Reader/ODS/Sheet.php +++ b/src/Spout/Reader/ODS/Sheet.php @@ -28,12 +28,13 @@ class Sheet implements SheetInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); + $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index 50224c1..2c1cafa 100644 --- a/src/Spout/Reader/ODS/SheetIterator.php +++ 
b/src/Spout/Reader/ODS/SheetIterator.php @@ -27,6 +27,9 @@ class SheetIterator implements IteratorInterface /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -42,12 +45,14 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $shouldFormatDates) + public function __construct($filePath, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->shouldFormatDates = $shouldFormatDates; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -116,7 +121,7 @@ class SheetIterator implements IteratorInterface $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); - return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex); + return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $this->currentSheetIndex, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index a6ff909..d69fef2 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -29,18 +29,23 @@ class 
SheetHelper /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** * @param string $filePath Path of the XLSX file being read * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->shouldFormatDates = $shouldFormatDates; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; } /** @@ -92,7 +97,7 @@ class SheetHelper $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId); - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $sheetIndexZeroBased, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index bcf02cc..7532ee7 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -69,7 +69,7 @@ class Reader extends AbstractReader $this->sharedStringsHelper->extractSharedStrings(); } - $this->sheetIterator = new SheetIterator($filePath, 
$this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index 896222e..e9ff507 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -26,6 +26,7 @@ class RowIterator implements IteratorInterface /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_REF = 'ref'; const XML_ATTRIBUTE_SPANS = 'spans'; + const XML_ATTRIBUTE_ROW_INDEX = 'r'; const XML_ATTRIBUTE_CELL_INDEX = 'r'; /** @var string Path of the XLSX file being read */ @@ -43,7 +44,10 @@ class RowIterator implements IteratorInterface /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ protected $styleHelper; - /** @var int Number of read rows */ + /** + * TODO: This variable can be deleted when row indices get preserved + * @var int Number of read rows + */ protected $numReadRows = 0; /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ @@ -55,6 +59,15 @@ class RowIterator implements IteratorInterface /** @var int The number of columns the sheet has (0 meaning undefined) */ protected $numColumns = 0; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ + protected $nextRowIndexToBeProcessed = 0; + /** @var int Last column index processed (zero-based) */ protected $lastColumnIndexProcessed = -1; @@ -63,8 +76,9 @@ class RowIterator implements IteratorInterface * @param string $sheetDataXMLFilePath Path of the 
sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); @@ -73,6 +87,8 @@ class RowIterator implements IteratorInterface $this->styleHelper = new StyleHelper($filePath); $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates); + + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; } /** @@ -104,6 +120,8 @@ class RowIterator implements IteratorInterface } $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 0; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; $this->numColumns = 0; @@ -123,7 +141,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows will be skipped. + * Move forward to next element. Reads data describing the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -131,53 +149,73 @@ class RowIterator implements IteratorInterface * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() + { + $this->nextRowIndexToBeProcessed++; + + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow($this->xmlReader); + } + } + + /** + * Returns whether we need data for the next row to be processed. 
+ * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed + ); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow($xmlReader) { $rowData = []; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { - // Read dimensions of the sheet - $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; + while ($xmlReader->read()) { + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { + $this->processDimensionStartingNode($xmlReader); + + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + $rowData = $this->processRowStartingNode($xmlReader); + + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + $rowData = $this->processCellStartingNode($xmlReader, $rowData); + + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + // if the fetched row is empty and we 
don't want to preserve it.., + if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($rowData)) { + // ... skip it + continue; } - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { - // Start of the row description + $rowData = $this->processRowEndingNode($rowData); - // Reset index of the last processed column - $this->lastColumnIndexProcessed = -1; - - // Read spans info if present - $numberOfColumnsForRow = $this->numColumns; - $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentColumnIndex = $this->getCellIndex($this->xmlReader); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node); - - $this->lastColumnIndexProcessed = $currentColumnIndex; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numColumns !== 0) ? 
$rowData : CellHelper::fillMissingArrayIndexes($rowData); - $this->numReadRows++; + // at this point, we have all the data we need for the row + // so that we can populate the buffer break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { + $this->processWorksheetEndingNode(); break; } } @@ -190,11 +228,101 @@ class RowIterator implements IteratorInterface } /** - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" tag - * @return int + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return void + */ + protected function processDimensionStartingNode($xmlReader) + { + // Read dimensions of the sheet + $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) + if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { + $lastCellIndex = $matches[1]; + $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; + } + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return array + */ + protected function processRowStartingNode($xmlReader) + { + // Reset index of the last processed column + $this->lastColumnIndexProcessed = -1; + + // Mark the last processed row as the one currently being read + $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader); + + // Read spans info if present + $numberOfColumnsForRow = $this->numColumns; + $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance + if ($spans) { + list(, $numberOfColumnsForRow) = explode(':', $spans); + $numberOfColumnsForRow = intval($numberOfColumnsForRow); + } + + return ($numberOfColumnsForRow !== 0) 
? array_fill(0, $numberOfColumnsForRow, '') : []; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" starting node + * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) + * @return array Original row data + data for the cell that was just read (key = cell index, value = cell value) + */ + protected function processCellStartingNode($xmlReader, $rowData) + { + $currentColumnIndex = $this->getColumnIndex($xmlReader); + + $node = $xmlReader->expand(); + $rowData[$currentColumnIndex] = $this->getCellValue($node); + + $this->lastColumnIndexProcessed = $currentColumnIndex; + + return $rowData; + } + + /** + * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) + * @return array + */ + protected function processRowEndingNode($rowData) + { + $this->numReadRows++; + + // If needed, we fill the empty cells + return ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); + } + + /** + * @return void + */ + protected function processWorksheetEndingNode() + { + // The closing "</worksheet>" marks the end of the file + $this->hasReachedEndOfFile = true; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node + * @return int Row index * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid */ - protected function getCellIndex($xmlReader) + protected function getRowIndex($xmlReader) + { + // Get "r" attribute if present (from something like <row r="3"...>) + $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX); + + return ($currentRowIndex !== null) ? 
+ intval($currentRowIndex) : + $this->lastRowIndexProcessed + 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node + * @return int Column index + * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid + */ + protected function getColumnIndex($xmlReader) { // Get "r" attribute if present (from something like <c r="A1"...>) $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); @@ -216,25 +344,53 @@ class RowIterator implements IteratorInterface } /** - * Return the current element, from the buffer. + * @param array $rowData + * @return bool Whether the given row is empty + */ + protected function isEmptyRow($rowData) + { + return (count($rowData) === 1 && $rowData[0] === ''); + } + + /** + * Return the current element, either an empty row or from the buffer. * @link http://php.net/manual/en/iterator.current.php * * @return array|null */ public function current() { - return $this->rowDataBuffer; + $rowDataForRowToBeProcessed = $this->rowDataBuffer; + + if ($this->shouldPreserveEmptyRows) { + // when we need to preserve empty rows, we will either return + // an empty row or the last row read. This depends on whether the + // index of last row that was read matches the index of the last + // row whose value should be returned. + if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { + // return empty row if mismatch between last processed row + // and the row that needs to be returned + $rowDataForRowToBeProcessed = ['']; + } + } + + return $rowDataForRowToBeProcessed; } /** - * Return the key of the current element + * Return the key of the current element. Here, the row index. 
* @link http://php.net/manual/en/iterator.key.php * * @return int */ public function key() { - return $this->numReadRows; + // TODO: This should return $this->nextRowIndexToBeProcessed + // but to avoid a breaking change, the return value for + // this function has been kept as the number of rows read. + return $this->shouldPreserveEmptyRows ? + $this->nextRowIndexToBeProcessed : + $this->numReadRows; } diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index a1c7d95..b2405ae 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -26,12 +26,13 @@ class Sheet implements SheetInterface * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper Helper to work with shared strings * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates); + $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php index f286cea..88cd350 100644 --- a/src/Spout/Reader/XLSX/SheetIterator.php +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -25,12 
+25,13 @@ class SheetIterator implements IteratorInterface * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { // Fetch all available sheets - $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates); + $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows); $this->sheets = $sheetHelper->getSheets(); if (count($this->sheets) === 0) { diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index f806fd2..429ffa6 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -115,29 +115,40 @@ class ReaderTest extends \PHPUnit_Framework_TestCase } /** - * @return array + * @return void */ - public function dataProviderForTestReadShouldSkipEmptyLines() + public function testReadShouldSkipEmptyLinesIfShouldPreserveEmptyRowsNotSet() { - return [ - ['csv_with_empty_line.csv'], - ['csv_with_empty_last_line.csv'], + $allRows = $this->getAllRowsForFile('csv_with_multiple_empty_lines.csv'); + + $expectedRows = [ + // skipped row here + ['csv--21', 'csv--22', 'csv--23'], + // skipped row here + ['csv--41', 'csv--42', 'csv--43'], + // skipped row here + // last row empty ]; + $this->assertEquals($expectedRows, $allRows); } /** - * 
@dataProvider dataProviderForTestReadShouldSkipEmptyLines - * - * @param string $fileName * @return void */ - public function testReadShouldSkipEmptyLines($fileName) + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() { - $allRows = $this->getAllRowsForFile($fileName); + $allRows = $this->getAllRowsForFile( + 'csv_with_multiple_empty_lines.csv', + ',', '"', "\n", EncodingHelper::ENCODING_UTF8, + $shouldPreserveEmptyRows = true + ); $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--31', 'csv--32', 'csv--33'], + [''], + ['csv--21', 'csv--22', 'csv--23'], + [''], + ['csv--41', 'csv--42', 'csv--43'], + [''], ]; $this->assertEquals($expectedRows, $allRows); } @@ -204,6 +215,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals('This is, a comma', $allRows[0][0]); } + /** + * @return void + */ + public function testReadCustomEOLs() + { + $allRows = $this->getAllRowsForFile('csv_with_CR_EOL.csv', ',', '"', "\r"); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ @@ -236,7 +262,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSkipBom($fileName, $fileEncoding) { - $allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding); + $allRows = $this->getAllRowsForFile($fileName, ',', '"', "\n", $fileEncoding); $expectedRows = [ ['csv--11', 'csv--12', 'csv--13'], @@ -275,6 +301,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper|\PHPUnit_Framework_MockObject_MockObject $helperStub */ $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') ->setMethods(['function_exists']) ->getMock(); @@ -405,14 +432,18 @@ class ReaderTest extends 
\PHPUnit_Framework_TestCase * @param string $fileName * @param string|void $fieldDelimiter * @param string|void $fieldEnclosure + * @param string|void $endOfLineCharacter * @param string|void $encoding + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ private function getAllRowsForFile( $fileName, $fieldDelimiter = ',', $fieldEnclosure = '"', - $encoding = EncodingHelper::ENCODING_UTF8) + $endOfLineCharacter = "\n", + $encoding = EncodingHelper::ENCODING_UTF8, + $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); @@ -422,7 +453,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $reader ->setFieldDelimiter($fieldDelimiter) ->setFieldEnclosure($fieldEnclosure) + ->setEndOfLineCharacter($endOfLineCharacter) ->setEncoding($encoding) + ->setShouldPreserveEmptyRows($shouldPreserveEmptyRows) ->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { @@ -436,51 +469,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase return $allRows; } - /** - * @return array - */ - public function dataProviderForTestReadCustomEOL() - { - return [ - ['csv_with_CR_EOL.csv', "\r"], - ['csv_standard.csv', "\n"], - ]; - } - - /** - * @dataProvider dataProviderForTestReadCustomEOL - * - * @param string $fileName - * @param string $customEOL - * @return void - */ - public function testReadCustomEOLs($fileName, $customEOL) - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - /** @var \Box\Spout\Reader\CSV\Reader $reader */ - $reader = ReaderFactory::create(Type::CSV); - $reader - ->setEndOfLineCharacter($customEOL) - ->open($resourcePath); - - foreach ($reader->getSheetIterator() as $sheet) { - foreach ($sheet->getRowIterator() as $row) { - $allRows[] = $row; - } - } - - $reader->close(); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - 
$this->assertEquals($expectedRows, $allRows); - } - /** * @return void */ diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index dee4164..d8ec39b 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -211,15 +211,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRow() + public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods'); - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods'); + + $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped'); $expectedRows = [ - ['ods--11', 'ods--12', 'ods--13'], - // row skipped here + // skipped row here ['ods--21', 'ods--22', 'ods--23'], + // skipped row here + // skipped row here + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods', false, true); + + $this->assertEquals(6, count($allRows), 'There should be 6 rows'); + + $expectedRows = [ + [''], + ['ods--21', 'ods--22', 'ods--23'], + [''], + [''], + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -485,15 +509,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName * @param bool|void $shouldFormatDates + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function 
getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Reader\ODS\Reader $reader */ $reader = ReaderFactory::create(Type::ODS); $reader->setShouldFormatDates($shouldFormatDates); + $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index dffbc26..51b79d4 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -352,16 +352,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRows() + public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx'); - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped'); $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], // skipped row here - ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + ['s1--A2', 's1--B2', 's1--C2'], + // skipped row here + // skipped row here + ['s1--A5', 's1--B5', 's1--C5'], + ['s1--A6', 's1--B6', 's1--C6'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx', false, true); + + $this->assertEquals(6, count($allRows), 'There should be 6 rows'); + + $expectedRows = [ + [''], + ['s1--A2', 's1--B2', 's1--C2'], + [''], + [''], + 
['s1--A5', 's1--B5', 's1--C5'], + ['s1--A6', 's1--B6', 's1--C6'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -595,15 +618,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName * @param bool|void $shouldFormatDates + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Reader\XLSX\Reader $reader */ $reader = ReaderFactory::create(Type::XLSX); $reader->setShouldFormatDates($shouldFormatDates); + $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/resources/csv/csv_with_empty_last_line.csv b/tests/resources/csv/csv_with_empty_last_line.csv deleted file mode 100644 index 8892982..0000000 --- a/tests/resources/csv/csv_with_empty_last_line.csv +++ /dev/null @@ -1,2 +0,0 @@ -csv--11,csv--12,csv--13 -csv--31,csv--32,csv--33 diff --git a/tests/resources/csv/csv_with_empty_line.csv b/tests/resources/csv/csv_with_empty_line.csv deleted file mode 100644 index 8da735f..0000000 --- a/tests/resources/csv/csv_with_empty_line.csv +++ /dev/null @@ -1,3 +0,0 @@ -csv--11,csv--12,csv--13 - -csv--31,csv--32,csv--33 \ No newline at end of file diff --git a/tests/resources/csv/csv_with_multiple_empty_lines.csv b/tests/resources/csv/csv_with_multiple_empty_lines.csv new file mode 100644 index 0000000..c25f253 --- /dev/null +++ b/tests/resources/csv/csv_with_multiple_empty_lines.csv @@ -0,0 +1,5 @@ + +csv--21,csv--22,csv--23 + +csv--41,csv--42,csv--43 + diff --git a/tests/resources/ods/sheet_with_empty_row.ods b/tests/resources/ods/sheet_with_empty_row.ods deleted file mode 100644 index 4763df0..0000000 Binary 
files a/tests/resources/ods/sheet_with_empty_row.ods and /dev/null differ diff --git a/tests/resources/ods/sheet_with_empty_rows.ods b/tests/resources/ods/sheet_with_empty_rows.ods new file mode 100644 index 0000000..e7ad29a Binary files /dev/null and b/tests/resources/ods/sheet_with_empty_rows.ods differ diff --git a/tests/resources/xlsx/sheet_with_empty_row.xlsx b/tests/resources/xlsx/sheet_with_empty_row.xlsx deleted file mode 100644 index b9330b0..0000000 Binary files a/tests/resources/xlsx/sheet_with_empty_row.xlsx and /dev/null differ diff --git a/tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx b/tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx new file mode 100644 index 0000000..2f5cf15 Binary files /dev/null and b/tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx differ