From 0978d340f0869be89ae11afdd539cba446969d13 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 17 Oct 2016 10:20:02 -0700 Subject: [PATCH] Option to keep empty rows (#331) * Add option to preserve empty rows when reading an XLSX file * Add option to preserve empty rows when reading a CSV file * Add option to preserve empty rows when reading an ODS file --- src/Spout/Reader/AbstractReader.php | 17 ++ src/Spout/Reader/CSV/Reader.php | 3 +- src/Spout/Reader/CSV/RowIterator.php | 50 +++- src/Spout/Reader/CSV/Sheet.php | 13 +- src/Spout/Reader/CSV/SheetIterator.php | 13 +- src/Spout/Reader/ODS/Reader.php | 2 +- src/Spout/Reader/ODS/RowIterator.php | 243 +++++++++++++---- src/Spout/Reader/ODS/Sheet.php | 5 +- src/Spout/Reader/ODS/SheetIterator.php | 9 +- src/Spout/Reader/XLSX/Helper/SheetHelper.php | 9 +- src/Spout/Reader/XLSX/Reader.php | 2 +- src/Spout/Reader/XLSX/RowIterator.php | 252 ++++++++++++++---- src/Spout/Reader/XLSX/Sheet.php | 5 +- src/Spout/Reader/XLSX/SheetIterator.php | 5 +- tests/Spout/Reader/CSV/ReaderTest.php | 106 ++++---- tests/Spout/Reader/ODS/ReaderTest.php | 39 ++- tests/Spout/Reader/XLSX/ReaderTest.php | 38 ++- .../csv/csv_with_empty_last_line.csv | 2 - tests/resources/csv/csv_with_empty_line.csv | 3 - .../csv/csv_with_multiple_empty_lines.csv | 5 + tests/resources/ods/sheet_with_empty_row.ods | Bin 2571 -> 0 bytes tests/resources/ods/sheet_with_empty_rows.ods | Bin 0 -> 9253 bytes .../resources/xlsx/sheet_with_empty_row.xlsx | Bin 3689 -> 0 bytes ...with_empty_rows_and_missing_row_index.xlsx | Bin 0 -> 3685 bytes 24 files changed, 612 insertions(+), 209 deletions(-) delete mode 100644 tests/resources/csv/csv_with_empty_last_line.csv delete mode 100644 tests/resources/csv/csv_with_empty_line.csv create mode 100644 tests/resources/csv/csv_with_multiple_empty_lines.csv delete mode 100644 tests/resources/ods/sheet_with_empty_row.ods create mode 100644 tests/resources/ods/sheet_with_empty_rows.ods delete mode 100644 
tests/resources/xlsx/sheet_with_empty_row.xlsx create mode 100644 tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index cb476ab..0c5849d 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -22,6 +22,9 @@ abstract class AbstractReader implements ReaderInterface /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates = false; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows = false; + /** * Returns whether stream wrappers are supported * @@ -64,6 +67,7 @@ abstract class AbstractReader implements ReaderInterface /** * Sets whether date/time values should be returned as PHP objects or be formatted as strings. * + * @api * @param bool $shouldFormatDates * @return AbstractReader */ @@ -73,6 +77,19 @@ abstract class AbstractReader implements ReaderInterface return $this; } + /** + * Sets whether empty rows should be returned or skipped. + * + * @api + * @param bool $shouldPreserveEmptyRows + * @return AbstractReader + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; + return $this; + } + /** * Prepares the reader to read the given file. It also makes sure * that the file exists and is readable. 
diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index ab887ef..688e4cd 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -119,8 +119,9 @@ class Reader extends AbstractReader $this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, - $this->encoding, $this->endOfLineCharacter, + $this->encoding, + $this->shouldPreserveEmptyRows, $this->globalFunctionsHelper ); } diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index 1ecbaf1..b805126 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -52,21 +52,26 @@ class RowIterator implements IteratorInterface /** @var string End of line delimiter, given by the user as input. */ protected $inputEOLDelimiter; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields - * @param string $encoding Encoding of the CSV file to be read * @param string $endOfLineDelimiter End of line delimiter + * @param string $encoding Encoding of the CSV file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper) { $this->filePointer = $filePointer; $this->fieldDelimiter = $fieldDelimiter; $this->fieldEnclosure = $fieldEnclosure; $this->encoding = $encoding; $this->inputEOLDelimiter = $endOfLineDelimiter; + $this->shouldPreserveEmptyRows = 
$shouldPreserveEmptyRows; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->encodingHelper = new EncodingHelper($globalFunctionsHelper); @@ -114,7 +119,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows are skipped. + * Move forward to next element. Reads data for the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -124,25 +129,48 @@ class RowIterator implements IteratorInterface { $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - if ($this->hasReachedEndOfFile) { - return; + if (!$this->hasReachedEndOfFile) { + $this->readDataForNextRow(); } + } + /** + * @return void + * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 + */ + protected function readDataForNextRow() + { do { $rowData = $this->getNextUTF8EncodedRow(); - $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData)); + } while ($this->shouldReadNextRow($rowData)); if ($rowData !== false) { - $this->rowDataBuffer = $rowData; + // str_replace will replace NULL values by empty strings + $this->rowDataBuffer = str_replace(null, null, $rowData); $this->numReadRows++; } else { // If we reach this point, it means end of file was reached. // This happens when the last lines are empty lines. 
- $this->hasReachedEndOfFile = $hasNowReachedEndOfFile; + $this->hasReachedEndOfFile = true; } } + /** + * @param array|bool $currentRowData + * @return bool Whether the data for the current row can be returned or if we need to keep reading + */ + protected function shouldReadNextRow($currentRowData) + { + $hasSuccessfullyFetchedRowData = ($currentRowData !== false); + $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); + $isEmptyLine = $this->isEmptyLine($currentRowData); + + return ( + (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) || + (!$this->shouldPreserveEmptyRows && $isEmptyLine) + ); + } + /** * Returns the next row, converted if necessary to UTF-8. * As fgetcsv() does not manage correctly encoding for non UTF-8 data, @@ -154,7 +182,7 @@ class RowIterator implements IteratorInterface protected function getNextUTF8EncodedRow() { $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure); - if (false === $encodedRowData) { + if ($encodedRowData === false) { return false; } @@ -195,7 +223,7 @@ class RowIterator implements IteratorInterface } /** - * @param array $lineData Array containing the cells value for the line + * @param array|bool $lineData Array containing the cells value for the line * @return bool Whether the given line is empty */ protected function isEmptyLine($lineData) diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php index b9c66c7..98dcc7c 100644 --- a/src/Spout/Reader/CSV/Sheet.php +++ b/src/Spout/Reader/CSV/Sheet.php @@ -18,12 +18,21 @@ class Sheet implements SheetInterface * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields + * @param string $endOfLineCharacter Character defining the end of a line * @param string $encoding Encoding of the CSV 
file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper) { - $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); + $this->rowIterator = new RowIterator( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper + ); } /** diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php index 0dfc16f..2003599 100644 --- a/src/Spout/Reader/CSV/SheetIterator.php +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -22,12 +22,21 @@ class SheetIterator implements IteratorInterface * @param resource $filePointer * @param string $fieldDelimiter Character that delimits fields * @param string $fieldEnclosure Character that enclose fields + * @param string $endOfLineCharacter Character defining the end of a line * @param string $encoding Encoding of the CSV file to be read + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper) { - $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, 
$globalFunctionsHelper); + $this->sheet = new Sheet( + $filePointer, $fieldDelimiter, $fieldEnclosure, + $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows, + $globalFunctionsHelper + ); } /** diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php index a52bafa..d040f90 100644 --- a/src/Spout/Reader/ODS/Reader.php +++ b/src/Spout/Reader/ODS/Reader.php @@ -42,7 +42,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates, $this->shouldPreserveEmptyRows); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index 48a78e6..4051583 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -23,33 +23,55 @@ class RowIterator implements IteratorInterface const MAX_COLUMNS_EXCEL = 16384; /** Definition of XML attribute used to parse data */ + const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated'; const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated'; /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var bool Whether the iterator has already been rewound once */ protected $hasAlreadyBeenRewound = false; - /** @var int Number of read rows */ - protected $numReadRows = 0; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; /** @var bool Indicates whether all rows have been read */ 
protected $hasReachedEndOfFile = false; + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ + protected $nextRowIndexToBeProcessed = 1; + + /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */ + protected $lastProcessedCellValue = null; + + /** @var int Number of times the last processed row should be repeated */ + protected $numRowsRepeated = 1; + + /** @var int Number of times the last cell value should be copied to the cells on its right */ + protected $numColumnsRepeated = 1; + + /** @var bool Whether at least one cell has been read for the row currently being processed */ + protected $hasAlreadyReadOneCellInCurrentRow = false; + + /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($xmlReader, $shouldFormatDates) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->xmlReader = $xmlReader; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); } @@ -71,7 +93,8 @@ class RowIterator implements IteratorInterface } $this->hasAlreadyBeenRewound = true; - $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 1; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; @@ -98,61 +121,72 @@ class RowIterator implements IteratorInterface * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() + { + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow($this->xmlReader); + } + + 
$this->lastRowIndexProcessed++; + } + + /** + * Returns whether we need data for the next row to be processed. + * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1 + ); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow($xmlReader) { $rowData = []; - $cellValue = null; - $numColumnsRepeated = 1; - $numCellsRead = 0; - $hasAlreadyReadOneCell = false; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); + while ($xmlReader->read()) { + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + $this->processRowStartingNode($xmlReader); - $node = $this->xmlReader->expand(); - $currentCellValue = $this->getCellValue($node); + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + $rowData = $this->processCellStartingNode($xmlReader, $rowData); - // process cell N only after having read cell N+1 (see below why) - if ($hasAlreadyReadOneCell) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } + } else if 
($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue); + + // if the fetched row is empty and we don't want to preserve it... + if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { + // ... skip it + continue; } - $cellValue = $currentCellValue; - $numColumnsRepeated = $currentNumColumnsRepeated; + $rowData = $this->processRowEndingNode($rowData, $isEmptyRow); - $numCellsRead++; - $hasAlreadyReadOneCell = true; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue)); - if ($isEmptyRow) { - // skip empty rows - $this->next(); - return; - } - - // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. - // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" - // with a number-columns-repeated value equals to the number of (supported columns - used columns). - // In Excel, the number of supported columns is 16384, but we don't want to returns rows with - // always 16384 cells. 
- if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - $this->numReadRows++; - } + // at this point, we have all the data we need for the row + // so that we can populate the buffer break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { + $this->processTableEndingNode(); break; } } @@ -165,11 +199,99 @@ class RowIterator implements IteratorInterface } /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return void + */ + protected function processRowStartingNode($xmlReader) + { + // Reset data from current row + $this->hasAlreadyReadOneCellInCurrentRow = false; + $this->lastProcessedCellValue = null; + $this->numColumnsRepeated = 1; + $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @param array $rowData Data of all cells read so far + * @return array Original row data + data for the cell that was just read + */ + protected function processCellStartingNode($xmlReader, $rowData) + { + $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); + + $node = $xmlReader->expand(); + $currentCellValue = $this->getCellValue($node); + + // process cell N only after having read cell N+1 (see below why) + if ($this->hasAlreadyReadOneCellInCurrentRow) { + for ($i = 0; $i < $this->numColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + $this->hasAlreadyReadOneCellInCurrentRow = true; + $this->lastProcessedCellValue = $currentCellValue; + $this->numColumnsRepeated = $currentNumColumnsRepeated; + + return $rowData; + } 
+ + /** + * @param array $rowData Data of all cells read so far + * @param bool $isEmptyRow Whether the given row is empty + * @return array + */ + protected function processRowEndingNode($rowData, $isEmptyRow) + { + // if the row is empty, we don't want to return more than one cell + $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; + + // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. + // The current count of read columns is determined by counting the values in $rowData. + // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equals to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to returns rows with + // always 16384 cells. + if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { + $rowData[] = $this->lastProcessedCellValue; + } + } + + // If we are processing row N and the row is repeated M times, + // then the next row to be processed will be row (N+M). + $this->nextRowIndexToBeProcessed += $this->numRowsRepeated; + + return $rowData; + } + + /** + * @return void + */ + protected function processTableEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumRowsRepeatedForCurrentNode($xmlReader) + { + $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); + return ($numRowsRepeated !== null) ? 
intval($numRowsRepeated) : 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing */ - protected function getNumColumnsRepeatedForCurrentNode() + protected function getNumColumnsRepeatedForCurrentNode($xmlReader) { - $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); + $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1; } @@ -185,14 +307,21 @@ class RowIterator implements IteratorInterface } /** - * empty() replacement that honours 0 as a valid value + * After finishing processing each cell, a row is considered empty if it contains + * no cells or if the value of the last read cell is an empty string. + * After finishing processing each cell, the last read cell is not part of the + * row data yet (as we still need to apply the "num-columns-repeated" attribute). 
* - * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value - * @return bool + * @param array $rowData + * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell + * @return bool Whether the row is empty */ - protected function isEmptyCellValue($value) + protected function isEmptyRow($rowData, $lastReadCellValue) { - return (!isset($value) || trim($value) === ''); + return ( + count($rowData) === 0 && + (!isset($lastReadCellValue) || trim($lastReadCellValue) === '') + ); } /** @@ -214,7 +343,7 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->lastRowIndexProcessed; } diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php index 98d00b1..91669e0 100644 --- a/src/Spout/Reader/ODS/Sheet.php +++ b/src/Spout/Reader/ODS/Sheet.php @@ -28,12 +28,13 @@ class Sheet implements SheetInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); + $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index 50224c1..2c1cafa 100644 --- a/src/Spout/Reader/ODS/SheetIterator.php +++ 
b/src/Spout/Reader/ODS/SheetIterator.php @@ -27,6 +27,9 @@ class SheetIterator implements IteratorInterface /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -42,12 +45,14 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $shouldFormatDates) + public function __construct($filePath, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->shouldFormatDates = $shouldFormatDates; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -116,7 +121,7 @@ class SheetIterator implements IteratorInterface $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); - return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex); + return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $this->currentSheetIndex, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index a6ff909..d69fef2 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -29,18 +29,23 @@ class 
SheetHelper /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** * @param string $filePath Path of the XLSX file being read * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->shouldFormatDates = $shouldFormatDates; + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; } /** @@ -92,7 +97,7 @@ class SheetHelper $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId); - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $sheetIndexZeroBased, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index bcf02cc..7532ee7 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -69,7 +69,7 @@ class Reader extends AbstractReader $this->sharedStringsHelper->extractSharedStrings(); } - $this->sheetIterator = new SheetIterator($filePath, 
$this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index 896222e..e9ff507 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -26,6 +26,7 @@ class RowIterator implements IteratorInterface /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_REF = 'ref'; const XML_ATTRIBUTE_SPANS = 'spans'; + const XML_ATTRIBUTE_ROW_INDEX = 'r'; const XML_ATTRIBUTE_CELL_INDEX = 'r'; /** @var string Path of the XLSX file being read */ @@ -43,7 +44,10 @@ class RowIterator implements IteratorInterface /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ protected $styleHelper; - /** @var int Number of read rows */ + /** + * TODO: This variable can be deleted when row indices get preserved + * @var int Number of read rows + */ protected $numReadRows = 0; /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ @@ -55,6 +59,15 @@ class RowIterator implements IteratorInterface /** @var int The number of columns the sheet has (0 meaning undefined) */ protected $numColumns = 0; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ + protected $nextRowIndexToBeProcessed = 0; + /** @var int Last column index processed (zero-based) */ protected $lastColumnIndexProcessed = -1; @@ -63,8 +76,9 @@ class RowIterator implements IteratorInterface * @param string $sheetDataXMLFilePath Path of the 
sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); @@ -73,6 +87,8 @@ class RowIterator implements IteratorInterface $this->styleHelper = new StyleHelper($filePath); $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates); + + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; } /** @@ -104,6 +120,8 @@ class RowIterator implements IteratorInterface } $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 0; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; $this->numColumns = 0; @@ -123,7 +141,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows will be skipped. + * Move forward to next element. Reads data describing the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -131,53 +149,73 @@ class RowIterator implements IteratorInterface * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() + { + $this->nextRowIndexToBeProcessed++; + + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow($this->xmlReader); + } + } + + /** + * Returns whether we need data for the next row to be processed. 
+ * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed + ); + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow($xmlReader) { $rowData = []; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { - // Read dimensions of the sheet - $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; + while ($xmlReader->read()) { + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { + $this->processDimensionStartingNode($xmlReader); + + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { + $rowData = $this->processRowStartingNode($xmlReader); + + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + $rowData = $this->processCellStartingNode($xmlReader, $rowData); + + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + // if the fetched row is empty and we 
don't want to preserve it.., + if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($rowData)) { + // ... skip it + continue; } - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { - // Start of the row description + $rowData = $this->processRowEndingNode($rowData); - // Reset index of the last processed column - $this->lastColumnIndexProcessed = -1; - - // Read spans info if present - $numberOfColumnsForRow = $this->numColumns; - $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentColumnIndex = $this->getCellIndex($this->xmlReader); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node); - - $this->lastColumnIndexProcessed = $currentColumnIndex; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numColumns !== 0) ? 
$rowData : CellHelper::fillMissingArrayIndexes($rowData); - $this->numReadRows++; + // at this point, we have all the data we need for the row + // so that we can populate the buffer break; - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { + $this->processWorksheetEndingNode(); break; } } @@ -190,11 +228,101 @@ class RowIterator implements IteratorInterface } /** - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" tag - * @return int + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return void + */ + protected function processDimensionStartingNode($xmlReader) + { + // Read dimensions of the sheet + $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) + if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { + $lastCellIndex = $matches[1]; + $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; + } + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return array + */ + protected function processRowStartingNode($xmlReader) + { + // Reset index of the last processed column + $this->lastColumnIndexProcessed = -1; + + // Mark the last processed row as the one currently being read + $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader); + + // Read spans info if present + $numberOfColumnsForRow = $this->numColumns; + $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance + if ($spans) { + list(, $numberOfColumnsForRow) = explode(':', $spans); + $numberOfColumnsForRow = intval($numberOfColumnsForRow); + } + + return ($numberOfColumnsForRow !== 0) 
? array_fill(0, $numberOfColumnsForRow, '') : []; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) + * @return array Original row data + data for the cell that was just read (key = cell index, value = cell value) + */ + protected function processCellStartingNode($xmlReader, $rowData) + { + $currentColumnIndex = $this->getColumnIndex($xmlReader); + + $node = $xmlReader->expand(); + $rowData[$currentColumnIndex] = $this->getCellValue($node); + + $this->lastColumnIndexProcessed = $currentColumnIndex; + + return $rowData; + } + + /** + * @param array $rowData Data of all cells read so far (key = cell index, value = cell value) + * @return array + */ + protected function processRowEndingNode($rowData) + { + $this->numReadRows++; + + // If needed, we fill the empty cells + return ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); + } + + /** + * @return void + */ + protected function processWorksheetEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" node + * @return int Row index * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid */ - protected function getCellIndex($xmlReader) + protected function getRowIndex($xmlReader) + { + // Get "r" attribute if present (from something like + $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX); + + return ($currentRowIndex !== null) ? 
+ intval($currentRowIndex) : + $this->lastRowIndexProcessed + 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" node + * @return int Column index + * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid + */ + protected function getColumnIndex($xmlReader) { // Get "r" attribute if present (from something like $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); @@ -216,25 +344,53 @@ class RowIterator implements IteratorInterface } /** - * Return the current element, from the buffer. + * @param array $rowData + * @return bool Whether the given row is empty + */ + protected function isEmptyRow($rowData) + { + return (count($rowData) === 1 && $rowData[0] === ''); + } + + /** + * Return the current element, either an empty row or from the buffer. * @link http://php.net/manual/en/iterator.current.php * * @return array|null */ public function current() { - return $this->rowDataBuffer; + $rowDataForRowToBeProcessed = $this->rowDataBuffer; + + if ($this->shouldPreserveEmptyRows) { + // when we need to preserve empty rows, we will either return + // an empty row or the last row read. This depends whether the + // index of last row that was read matches the index of the last + // row whose value should be returned. + if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { + // return empty row if mismatch between last processed row + // and the row that needs to be returned + $rowDataForRowToBeProcessed = ['']; + } + } + + return $rowDataForRowToBeProcessed; } /** - * Return the key of the current element + * Return the key of the current element. Here, the row index. 
* @link http://php.net/manual/en/iterator.key.php * * @return int */ public function key() { - return $this->numReadRows; + // TODO: This should return $this->nextRowIndexToBeProcessed + // but to avoid a breaking change, the return value for + // this function has been kept as the number of rows read. + return $this->shouldPreserveEmptyRows ? + $this->nextRowIndexToBeProcessed : + $this->numReadRows; } diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index a1c7d95..b2405ae 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -26,12 +26,13 @@ class Sheet implements SheetInterface * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper Helper to work with shared strings * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates); + $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php index f286cea..88cd350 100644 --- a/src/Spout/Reader/XLSX/SheetIterator.php +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -25,12 
+25,13 @@ class SheetIterator implements IteratorInterface * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows) { // Fetch all available sheets - $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates); + $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows); $this->sheets = $sheetHelper->getSheets(); if (count($this->sheets) === 0) { diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index f806fd2..429ffa6 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -115,29 +115,40 @@ class ReaderTest extends \PHPUnit_Framework_TestCase } /** - * @return array + * @return void */ - public function dataProviderForTestReadShouldSkipEmptyLines() + public function testReadShouldSkipEmptyLinesIfShouldPreserveEmptyRowsNotSet() { - return [ - ['csv_with_empty_line.csv'], - ['csv_with_empty_last_line.csv'], + $allRows = $this->getAllRowsForFile('csv_with_multiple_empty_lines.csv'); + + $expectedRows = [ + // skipped row here + ['csv--21', 'csv--22', 'csv--23'], + // skipped row here + ['csv--41', 'csv--42', 'csv--43'], + // skipped row here + // last row empty ]; + $this->assertEquals($expectedRows, $allRows); } /** - * 
@dataProvider dataProviderForTestReadShouldSkipEmptyLines - * - * @param string $fileName * @return void */ - public function testReadShouldSkipEmptyLines($fileName) + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() { - $allRows = $this->getAllRowsForFile($fileName); + $allRows = $this->getAllRowsForFile( + 'csv_with_multiple_empty_lines.csv', + ',', '"', "\n", EncodingHelper::ENCODING_UTF8, + $shouldPreserveEmptyRows = true + ); $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--31', 'csv--32', 'csv--33'], + [''], + ['csv--21', 'csv--22', 'csv--23'], + [''], + ['csv--41', 'csv--42', 'csv--43'], + [''], ]; $this->assertEquals($expectedRows, $allRows); } @@ -204,6 +215,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals('This is, a comma', $allRows[0][0]); } + /** + * @return void + */ + public function testReadCustomEOLs() + { + $allRows = $this->getAllRowsForFile('csv_with_CR_EOL.csv', ',', '"', "\r"); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ @@ -236,7 +262,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSkipBom($fileName, $fileEncoding) { - $allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding); + $allRows = $this->getAllRowsForFile($fileName, ',', '"', "\n", $fileEncoding); $expectedRows = [ ['csv--11', 'csv--12', 'csv--13'], @@ -275,6 +301,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper|\PHPUnit_Framework_MockObject_MockObject $helperStub */ $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') ->setMethods(['function_exists']) ->getMock(); @@ -405,14 +432,18 @@ class ReaderTest extends 
\PHPUnit_Framework_TestCase * @param string $fileName * @param string|void $fieldDelimiter * @param string|void $fieldEnclosure + * @param string|void $endOfLineCharacter * @param string|void $encoding + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ private function getAllRowsForFile( $fileName, $fieldDelimiter = ',', $fieldEnclosure = '"', - $encoding = EncodingHelper::ENCODING_UTF8) + $endOfLineCharacter = "\n", + $encoding = EncodingHelper::ENCODING_UTF8, + $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); @@ -422,7 +453,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $reader ->setFieldDelimiter($fieldDelimiter) ->setFieldEnclosure($fieldEnclosure) + ->setEndOfLineCharacter($endOfLineCharacter) ->setEncoding($encoding) + ->setShouldPreserveEmptyRows($shouldPreserveEmptyRows) ->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { @@ -436,51 +469,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase return $allRows; } - /** - * @return array - */ - public function dataProviderForTestReadCustomEOL() - { - return [ - ['csv_with_CR_EOL.csv', "\r"], - ['csv_standard.csv', "\n"], - ]; - } - - /** - * @dataProvider dataProviderForTestReadCustomEOL - * - * @param string $fileName - * @param string $customEOL - * @return void - */ - public function testReadCustomEOLs($fileName, $customEOL) - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - /** @var \Box\Spout\Reader\CSV\Reader $reader */ - $reader = ReaderFactory::create(Type::CSV); - $reader - ->setEndOfLineCharacter($customEOL) - ->open($resourcePath); - - foreach ($reader->getSheetIterator() as $sheet) { - foreach ($sheet->getRowIterator() as $row) { - $allRows[] = $row; - } - } - - $reader->close(); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - 
$this->assertEquals($expectedRows, $allRows); - } - /** * @return void */ diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index dee4164..d8ec39b 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -211,15 +211,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRow() + public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods'); - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods'); + + $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped'); $expectedRows = [ - ['ods--11', 'ods--12', 'ods--13'], - // row skipped here + // skipped row here ['ods--21', 'ods--22', 'ods--23'], + // skipped row here + // skipped row here + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods', false, true); + + $this->assertEquals(6, count($allRows), 'There should be 6 rows'); + + $expectedRows = [ + [''], + ['ods--21', 'ods--22', 'ods--23'], + [''], + [''], + ['ods--51', 'ods--52', 'ods--53'], + ['ods--61', 'ods--62', 'ods--63'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -485,15 +509,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName * @param bool|void $shouldFormatDates + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function 
getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Reader\ODS\Reader $reader */ $reader = ReaderFactory::create(Type::ODS); $reader->setShouldFormatDates($shouldFormatDates); + $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index dffbc26..51b79d4 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -352,16 +352,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @return void */ - public function testReadShouldSkipEmptyRows() + public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx'); - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + $this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped'); $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], // skipped row here - ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + ['s1--A2', 's1--B2', 's1--C2'], + // skipped row here + // skipped row here + ['s1--A5', 's1--B5', 's1--C5'], + ['s1--A6', 's1--B6', 's1--C6'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx', false, true); + + $this->assertEquals(6, count($allRows), 'There should be 6 rows'); + + $expectedRows = [ + [''], + ['s1--A2', 's1--B2', 's1--C2'], + [''], + [''], + 
['s1--A5', 's1--B5', 's1--C5'], + ['s1--A6', 's1--B6', 's1--C6'], ]; $this->assertEquals($expectedRows, $allRows); } @@ -595,15 +618,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName * @param bool|void $shouldFormatDates + * @param bool|void $shouldPreserveEmptyRows * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName, $shouldFormatDates = false) + private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); + /** @var \Box\Spout\Reader\XLSX\Reader $reader */ $reader = ReaderFactory::create(Type::XLSX); $reader->setShouldFormatDates($shouldFormatDates); + $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/resources/csv/csv_with_empty_last_line.csv b/tests/resources/csv/csv_with_empty_last_line.csv deleted file mode 100644 index 8892982..0000000 --- a/tests/resources/csv/csv_with_empty_last_line.csv +++ /dev/null @@ -1,2 +0,0 @@ -csv--11,csv--12,csv--13 -csv--31,csv--32,csv--33 diff --git a/tests/resources/csv/csv_with_empty_line.csv b/tests/resources/csv/csv_with_empty_line.csv deleted file mode 100644 index 8da735f..0000000 --- a/tests/resources/csv/csv_with_empty_line.csv +++ /dev/null @@ -1,3 +0,0 @@ -csv--11,csv--12,csv--13 - -csv--31,csv--32,csv--33 \ No newline at end of file diff --git a/tests/resources/csv/csv_with_multiple_empty_lines.csv b/tests/resources/csv/csv_with_multiple_empty_lines.csv new file mode 100644 index 0000000..c25f253 --- /dev/null +++ b/tests/resources/csv/csv_with_multiple_empty_lines.csv @@ -0,0 +1,5 @@ + +csv--21,csv--22,csv--23 + +csv--41,csv--42,csv--43 + diff --git a/tests/resources/ods/sheet_with_empty_row.ods b/tests/resources/ods/sheet_with_empty_row.ods deleted file mode 100644 index 
4763df0bbd15e0770e109dd9b66ebee88df5270c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2571 zcmZ{m2{hDeAIHbomzG=hWlSMEldX_6moTQu7$MsmF*M8!*_xCr(Uhf7)?AV|ON{JU zvvkRlK@75tozM&>WOt|D`@X7k-{*h+=RD{CJm24Q&iDMjpZ|k|v$F96001t46#SU+ z{0Z5d2_OJ4$p--NGQT=|;(gJ0UljrlOR;Xp4?zys%qp@`F-8Y@G(u_5&UHJh;`D>F ztjZ6gq)pl$)h(JH$F&uSWnhAvImK-0{u+XgTr#b`%JvjIu9FVqi0(p&;xay&@+(pwm38gTEC;2iyDt|U z9F%CO8+}~JkJqEz)r+`hZWP8!;X-E2kE%0(;b$CF@dH76v)T8A- z7D{t}1UzMiFw`ivd4GwpKq%~8LJVu58Q1Tojbb4fJl7(Qw%qt{)5Od2%Bvo62RL8l z{}H>rZagh(F}j4m?6AN{lH)OV^Qw3;1SA-WnrtnC!G5m&7F>ryBpqx%t1BN&i$L%_ zq?_%+VK( z()GNaX(BipW8FtKWVK0jwW{)cGP4mHK3gK{FF#v{)Y*fWN@giayz5rn8-N~tze(ny z22sBpnjGICE-!84K%j6`n1}WDH8epc6{NZsn}3HD`F9im09j@fnBD)s!Off@&R>Em z!!8=B{&yP};G(0XaVeeXKw~b?GE*b)<2IZV9^;Dk`JSD!xCZyP5WahhfjpD9CTLn0 z;))G;b+OrW7Q_4k4m+(_RLbjg^-skowGs>hi)EH~((60cj@;FmtUqg>x4587irJhx zQ(LyVTKEc7dX>fd@DZmxS(jRx7J`jo>{8SpsQ{knbjy1ef(5Q&xsMmeQ?sI`Q2IvF zMWeGc@e$l5ZQiXu7=MROou-Q&xg4;to{;l$V`Y zTdB2|JDMaJRC0^+XMg53FMI&9005#)f4G?b;LyHK-*6Hg3&r1s96tY5JA4|)Uj9HK z{I?hf)vgslf*#Dg-l$xDj>daJ8f@ua`&56g^GtN)Bnj6dq;tj20u}3R+9j9g;Uhoy zZcbSnIdNCRR;$kuR@dF*T&DPW!(NdMXF2+u8x{$TtF91DG*?rBfJznS+_)qleX9j9 zQZYUgUy(&&l=A-+zttnw_9 z=ww0+3=_DSRxO!YV-o-6t6aUgK=J+Tgy@VA9yUv+8gd z6(%9-vs_s1sZTx2tzQ>~a*>53cvwF6{%WFzyl~3iPjh2u!FV^`rO%v#%n<0E0w&aa zmQ3>-^5(G2{0N3t4p&72o+{5skGsV=aijC@QKmlGUlBO3FI?1>?1L5$-Mm52UXT;t$-Bo*(3Nfe8mk(aiQ z<&zzA4~Cd~BAYb3iTd@a9RsnV+cpIOV$VZUkxQ<-j`G_^wNm$_x=wuB=^JvV71V?j zxOE7n4*(>bo#WdY P2lKIJ!a405aR7e-7MVx^ diff --git a/tests/resources/ods/sheet_with_empty_rows.ods b/tests/resources/ods/sheet_with_empty_rows.ods new file mode 100644 index 0000000000000000000000000000000000000000..e7ad29a6c7059191903d71ed0b69aae04e72aba0 GIT binary patch literal 9253 zcmdUVbzBu&+x8ZurKF^lQo5wOyF)=5Ho4gw*fg?{mM-aT>F$#51}W)oq(zYr&Ut-2 z=RA7e@B8z+_ixs$nLXDvGxu6^ueq-^%5w1caRC5i0H8QrLBxtf zYoNW04bafW24rDuXm0@lGdqG!m>`Bw3n&xB1_(BR7&}-4!S+m08#|z(3Dg`2v{(Kc 
zXY`x7rlni5yXDa>r@Fa=wGr6R0t98Y|50VK0hEn2_Xq76$KeF)f2L(l#eMXpV3lNFg~NEr)Qw2W#wRh z&dI^c#lz3b$-%=Vz{kzO$IHhrC?X&%DI_i}D##};C@L;2s4OicCMqT-DJv+0o zXl$%*Zf|IAtNGB`{Gp?%y}!MoyrZSQ?L$+4XLEadM|)RaTkl|hS6hEiM}J>m*U(ts z(0I?tSvoul1@&xc0`2S+D|CznSDyCpUR|D_ez`pV{{8!HU%I)u(Rber000nQ%7}}oJJ0UV-YDUiks#T`b!{uRZ=d@tjmomXQJ<$63}QdVtr51pcRzE;0=M#_fM&PaAgE|~UdRhvwn&)MPn*ul7r-_KyB#{_IAvg8d+T z&BKI}5@2jp^cEWj27;05P*?Xhwsn~TUGai|MV^6vmY(bzUboD#OID68p33Hu6He0O z&2tw@G{I3WrRT~-l&V>qU7Yb`#2TLyb{9$?=emn~OBq#aEfj=*EW}!^mG|py!#I>> zYZ6)ABSP2-D-cj?`X+8>jWwRSpGT`$-fS&w2*x&qJavw#wN|;ETFe%Z z;d?D>p%9E(GSkCbHeIxB#WBwwPr(&dGz#*;Xj1irmABh{-(FrU*Jmlt=j)x_B4D&W zo=eQNK?pCdt2(CMo$a+sa%iEU$E?tv-Ei=qE2irRLD$WOOt;SC_`07}o2bJblxAFh9c|OzH%FzS5d5aV{_OMe<8!Q6Eyl%NSz5B1ADt$v zKt9isGQ%BhCSx-Za&)84Tw|0pIWM+|jS^M5X>Xg9inXXF@6&LlbC#{s2Y2QpD0i7n zf?-6$BRA&l%WpYuky45cod-6$LcVf-cw-637KpRG+$u(7Xi87V3SPWmo{`KDHhRJRwbGTS%Z|t9p@{zwSZNg&ZNBuk8MUBO%;GV`iArHnY9K`%#UmsCtzXSdHU=3 zYW*hWMO|F=z3}NcWEbzq=ZsnOPlowWk-gI1!Dzzx3=fHAj+<{BrYRqy=%K9gZru=E zk0G68`tspqvRnj!HDL|&z0opgBxe0d)k4$Y#iiOF;!t8$Z>G9W#LwNnW(iHlE?~H7 z%hbf+qcyAcbxZkCyKYQ;7BO>W-gclxKMsK=evL-V!j2X98fU??YhIM2%ZQMwAuD9J z98uDN2;7vKQ_T$r$L4Nydr};LK-Z9U(#G@RaWsD^$APkggo*eTHy=L6;bSzUXuq+3 zfo=k=i-X?C<@$;E8}D^lO5DQc_lUZ%>$(u9HCh>`HOsZgg-kAoWXe_L4R{pH7uY&&-cmwHvAmv93ntfO*yL`q6xSM$tZNL8+Ny{FWkTGn(q(eS z0fDFfy6Zvi%gGC5m=_O(=?v)ov)7X8-CcvmrcB>tH8rES-BU0rl8Ut5 zDiNCTBeUHvVATYJH&PxkY|owT%t2lpPtI+=sp5Q0B_UXvR1rD}caX01#nM1v05S_H ztZCOTwyDb^s}*!r0q9gwKg-VTiP0O%PDIBnl(r!V$QoOx0&LR_cVb<_;@!CC6heSX zDTM1v@K#0YTeFTN-DaP^5C%r?>*(ptZg0Wz{a?$x&`l0GpX_NdI)nAtyz<)?sX;pJ3%fzeX!}+J zsy73ymFM1;#5*E=(HxD=;|=8=K0K&b7%}i+(lBv&6O}8XLJ8vcS-8=bNr5G}$$Hr(BvU0Vn_LincHzZ#ZjRQQb=KJ~Q(WDbR z*_s(qqNsfej5g?z<*#GAaq3aOOR49i?E;sF!*M29*>HKx{bVqB<`-OQzFG}_yy|Qw zdfVqCz3+;9MB9b)xLGji5o;t}KnxN5R?V?m)~P3k>70S5{Lo{4C^2iKDlCJZS1!Lk zru%|lWk^*pIUHDH*CoTamb5m4NJ*OvaYMa@7b=gi{V^w+f!`m#G!h>LqIa)Egf#+9agu^ zqMM~BVKWd--_G&Trix8lI-xJwLw9Kr3GE1Cf1yj1gY)6=Y+1PBTIGgTkZT$*PL%T~ 
zZr%sBV(d32j?Fd`*QANUq3(0=%)=GgJtOVgP~JvS{_^ZMI?{qsqNaXwUCxYe0z+QX zTe>=GiXGLmL?LbA*5p)gox~$H+F1f|Ls3pBasrG*5ry6?QOcmSgw^I!sM@$(l5a^K zhF_{FNUVXZ*0_(r7wDGNG;KWr#vO6RpHP@Rx;x`+Fkha`C2tRgd@#TeG51q7CVjH9 zJxFGZg&Hn%K;|x_K3hL4^tz~`zu6e$22NQH2|1rn2XFm0y+I)b0Dd1Hp5BHBF$mby z!py<$j}#2b#%yS83(-m5AJuv zhP5Hs!W0O#XR=nC#d{=tNL?eTIxP&^Qvun-Ug<+>^a`q>1ulN1k%rz-r1>i z^-f?q9{m_!My>~-;fjoB&Iu>{N{tk)liaq_54tKtkx|G(L@mzNUK8;Qt)Q5`P_f@l z3wODAED$u$o0nFOYFMq>cg zh(2_9dg1MZoOQjWrqTl)@5R^A=B@|Et`6g!pBE|L86>eCDD5=xx!i1iXBxgCTq&t^ z6bA_pWtv@uZ&n_YYe<#c_5aEX^D%^b0D$-HqxJj83hlPdj3HqA|9N!gUn)RWIkB29 zUOb{3Ay(*LxF3+| zV#q;Vj@(;yJ;Eem_n8^tR!>I+!(&Jl1vx2?{#hWW8>{xbr@J*gAG_v^ky|~8W1#~6 z>uiA0sXWG?H9!WvtxL4N0%oMzVBT9U}))PorYgZgaMrD4!MOB<^RnGGDwSdD| zsLiM4KvfifStC@qLUx6A3+Y!NM8A-AvogW##sSSVNq;{=$H+-hPf29(6#fRcShHeD zbM@}%WR)(a56=P%NBs~NQ}3J`SD0%6S9lqGTnjo4|CHalf6uc-pVs6BWdvXK95i{}<}XIo z8>BTMy&X2YrDOQ@nlg6yf%f>gbI4>y3rBNS?41b z4(b^{7)FoY?~1SRWr{kTUKvdftI~^u*12CtX%m(i;~v( z-H$APBn?+K((hm2?bIK2HGd3y08>jcEHeCReUFgft7EdBCTxY0e#sSn*?cM7SQ7E;A_d#bk#PN}@0Qb^<^^k_PIHyR z6|y`x*R8=PcJ9RC8vqL_m>C+9c!UkHaB*}f^zOWURD4-z8KL_Lpbse>YVSP7aM#~> zW%;BN{3k{Gp zeyI?L00xfJhZ5a5c?uO;Ub1%O>zyRewry{74DLUeIP2-k?^l7Jybm$?@L(xPzw&L? 
zLSc{$B$m^x@({cKdobVJd1zdEQ4Pv8GMUy5hSUBhvyA&j=Li&4D$W2e88=T}N-;4_ zRR&-NO1yv1d)T2`1sP(^H0CUSEk^X0ukUp_H&-%->1lSmC%e9Rs0MP~B&#;Mn9+Wh zrX$x(pE7X!Iz*z^x!E$p1zt?V^T=K|sffV(_FO$}KQ{3k5d^UxTd>K7zqw+%LA;At zuTl68UG4(_ocRA1vF@Z%sJ#ma_(zs7uVo!J!HM;KK_A-})pH^#N;hrKt|m7h&yrgB zZ7lhcTvA~Wj#RbNMX3H_8L8U|%u_WT?&;$cv#wP$J${tfsTyjuP;u$Lf&!Mhx9xA# zX@|`uO zO+#M!P)u$guhnkM%l)S-UrCw})4!~q6+ra@?6&oY=F;i#D)nB=2Z48?Lrn$Rt@sVL zPI%o);qKscrHZ4dw7|Fc{K>w#aoH)hdZ7`BwfFB)P)H>oV=$hkKV=k>@S4zhb|~H4 zICGDWUO(SWqkLO+QWc3&JSIf6HJQV!+&3d2^j#8}z3UAB4Y7&v{&$Xo@D6F?*C>Fd zO;dVQ57M=7%E0P92Ew&!8ZS$1Q%UQ&vXP-jT`g@UQ2$B>dm?)30gXhVvG;^SnW~bN zUR0}R)!FlTuW6?&8F0xLo-hWT7Qwk!%;Y-67q=xh@IWzYOheTk0;zy@P8NJA`WSl^ zj>mN06qFyt0|tBU$@U$5;!St~lSscH+VHxljifXsTS@3A5J=K};2mla5{9aJ52);N zES|mh*fL*rOF`;E=ypeacNM1dupIwN1i=6Z_7soyaE?@AbiQz|uG1wxf`t4f`yh9J zh+K|p#yb{#Ugr=5ru)SgD(KsyJ{c@oG<}1TBe!UdjOMII~xST(n&AyzKdZ9QrZRfSP_<(UIF+5^UCQ+|B z*S$dRbcz@!7Q4+U)=|;C%U2PX;VERL6bUsUH&QbPgf`M|E)wnI%1#_IeaboJjm0v* z5MmwOp|sF-fGaih7}xc=`igKHHoVh1y4g@7IEkbhuV8+7p)4gJ;6&u(*u#qVrs3^2 zoVB8$#Bn5Y?4hxhEZ-&n*$3wv>WtO(9SrOANwVOmyx{ixmyd^yFS9Q!L6&<=$9#$1 zv71nLPEh?8Jrn?>dC11cYmRHccv4ek_3uP*n6v1B475t8HDW4}xQ2Newj43>zf z9oJ9_OEF2cFl?KBpphw};md9?MT#BD(jj&jvmI|fR3J(fd32%&=H$K@a$C6$?U7in zezJ4_Q+A)ca-rL~KyusGebxus20ic;OA1lk!E3>$IN>;hr)VKVekkHmaHeV}htQWw zKKO2NU1`-7%+zLQ8hiEuq?$osA9L4TA(W5gYRw7;lKo{8>9m#7$KE42(u$B|TW60a zFQZWA3*fHo222VhLKHcLzsziD#61+C8IA!r9G#xBNAuh;#i3{RzEIg|EAvrp-LLUM zJ|N%!PD92U;OY%L*?Ofrk<4u+z{&c$8#5o8GvyHPLz$MYb`|{y11p*zUVw(e0UFv% zrs$O7%4@~5iPfz4;ncSp;#KN(fk#mJFbGaOcCXM3#)O%jq*D^o-KSt_9?@t{Z-v6yu9WngmJ)v@GZXN3m=G%cPl-j?bOr9kl3O zUPu;&9yiPnSA+!4xq8aZ%1Qe3!b=OE9EcSje693KpWu)bIb$>sRO?Io!pR5^JK>Uv zCK;A~j?eHMhWmU5kys=kyXMKTce&O?%!cI5`La}f?wN~|Mf2Iz;G&(ORqI z)oeAy(m=$rD;X%d7&4pAOvUGHVjD>J(QkUQ8=iuf&ml%$;lj}$B}5*>iUrP9*<_(L zZDMsz>s^Nm+!-pX0Sn9;^n@@~+uXIBG0(T)D-ywKgK<#{9F2txz_H2yVx{bL>xXm5Cz zpN2%qg(b4!`d)m&>)9`6tA0XdfNeUZA*E))HnvAm3Q^TIlInUe?xBBT)5ebv^bKoq z8~)t6=2Wst<9u)YqxL=)8ZD|Y3Zs?7Y%tDTPPvI; 
z)38_6-vbe6y_z!3T#QO$3#y7GtgNtu>lq6WOGlE+!3O3J5uS%km1~VpLa;s-;6~Ja zM!~IBpT!m0>HE;mZ5`?J+cy=SOhy#>E?bMP+?e9MPItF+ zQ%FH1D@~0?5=+7eDg}}Cw`H&+j}~lZcV|#v{TuvUgF|9SVjBL}#0u~BHAO)}U4&6a zQIh#T3F>X@7peDKwzFV0Zqf%Acd90;vD(@ppQ4RnDz8^jhHHv8p!P1lz*tiWl>2mE z9X2|^f~IX9x}bq1Z317tA|V)rI^r|5G7@+(|LU5KbGNh4s!{-pD>j4fFud5B-mZ1$Pz86x1Up3#;M$S!( znK=yRA{ULY@>)G7h~qVQI_$UJq$I=Yd0>wDL3FUJe9r8~bCy^2H7&5iur2V+6~J*| zmWRa>I~UI2qR7Hdq7|~M)iL*`iPVnQUra7L64^`;F1=a(~TQ5u8 zz7Q(Q-Gjpg{CZ{Rd!$^v|F@X*8*S?c>5p#}ev$wHcM{4^Tlxj*UzM%DyYwgS&+ij{Za>2K z?~D7Xbp7A5DE@-wr}FiCmcQSV&o5a1s)YTX=S~azX^p?&`MWaqd!{>W?5Caog6XGH z_Is8)t?Z{UVE(s{|4TXhJ=x!#Wbq5Kzba{e^8Dz}f0c>;-k;NNdHxf)Zk4qkUGE3- zuj$&IjP%o%u>K}0{S)}t5%Vsa`)QH4QS)bR_fOnk1MeU5?tg&Nx6OO|jm{sV>pxL{ i4J3C#`KR%a{U$^!%OTtj=l}rr?a$^m{-plVTK@+>#mIsH literal 0 HcmV?d00001 diff --git a/tests/resources/xlsx/sheet_with_empty_row.xlsx b/tests/resources/xlsx/sheet_with_empty_row.xlsx deleted file mode 100644 index b9330b0bee92e0a755f58d9c15f311648a6615f1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3689 zcmai12{e>@8z00FjU^$>NU|1VnJlU7OAI5FrHIjFpRs2zt|eQxxpK1>v<`9;BCIqCsajGlZZ5 zfg<*SKnH=Rm(*}r4-D4him{Ii#@#}~+u5lUucAwoWdAT7+?O#Z<}Ay6v_P#Jq+9#oQwBxKD?1V{NG zp}}3!W!2#9%#(5A?3I44DuRPDs=5W**W-HvE7)GMiG{Q|95OS)rY=?1E|lOWZ6D_c zNq8%N?DOvOiuETI1%3~-a2dSvodJ13`B*2s`Kl!3+r(!FNy|50yrLpUM4In>FZQ2j zeyQLec2H;5u~Bq_&di|uh4Le>QQh$GJfT)Fff;`J*Ol$x-Z&Rwg3iWW9c~oL8_J?e z1QYf}b5wHK#Y$QZxJVn4g|VW`N;Q*EgI^?rK+2a)H2_|>gbw(3#SP=+4&T(-2*hhV%*Y^Sl4`-pr>0Q$r3glyvAP}kTR~K{@FsEbzZ38iw`o}*^X3x%wcr_ za&$b>nATl)rk->*1d-B-8jqJR<}#PhaWG&aJLkDSJj~(?pQ7~jFj1ZX5ik%>LfaM}yE$V3__&I*F>u4V z?4*tTd|O{B@cI;x!z|m?tX*8zlQ%7{Jd#?9MSYs_huTLAnQtoU(gMNWmM(36+P(9n6sQW8uy;d)j?>W5 zH?L5S3%hy_tf|s0X_fho;>^4gOsYE5J7NNm4JX@5-rp;(@`>)vKhZeFf7Fz;lu$ox z@lP?$?->GWIzP$*xH$#r`8A^^FL0gy-DJ}T zCe)v9n~G3feuaZSb+qv4w(O1Ni(P=rfqMk~TrB6ZRS^OrR06(Fke4#ruch>6R)Dd` zrhRD+oeI`UA(M01lOlS{3n#>j?!Ogoyn6cJOVwhQ6U2@vsNJ_h?c|P?(X?UyB*Oco zC{!`NsPD_oUu$MtjsJ2rp*H9?U_<%%-edruzKP5XTaA{x-W$0b 
zel2P|wIKzcfPi!RewaB_(=KWzqpGu_C0%So4Via95^lHn943{oQRlp&+9YvzTG0R` zdOQ7}yaN>WDpxTUFUvLnuLG1T_r^IJYS(dYj%XataXlcvs<+O8D9LnlI<)kU#sJp> zwZno~IHqBu?aWPAygVX5W^i?ODp!gur`e_EDU~pZ;?tb_;>X(}%(fuK^*EA^osKi* ztOs3hJ3RJNUa}H$+=SODNpW(yLI*}G+wZO~^R&~4SDaJII!>R*{<)dqxJLojMzIBj zQE2t324#pv$P}2Lq_b-+8JfDuTU6b7LvXqGN;1dX2`GlzR%X|na4C*Ukfs958 z^|D1WSA}<*j52#qdzzOKOnQvQjyHv!L7yGx)lquqW(dXS^JsYX@E zkLn_Tg(e_1*Jlern!BB~8|JE!hnqdt)_t9ffcoY9I-SxBjt?JW%P-vxg-KQmGWELghUaDmg1QVC-S8pLC|RFBJ=zpsm(V!J5lbT3cs-OH#7I72SmOH{`| z7g|7eD?K*aw5d+chyvi{5D@nRzdZHual-s`(bG^5M?At*iu!y*x!a@wmTj+NKF4N1 z;@Lz%NHMT+?ZdWCw#OpIObXm)NHT%UU;P7~he401Pv!MLi;9+cTiwc?5_|(!bLV>f z(Q|)4bfu@AJ~A^mz9bska(K~!>efRJF2-KAC1!qfYw(}01E;Ofc*cu{!5OTL;5g4o zxS2qb*}Tx-Egy}PlJp6@hFAaW5)g!s%!d9`{^;@w25fJLseF%Dv$wXAQ_+=y8#FmI z9}uijJjE%8rn$u#{yA9-a`5D#?+NtONfBt9k|}>~foyAijb{sB7L!NbBdTdXMs73) zrB-trApBe!^b|iOkP5r&Srrs@+s$Pm4`06u80lxynmEKodH^$35)1RYWKRB@F};m& zkEDQn#z)NPqrdIIl(mq3+@f!myzrW9jy%WSzIu6et423CX1<0eegL>V3GmFk!SVL0 ziiuUxCn_^(ENFLI51vWOVhqX5>M_=~c*Se=QNHf+Va0;*m))>Ch8ah{`OYmiq?_HIuGO*yH9;y3hb#T2 zv}#p2PYZNntg14S@qe_NNvcCxb<Gb;11 z8-_F%*E%D`8Xe^Y@$*WDvz`4SUc^uDf1wm=${WH=DkktjLk6@LO`E}AyicI>hw|!b zEU0E5DWc9Z!pj)l8}HIE7!A5yOy!v|?HyL34Y&H}PQRS@q?#*DOmj%%htx2MeT|(| z!+zz1fBcv0Qa5-=(R7TQS?o8oMKc?Uv+&7R1Rqwc1Q_bnK1(!2|KJx7Z6p;FN2k>u zmw^m)sel6h?`tr?>NeS9DW)8-JAUgc9z`%fq&6qW??!j7d=yN?7Ut&&`CrD?cO#1Y z1$eb>Q~KTB&RvNDd%GR>YjdI)-Kff)>jwp$3*_OJ(amLq0^d#0`rxI=zrd%*&oR7t z+ueZQon)o`3D}#|or9TTM}9kOdqdg5@!lhia*iet-@OJL#pAt41?2=u@Wu&SgJUOj zX9uJ}@$^5T+pkXn?mM+n{{VcNQz-xd diff --git a/tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx b/tests/resources/xlsx/sheet_with_empty_rows_and_missing_row_index.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2f5cf154495c5d431a0caf947d7ce5eedb2f2e76 GIT binary patch literal 3685 zcmai13pmql8=ooTrIF?sQ{<4!940G?A!K7-6jtQ4u$a@FNmF=D3Xd&rchCN zS4s{chmvFEG^dw^4Eg?}A^N`WxvpK?wg3Hlp6CAEhu{4-!*FhZfj}T`kV%@=AuiX6 
zUL*$y6te*Y+5-H0OwW@Co4INT9*Rb-NZ8W*;S_Fg#jMZq=c79&yBD^|?$#oI3IZ zan(=f@HH;|F1B42@^^nfJW^+6Ye9_%Ka9Ul?zot2@ZLv2K5yXXEAI~BGmyuUL3 zG<1r;cwcDL7UOTH8g>qG*_gFI(z)e7fQcU88fg!gnb?kcT-iGM+@q90Ihf$w*C6+x zH;3aAq+r7Z;Yv~0cxAgDFI97t1A1yd~>w%3F9I1hHNW{XA zzcr51B{?X_X1Hb4>cD-4wrqSnC>^q!+G{6|l$pwjA7bh=T)4Q8RJFAG?*Au&X8PtB#%!TI?c{s=xiEUh|!~b*a0xMMp`h2peS`;mDj!!9M8HnU8x_>OH1Ot9tSWrtgGBN*&{F31aZqPSS0gFA zIJvmr&w=Uk-hMp`x|}nH&w>X$Z32_5sy=3ZhzrBktGATBy7sgx=t5`V?uL==($$l%@9!Ip98{xnLdvQ?}%GyUuv2_-TX&q(e5u%Wg7P7b&B_ArpKDdn~K{M*|KY}Rfjhzkln!vi^R2AdrQg5$bc%>AxY!gV#;8~T=RN0Olej*u zH~|s^-TzPC0S3P-S49DTyA}X%43w+*(mqRO0iHgm96dcxu>$h5dTWU^Wxg|Gk>zI_ z!n~<^VzMLzp?;`E>x_3Y3RxKUb^hB(z6w*r;FF%8O42Y+%3$(yh(wdT;IxY9?el^# zE|Ii@WUkIuMEtu4soGe&m4thW=J4!GWBA6s-M*%3_dW(m>=99MNHE>%_Rz+hPNtHa zG@EdQea+;zpe)50H7Z`6%eA?zcjOEovD_rEI`uOJPjTl9vRtsbvt@XimPNDa;16I}Jhzom>cAfLHeR*CrmdGF#u zf!iu0JA^aQBQPaR=yOKL+xYOSJ>3^j2Q80;Bk}1CL5z2HynA7H!xf!{-4(36gjIR> z&OK_kZr50Gl_i+g#(Dkw05^Mqz;F8LDmloVu;`-c@80z0-Z-{xVv;_yGrmdeJCWnBo5G^Joq{v`+Z~%DE_?SJuy;)6J!&42wYdS3 z;5Ur0kx8+clDpgV#zH&Av_Qh#`SK?jSw#Q0$R`!Ij?WPwZsvr_SIK&A4)z*oj2gnM zk@$X-Y_;Zo5e-MqzeJ+jQVpS7)P=^AxWOqgj!tDGq2e;R4yK3hPeTN(ZUy%1X8h<6 zZ=j^V6*ohMc-5O|zDr^hwbyOlr-^CB?}grcToTsb&1X0yB+9q}x7JpS3OR;n-sR0~ zDY(YihrKUF%j)93NaV|z&fR6(^^H0GSagy(32U7{KEGU~OIpZ(GMV-o;8q>rnSY7n z)nyeIk2a<0@Ew{qYIpdmm65}HE<2~g(#W<%!u}1a_O_TNHM+PRe#QL5=eT)~!I=gB zu&PU%S+0)s>vopi3Bea*pps=HC$8DOaN~i|UY?T-&Hg8N@_T)z&Bd`ALl;mZw9>q< zGGxTC1}$b*pkgPi z=)Ticp_;TJ;`4=pc;+Y6A0Hiu3w%8q3c|MT*@tpV+C+_0|E~FJ!4V&hD`SG$d9}VV zL05eCbtMqUt3@y98s>1@WsXsn5*z?JnCLp&MTiQkSVOaVcN8Z!xn_>4;0KJs(B zUfuqA%K&G>6j^pxJ;GNF|GQ|9GQ8G@e6y9(t;ksmW=WJnile(5-kMK!-$^99H#@`=23ZWas zW@qr)cZ*Bj(PcLh#Um*7EzZG4RK4gKfM%H&PQ&3v5*25_G z3F0x({79X!X3r~}gA(3%M}>5B?B{k@(_Ry}LB)3L8(;3}2Y2d3Gl&Lh`E!^WgvH$6 zK1Qkw(%LRj=|`eccKz!da*ej^O{Zt2Cd?-!u~(N5ccg}!(ktf_?^O@uT*X z!*GCMod10U1_)hT#T3@pZwBI(4c3Q_HQuq;0-68>iLD;L9=dkiV`GXbG0T`m`S@Rl zvF0N7T0s8|bfi^N(y!;Oos-zGV#g%t7*AvI;BJ8z*jsbWCZq5QP_Y^ki+@Rle g6*jh*@@O4)rN@|IHZJBo6a)pC0>R~Ay}W?_149UP?f?J) 
literal 0 HcmV?d00001