diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php
index ab887ef..688e4cd 100644
--- a/src/Spout/Reader/CSV/Reader.php
+++ b/src/Spout/Reader/CSV/Reader.php
@@ -119,8 +119,9 @@ class Reader extends AbstractReader
             $this->filePointer,
             $this->fieldDelimiter,
             $this->fieldEnclosure,
-            $this->encoding,
             $this->endOfLineCharacter,
+            $this->encoding,
+            $this->shouldPreserveEmptyRows,
             $this->globalFunctionsHelper
         );
     }
diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php
index 1ecbaf1..b805126 100644
--- a/src/Spout/Reader/CSV/RowIterator.php
+++ b/src/Spout/Reader/CSV/RowIterator.php
@@ -52,21 +52,26 @@ class RowIterator implements IteratorInterface
     /** @var string End of line delimiter, given by the user as input. */
     protected $inputEOLDelimiter;
 
+    /** @var bool Whether empty rows should be returned or skipped */
+    protected $shouldPreserveEmptyRows;
+
     /**
      * @param resource $filePointer Pointer to the CSV file to read
      * @param string $fieldDelimiter Character that delimits fields
      * @param string $fieldEnclosure Character that enclose fields
-     * @param string $encoding Encoding of the CSV file to be read
      * @param string $endOfLineDelimiter End of line delimiter
+     * @param string $encoding Encoding of the CSV file to be read
+     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
      * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
      */
-    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
+    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper)
     {
         $this->filePointer = $filePointer;
         $this->fieldDelimiter = $fieldDelimiter;
         $this->fieldEnclosure = $fieldEnclosure;
         $this->encoding = $encoding;
         $this->inputEOLDelimiter = $endOfLineDelimiter;
+        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
         $this->globalFunctionsHelper = $globalFunctionsHelper;
 
         $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
@@ -114,7 +119,7 @@ class RowIterator implements IteratorInterface
     }
 
     /**
-     * Move forward to next element. Empty rows are skipped.
+     * Move forward to next element. Reads data for the next unprocessed row.
      * @link http://php.net/manual/en/iterator.next.php
      *
      * @return void
@@ -124,25 +129,48 @@ class RowIterator implements IteratorInterface
     {
         $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
 
-        if ($this->hasReachedEndOfFile) {
-            return;
+        if (!$this->hasReachedEndOfFile) {
+            $this->readDataForNextRow();
         }
+    }
 
+    /**
+     * @return void
+     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
+     */
+    protected function readDataForNextRow()
+    {
         do {
             $rowData = $this->getNextUTF8EncodedRow();
-            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
-        } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
+        } while ($this->shouldReadNextRow($rowData));
 
         if ($rowData !== false) {
-            $this->rowDataBuffer = $rowData;
+            // str_replace will replace NULL values by empty strings
+            $this->rowDataBuffer = str_replace(null, null, $rowData);
             $this->numReadRows++;
         } else {
             // If we reach this point, it means end of file was reached.
             // This happens when the last lines are empty lines.
-            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
+            $this->hasReachedEndOfFile = true;
         }
     }
 
+    /**
+     * @param array|bool $currentRowData
+     * @return bool Whether the data for the current row can be returned or if we need to keep reading
+     */
+    protected function shouldReadNextRow($currentRowData)
+    {
+        $hasSuccessfullyFetchedRowData = ($currentRowData !== false);
+        $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
+        $isEmptyLine = $this->isEmptyLine($currentRowData);
+
+        return (
+            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
+            (!$this->shouldPreserveEmptyRows && $isEmptyLine)
+        );
+    }
+
     /**
      * Returns the next row, converted if necessary to UTF-8.
      * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
@@ -154,7 +182,7 @@ class RowIterator implements IteratorInterface
     protected function getNextUTF8EncodedRow()
     {
         $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
-        if (false === $encodedRowData) {
+        if ($encodedRowData === false) {
             return false;
         }
 
@@ -195,7 +223,7 @@ class RowIterator implements IteratorInterface
     }
 
     /**
-     * @param array $lineData Array containing the cells value for the line
+     * @param array|bool $lineData Array containing the cells value for the line
      * @return bool Whether the given line is empty
      */
     protected function isEmptyLine($lineData)
diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php
index b9c66c7..98dcc7c 100644
--- a/src/Spout/Reader/CSV/Sheet.php
+++ b/src/Spout/Reader/CSV/Sheet.php
@@ -18,12 +18,21 @@ class Sheet implements SheetInterface
      * @param resource $filePointer Pointer to the CSV file to read
      * @param string $fieldDelimiter Character that delimits fields
      * @param string $fieldEnclosure Character that enclose fields
+     * @param string $endOfLineCharacter Character defining the end of a line
      * @param string $encoding Encoding of the CSV file to be read
+     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
      * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
      */
-    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
+    public function __construct(
+        $filePointer, $fieldDelimiter, $fieldEnclosure,
+        $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
+        $globalFunctionsHelper)
     {
-        $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
+        $this->rowIterator = new RowIterator(
+            $filePointer, $fieldDelimiter, $fieldEnclosure,
+            $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
+            $globalFunctionsHelper
+        );
     }
 
     /**
diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php
index 0dfc16f..2003599 100644
--- a/src/Spout/Reader/CSV/SheetIterator.php
+++ b/src/Spout/Reader/CSV/SheetIterator.php
@@ -22,12 +22,21 @@ class SheetIterator implements IteratorInterface
      * @param resource $filePointer
      * @param string $fieldDelimiter Character that delimits fields
      * @param string $fieldEnclosure Character that enclose fields
+     * @param string $endOfLineCharacter Character defining the end of a line
      * @param string $encoding Encoding of the CSV file to be read
+     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
      * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
      */
-    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
+    public function __construct(
+        $filePointer, $fieldDelimiter, $fieldEnclosure,
+        $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
+        $globalFunctionsHelper)
     {
-        $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
+        $this->sheet = new Sheet(
+            $filePointer, $fieldDelimiter, $fieldEnclosure,
+            $endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
+            $globalFunctionsHelper
+        );
     }
 
     /**
diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php
index f806fd2..429ffa6 100644
--- a/tests/Spout/Reader/CSV/ReaderTest.php
+++ b/tests/Spout/Reader/CSV/ReaderTest.php
@@ -115,29 +115,40 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
     }
 
     /**
-     * @return array
+     * @return void
      */
-    public function dataProviderForTestReadShouldSkipEmptyLines()
+    public function testReadShouldSkipEmptyLinesIfShouldPreserveEmptyRowsNotSet()
     {
-        return [
-            ['csv_with_empty_line.csv'],
-            ['csv_with_empty_last_line.csv'],
+        $allRows = $this->getAllRowsForFile('csv_with_multiple_empty_lines.csv');
+
+        $expectedRows = [
+            // skipped row here
+            ['csv--21', 'csv--22', 'csv--23'],
+            // skipped row here
+            ['csv--41', 'csv--42', 'csv--43'],
+            // skipped row here
+            // last row empty
         ];
+        $this->assertEquals($expectedRows, $allRows);
     }
 
     /**
-     * @dataProvider dataProviderForTestReadShouldSkipEmptyLines
-     *
-     * @param string $fileName
      * @return void
      */
-    public function testReadShouldSkipEmptyLines($fileName)
+    public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
     {
-        $allRows = $this->getAllRowsForFile($fileName);
+        $allRows = $this->getAllRowsForFile(
+            'csv_with_multiple_empty_lines.csv',
+            ',', '"', "\n", EncodingHelper::ENCODING_UTF8,
+            $shouldPreserveEmptyRows = true
+        );
 
         $expectedRows = [
-            ['csv--11', 'csv--12', 'csv--13'],
-            ['csv--31', 'csv--32', 'csv--33'],
+            [''],
+            ['csv--21', 'csv--22', 'csv--23'],
+            [''],
+            ['csv--41', 'csv--42', 'csv--43'],
+            [''],
         ];
         $this->assertEquals($expectedRows, $allRows);
     }
@@ -204,6 +215,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
         $this->assertEquals('This is, a comma', $allRows[0][0]);
     }
 
+    /**
+     * @return void
+     */
+    public function testReadCustomEOLs()
+    {
+        $allRows = $this->getAllRowsForFile('csv_with_CR_EOL.csv', ',', '"', "\r");
+
+        $expectedRows = [
+            ['csv--11', 'csv--12', 'csv--13'],
+            ['csv--21', 'csv--22', 'csv--23'],
+            ['csv--31', 'csv--32', 'csv--33'],
+        ];
+        $this->assertEquals($expectedRows, $allRows);
+    }
+
     /**
      * @return void
      */
@@ -236,7 +262,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
      */
     public function testReadShouldSkipBom($fileName, $fileEncoding)
     {
-        $allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding);
+        $allRows = $this->getAllRowsForFile($fileName, ',', '"', "\n", $fileEncoding);
 
         $expectedRows = [
             ['csv--11', 'csv--12', 'csv--13'],
@@ -275,6 +301,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
         $allRows = [];
         $resourcePath = $this->getResourcePath($fileName);
 
+        /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper|\PHPUnit_Framework_MockObject_MockObject $helperStub */
         $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
                         ->setMethods(['function_exists'])
                         ->getMock();
@@ -405,14 +432,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
      * @param string $fileName
      * @param string|void $fieldDelimiter
      * @param string|void $fieldEnclosure
+     * @param string|void $endOfLineCharacter
      * @param string|void $encoding
+     * @param bool|void $shouldPreserveEmptyRows
      * @return array All the read rows the given file
      */
     private function getAllRowsForFile(
         $fileName,
         $fieldDelimiter = ',',
         $fieldEnclosure = '"',
-        $encoding = EncodingHelper::ENCODING_UTF8)
+        $endOfLineCharacter = "\n",
+        $encoding = EncodingHelper::ENCODING_UTF8,
+        $shouldPreserveEmptyRows = false)
     {
         $allRows = [];
         $resourcePath = $this->getResourcePath($fileName);
@@ -422,7 +453,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
         $reader
             ->setFieldDelimiter($fieldDelimiter)
             ->setFieldEnclosure($fieldEnclosure)
+            ->setEndOfLineCharacter($endOfLineCharacter)
             ->setEncoding($encoding)
+            ->setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
             ->open($resourcePath);
 
         foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
@@ -436,51 +469,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
         return $allRows;
     }
 
-    /**
-     * @return array
-     */
-    public function dataProviderForTestReadCustomEOL()
-    {
-        return [
-            ['csv_with_CR_EOL.csv', "\r"],
-            ['csv_standard.csv', "\n"],
-        ];
-    }
-
-    /**
-     * @dataProvider dataProviderForTestReadCustomEOL
-     *
-     * @param string $fileName
-     * @param string $customEOL
-     * @return void
-     */
-    public function testReadCustomEOLs($fileName, $customEOL)
-    {
-        $allRows = [];
-        $resourcePath = $this->getResourcePath($fileName);
-
-        /** @var \Box\Spout\Reader\CSV\Reader $reader */
-        $reader = ReaderFactory::create(Type::CSV);
-        $reader
-            ->setEndOfLineCharacter($customEOL)
-            ->open($resourcePath);
-
-        foreach ($reader->getSheetIterator() as $sheet) {
-            foreach ($sheet->getRowIterator() as $row) {
-                $allRows[] = $row;
-            }
-        }
-
-        $reader->close();
-
-        $expectedRows = [
-            ['csv--11', 'csv--12', 'csv--13'],
-            ['csv--21', 'csv--22', 'csv--23'],
-            ['csv--31', 'csv--32', 'csv--33'],
-        ];
-        $this->assertEquals($expectedRows, $allRows);
-    }
-
     /**
      * @return void
      */
diff --git a/tests/resources/csv/csv_with_empty_last_line.csv b/tests/resources/csv/csv_with_empty_last_line.csv
deleted file mode 100644
index 8892982..0000000
--- a/tests/resources/csv/csv_with_empty_last_line.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-csv--11,csv--12,csv--13
-csv--31,csv--32,csv--33
diff --git a/tests/resources/csv/csv_with_empty_line.csv b/tests/resources/csv/csv_with_empty_line.csv
deleted file mode 100644
index 8da735f..0000000
--- a/tests/resources/csv/csv_with_empty_line.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-csv--11,csv--12,csv--13
-
-csv--31,csv--32,csv--33
\ No newline at end of file
diff --git a/tests/resources/csv/csv_with_multiple_empty_lines.csv b/tests/resources/csv/csv_with_multiple_empty_lines.csv
new file mode 100644
index 0000000..c25f253
--- /dev/null
+++ b/tests/resources/csv/csv_with_multiple_empty_lines.csv
@@ -0,0 +1,5 @@
+
+csv--21,csv--22,csv--23
+
+csv--41,csv--42,csv--43
+