From 8a3b895afc01f6a3a61a386a5b16bac70d462a19 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Tue, 28 Jul 2015 23:13:15 -0700 Subject: [PATCH] Fix CSV reader when last line is empty If the last line was empty, it would create an infinite loop... --- .../Common/Helper/GlobalFunctionsHelper.php | 12 +++++ src/Spout/Reader/CSV/RowIterator.php | 47 +++++++++++++++---- tests/Spout/Reader/CSV/ReaderTest.php | 18 ++++++- .../csv/csv_with_empty_last_line.csv | 2 + 4 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 tests/resources/csv/csv_with_empty_last_line.csv diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index 7cd8de5..6b5d7e5 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -167,6 +167,18 @@ class GlobalFunctionsHelper return file_get_contents($filePath); } + /** + * Wrapper around global function feof() + * @see feof() + * + * @param resource + * @return bool + */ + public function feof($handle) + { + return feof($handle); + } + /** * Wrapper around global function is_readable() * @see is_readable() diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index 3de1da7..0c9fc07 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -40,6 +40,9 @@ class RowIterator implements IteratorInterface /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */ protected $encodingHelper; + /** @var string End of line delimiter, encoded using the same encoding as the CSV */ + protected $encodedEOLDelimiter; + /** * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields @@ -108,18 +111,25 @@ class RowIterator implements IteratorInterface */ public function next() { - $lineData = null; - $this->hasReachedEndOfFile = feof($this->filePointer); + $lineData = false; + $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); if (!$this->hasReachedEndOfFile) { do { $utf8EncodedLineData = $this->getNextUTF8EncodedLine(); - $lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure); - } while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData))); + if ($utf8EncodedLineData !== false) { + $lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure); + } + $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); + } while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData)); - if ($lineData !== false && $lineData !== null) { + if ($lineData !== false) { $this->rowDataBuffer = $lineData; $this->numReadRows++; + } else { + // If we reach this point, it means end of file was reached. + // This happens when the last lines are empty lines. + $this->hasReachedEndOfFile = $hasNowReachedEndOfFile; } } } @@ -128,28 +138,45 @@ class RowIterator implements IteratorInterface * Returns the next line, converted if necessary to UTF-8. * Neither fgets nor fgetcsv don't work with non UTF-8 data... so we need to do some things manually. * - * @return string The next line for the current file pointer, encoded in UTF-8 + * @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 */ protected function getNextUTF8EncodedLine() { // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding. - $encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding); + $encodedEOLDelimiter = $this->getEncodedEOLDelimiter(); $encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter); - // Once the line has been read, it can be converted to UTF-8 - $utf8EncodedLineData = $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding); + // If the line could have been read, it can be converted to UTF-8 + $utf8EncodedLineData = ($encodedLineData !== false) ? + $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) : + false; return $utf8EncodedLineData; } + /** + * Returns the end of line delimiter, encoded using the same encoding as the CSV. + * The return value is cached. + * + * @return string + */ + protected function getEncodedEOLDelimiter() + { + if (!isset($this->encodedEOLDelimiter)) { + $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding); + } + + return $this->encodedEOLDelimiter; + } + /** * @param array $lineData Array containing the cells value for the line * @return bool Whether the given line is empty */ protected function isEmptyLine($lineData) { - return (count($lineData) === 1 && $lineData[0] === null); + return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null); } /** diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index 180441e..b868b1e 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -115,11 +115,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase } /** + * @return array + */ + public function dataProviderForTestReadShouldSkipEmptyLines() + { + return [ + ['csv_with_empty_line.csv'], + ['csv_with_empty_last_line.csv'], + ]; + } + + /** + * @dataProvider dataProviderForTestReadShouldSkipEmptyLines + * + * @param string $fileName * @return void */ - public function testReadShouldSkipEmptyLines() + public function testReadShouldSkipEmptyLines($fileName) { - $allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); + $allRows = $this->getAllRowsForFile($fileName); $expectedRows = [ ['csv--11', 'csv--12', 'csv--13'], diff --git a/tests/resources/csv/csv_with_empty_last_line.csv b/tests/resources/csv/csv_with_empty_last_line.csv new file mode 100644 index 0000000..8892982 --- /dev/null +++ b/tests/resources/csv/csv_with_empty_last_line.csv @@ -0,0 +1,2 @@ +csv--11,csv--12,csv--13 +csv--31,csv--32,csv--33