Fix CSV reader when last line is empty

If the last line of the CSV file was empty, the reader would enter an infinite loop.
This commit is contained in:
Adrien Loison 2015-07-28 23:13:15 -07:00
parent e3f7ecfa64
commit 8a3b895afc
4 changed files with 67 additions and 12 deletions

View File

@ -167,6 +167,18 @@ class GlobalFunctionsHelper
return file_get_contents($filePath);
}
/**
 * Wrapper around global function feof()
 * @see feof()
 *
 * @param resource $handle File pointer to test
 * @return bool Whatever the native feof() reports for the given handle
 */
public function feof($handle)
{
    $hasReachedEnd = feof($handle);

    return $hasReachedEnd;
}
/**
* Wrapper around global function is_readable()
* @see is_readable()

View File

@ -40,6 +40,9 @@ class RowIterator implements IteratorInterface
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
protected $encodingHelper;
/** @var string End of line delimiter, encoded using the same encoding as the CSV */
protected $encodedEOLDelimiter;
/**
* @param resource $filePointer Pointer to the CSV file to read
* @param string $fieldDelimiter Character that delimits fields
@ -108,18 +111,25 @@ class RowIterator implements IteratorInterface
*/
public function next()
{
    // Moves to the next non-empty CSV row.
    // On success, the parsed cells are stored in $this->rowDataBuffer and $this->numReadRows
    // is incremented. If no further row can be read, $this->hasReachedEndOfFile is updated instead.
    $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
    if ($this->hasReachedEndOfFile) {
        return;
    }

    do {
        // Reset on every pass: if the read below fails, the previous (empty) line's parsed
        // data must not survive, otherwise the "empty line" check stays true forever.
        $lineData = false;

        $utf8EncodedLineData = $this->getNextUTF8EncodedLine();
        if ($utf8EncodedLineData !== false) {
            $lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
        }

        $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        // Keep reading when nothing was read but the file is not finished yet,
        // or when a line was read but turned out to be empty.
        $shouldReadNextLine = ($lineData === false) ?
            !$hasNowReachedEndOfFile :
            $this->isEmptyLine($lineData);
    } while ($shouldReadNextLine);

    if ($lineData !== false) {
        $this->rowDataBuffer = $lineData;
        $this->numReadRows++;
    } else {
        // If we reach this point, it means end of file was reached.
        // This happens when the last lines are empty lines.
        $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
    }
}
@ -128,28 +138,45 @@ class RowIterator implements IteratorInterface
* Returns the next line, converted if necessary to UTF-8.
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
*
* @return string The next line for the current file pointer, encoded in UTF-8
* @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/
protected function getNextUTF8EncodedLine()
{
    // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
    // The encoded delimiter is fetched through the caching getter so it is not re-converted for every line.
    $encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
    $encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter);

    // Nothing could be read: report it as FALSE instead of trying to convert a non-string value.
    if ($encodedLineData === false) {
        return false;
    }

    // The line has been read, it can now be converted to UTF-8
    return $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding);
}
/**
 * Gives the "\n" end of line delimiter converted from UTF-8 to the encoding of the CSV.
 * The conversion happens on the first call only; the result is then kept
 * in $this->encodedEOLDelimiter and reused.
 *
 * @return string
 */
protected function getEncodedEOLDelimiter()
{
    // Already converted by a previous call: reuse the stored value
    if (isset($this->encodedEOLDelimiter)) {
        return $this->encodedEOLDelimiter;
    }

    $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);

    return $this->encodedEOLDelimiter;
}
/**
 * Tells whether the given parsed line is an empty line, i.e. an array
 * holding exactly one cell whose value is null.
 *
 * @param array|false $lineData Array containing the cells value for the line, or FALSE when no line could be read
 * @return bool Whether the given line is empty
 */
protected function isEmptyLine($lineData)
{
    // The is_array() guard must come first: callers may pass FALSE when nothing was read,
    // and a non-array value must never be counted or indexed.
    return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
}
/**

View File

@ -115,11 +115,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
}
/**
 * Data sets for testReadShouldSkipEmptyLines: one set per CSV fixture,
 * each holding the fixture's file name as its single argument.
 *
 * @return array
 */
public function dataProviderForTestReadShouldSkipEmptyLines()
{
    $fixtureFileNames = ['csv_with_empty_line.csv', 'csv_with_empty_last_line.csv'];

    $dataSets = [];
    foreach ($fixtureFileNames as $fixtureFileName) {
        $dataSets[] = [$fixtureFileName];
    }

    return $dataSets;
}
/**
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
*
* @param string $fileName
* @return void
*/
public function testReadShouldSkipEmptyLines()
public function testReadShouldSkipEmptyLines($fileName)
{
$allRows = $this->getAllRowsForFile('csv_with_empty_line.csv');
$allRows = $this->getAllRowsForFile($fileName);
$expectedRows = [
['csv--11', 'csv--12', 'csv--13'],

View File

@ -0,0 +1,2 @@
csv--11,csv--12,csv--13
csv--31,csv--32,csv--33
1 csv--11 csv--12 csv--13
2 csv--31 csv--32 csv--33