Fix CSV reader when last line is empty
If the last line was empty, it would create an infinite loop...
This commit is contained in:
parent
e3f7ecfa64
commit
8a3b895afc
@ -167,6 +167,18 @@ class GlobalFunctionsHelper
|
||||
return file_get_contents($filePath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function feof()
|
||||
* @see feof()
|
||||
*
|
||||
* @param resource
|
||||
* @return bool
|
||||
*/
|
||||
public function feof($handle)
|
||||
{
|
||||
return feof($handle);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function is_readable()
|
||||
* @see is_readable()
|
||||
|
@ -40,6 +40,9 @@ class RowIterator implements IteratorInterface
|
||||
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
|
||||
protected $encodingHelper;
|
||||
|
||||
/** @var string End of line delimiter, encoded using the same encoding as the CSV */
|
||||
protected $encodedEOLDelimiter;
|
||||
|
||||
/**
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
@ -108,18 +111,25 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function next()
|
||||
{
|
||||
$lineData = null;
|
||||
$this->hasReachedEndOfFile = feof($this->filePointer);
|
||||
$lineData = false;
|
||||
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
|
||||
if (!$this->hasReachedEndOfFile) {
|
||||
do {
|
||||
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
|
||||
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
} while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData)));
|
||||
if ($utf8EncodedLineData !== false) {
|
||||
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
}
|
||||
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
} while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
|
||||
|
||||
if ($lineData !== false && $lineData !== null) {
|
||||
if ($lineData !== false) {
|
||||
$this->rowDataBuffer = $lineData;
|
||||
$this->numReadRows++;
|
||||
} else {
|
||||
// If we reach this point, it means end of file was reached.
|
||||
// This happens when the last lines are empty lines.
|
||||
$this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -128,28 +138,45 @@ class RowIterator implements IteratorInterface
|
||||
* Returns the next line, converted if necessary to UTF-8.
|
||||
* Neither fgets nor fgetcsv don't work with non UTF-8 data... so we need to do some things manually.
|
||||
*
|
||||
* @return string The next line for the current file pointer, encoded in UTF-8
|
||||
* @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
|
||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||
*/
|
||||
protected function getNextUTF8EncodedLine()
|
||||
{
|
||||
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
|
||||
$encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);
|
||||
$encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
|
||||
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter);
|
||||
|
||||
// Once the line has been read, it can be converted to UTF-8
|
||||
$utf8EncodedLineData = $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding);
|
||||
// If the line could have been read, it can be converted to UTF-8
|
||||
$utf8EncodedLineData = ($encodedLineData !== false) ?
|
||||
$this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
|
||||
false;
|
||||
|
||||
return $utf8EncodedLineData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the end of line delimiter, encoded using the same encoding as the CSV.
|
||||
* The return value is cached.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
protected function getEncodedEOLDelimiter()
|
||||
{
|
||||
if (!isset($this->encodedEOLDelimiter)) {
|
||||
$this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);
|
||||
}
|
||||
|
||||
return $this->encodedEOLDelimiter;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $lineData Array containing the cells value for the line
|
||||
* @return bool Whether the given line is empty
|
||||
*/
|
||||
protected function isEmptyLine($lineData)
|
||||
{
|
||||
return (count($lineData) === 1 && $lineData[0] === null);
|
||||
return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -115,11 +115,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function dataProviderForTestReadShouldSkipEmptyLines()
|
||||
{
|
||||
return [
|
||||
['csv_with_empty_line.csv'],
|
||||
['csv_with_empty_last_line.csv'],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
|
||||
*
|
||||
* @param string $fileName
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyLines()
|
||||
public function testReadShouldSkipEmptyLines($fileName)
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('csv_with_empty_line.csv');
|
||||
$allRows = $this->getAllRowsForFile($fileName);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
|
2
tests/resources/csv/csv_with_empty_last_line.csv
Normal file
2
tests/resources/csv/csv_with_empty_last_line.csv
Normal file
@ -0,0 +1,2 @@
|
||||
csv--11,csv--12,csv--13
|
||||
csv--31,csv--32,csv--33
|
|
Loading…
x
Reference in New Issue
Block a user