Fix CSV reader when last line is empty
If the last line of the CSV file was empty, the reader would get stuck in an infinite loop.
This commit is contained in:
parent
e3f7ecfa64
commit
8a3b895afc
@ -167,6 +167,18 @@ class GlobalFunctionsHelper
|
|||||||
return file_get_contents($filePath);
|
return file_get_contents($filePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper around global function feof()
|
||||||
|
* @see feof()
|
||||||
|
*
|
||||||
|
* @param resource
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
public function feof($handle)
|
||||||
|
{
|
||||||
|
return feof($handle);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper around global function is_readable()
|
* Wrapper around global function is_readable()
|
||||||
* @see is_readable()
|
* @see is_readable()
|
||||||
|
@ -40,6 +40,9 @@ class RowIterator implements IteratorInterface
|
|||||||
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
|
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
|
||||||
protected $encodingHelper;
|
protected $encodingHelper;
|
||||||
|
|
||||||
|
/** @var string End of line delimiter, encoded using the same encoding as the CSV */
|
||||||
|
protected $encodedEOLDelimiter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param resource $filePointer Pointer to the CSV file to read
|
* @param resource $filePointer Pointer to the CSV file to read
|
||||||
* @param string $fieldDelimiter Character that delimits fields
|
* @param string $fieldDelimiter Character that delimits fields
|
||||||
@ -108,18 +111,25 @@ class RowIterator implements IteratorInterface
|
|||||||
*/
|
*/
|
||||||
public function next()
|
public function next()
|
||||||
{
|
{
|
||||||
$lineData = null;
|
$lineData = false;
|
||||||
$this->hasReachedEndOfFile = feof($this->filePointer);
|
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||||
|
|
||||||
if (!$this->hasReachedEndOfFile) {
|
if (!$this->hasReachedEndOfFile) {
|
||||||
do {
|
do {
|
||||||
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
|
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
|
||||||
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
|
if ($utf8EncodedLineData !== false) {
|
||||||
} while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData)));
|
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||||
|
}
|
||||||
|
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||||
|
} while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
|
||||||
|
|
||||||
if ($lineData !== false && $lineData !== null) {
|
if ($lineData !== false) {
|
||||||
$this->rowDataBuffer = $lineData;
|
$this->rowDataBuffer = $lineData;
|
||||||
$this->numReadRows++;
|
$this->numReadRows++;
|
||||||
|
} else {
|
||||||
|
// If we reach this point, it means end of file was reached.
|
||||||
|
// This happens when the last lines are empty lines.
|
||||||
|
$this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -128,28 +138,45 @@ class RowIterator implements IteratorInterface
|
|||||||
* Returns the next line, converted if necessary to UTF-8.
|
* Returns the next line, converted if necessary to UTF-8.
|
||||||
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
|
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
|
||||||
*
|
*
|
||||||
* @return string The next line for the current file pointer, encoded in UTF-8
|
* @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
|
||||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||||
*/
|
*/
|
||||||
protected function getNextUTF8EncodedLine()
|
protected function getNextUTF8EncodedLine()
|
||||||
{
|
{
|
||||||
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
|
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
|
||||||
$encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);
|
$encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
|
||||||
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter);
|
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter);
|
||||||
|
|
||||||
// Once the line has been read, it can be converted to UTF-8
|
// If the line could have been read, it can be converted to UTF-8
|
||||||
$utf8EncodedLineData = $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding);
|
$utf8EncodedLineData = ($encodedLineData !== false) ?
|
||||||
|
$this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
|
||||||
|
false;
|
||||||
|
|
||||||
return $utf8EncodedLineData;
|
return $utf8EncodedLineData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the end of line delimiter, encoded using the same encoding as the CSV.
|
||||||
|
* The return value is cached.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
protected function getEncodedEOLDelimiter()
|
||||||
|
{
|
||||||
|
if (!isset($this->encodedEOLDelimiter)) {
|
||||||
|
$this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $this->encodedEOLDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param array $lineData Array containing the cells value for the line
|
* @param array $lineData Array containing the cells value for the line
|
||||||
* @return bool Whether the given line is empty
|
* @return bool Whether the given line is empty
|
||||||
*/
|
*/
|
||||||
protected function isEmptyLine($lineData)
|
protected function isEmptyLine($lineData)
|
||||||
{
|
{
|
||||||
return (count($lineData) === 1 && $lineData[0] === null);
|
return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -115,11 +115,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function dataProviderForTestReadShouldSkipEmptyLines()
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
['csv_with_empty_line.csv'],
|
||||||
|
['csv_with_empty_last_line.csv'],
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
|
||||||
|
*
|
||||||
|
* @param string $fileName
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function testReadShouldSkipEmptyLines()
|
public function testReadShouldSkipEmptyLines($fileName)
|
||||||
{
|
{
|
||||||
$allRows = $this->getAllRowsForFile('csv_with_empty_line.csv');
|
$allRows = $this->getAllRowsForFile($fileName);
|
||||||
|
|
||||||
$expectedRows = [
|
$expectedRows = [
|
||||||
['csv--11', 'csv--12', 'csv--13'],
|
['csv--11', 'csv--12', 'csv--13'],
|
||||||
|
2
tests/resources/csv/csv_with_empty_last_line.csv
Normal file
2
tests/resources/csv/csv_with_empty_last_line.csv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
csv--11,csv--12,csv--13
|
||||||
|
csv--31,csv--32,csv--33
|
|
Loading…
x
Reference in New Issue
Block a user