Merge pull request #77 from box/fix_csv_reader_empty_last_line

Fix CSV reader when last line is empty
This commit is contained in:
Adrien Loison 2015-07-29 10:27:44 -07:00
commit 40a86c4b6c
4 changed files with 67 additions and 12 deletions

View File

@ -167,6 +167,18 @@ class GlobalFunctionsHelper
return file_get_contents($filePath); return file_get_contents($filePath);
} }
/**
* Wrapper around global function feof().
* Forwards the given handle to feof() unchanged and returns its result;
* the indirection presumably exists so callers can substitute this helper in tests - TODO confirm.
* @see feof()
*
* @param resource $handle Handle passed straight through to feof()
* @return bool The value returned by feof() for the given handle
*/
public function feof($handle)
{
return feof($handle);
}
/** /**
* Wrapper around global function is_readable() * Wrapper around global function is_readable()
* @see is_readable() * @see is_readable()

View File

@ -40,6 +40,9 @@ class RowIterator implements IteratorInterface
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */ /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
protected $encodingHelper; protected $encodingHelper;
/** @var string End of line delimiter, encoded using the same encoding as the CSV */
protected $encodedEOLDelimiter;
/** /**
* @param resource $filePointer Pointer to the CSV file to read * @param resource $filePointer Pointer to the CSV file to read
* @param string $fieldDelimiter Character that delimits fields * @param string $fieldDelimiter Character that delimits fields
@ -108,18 +111,25 @@ class RowIterator implements IteratorInterface
*/ */
public function next() public function next()
{ {
$lineData = null; $lineData = false;
$this->hasReachedEndOfFile = feof($this->filePointer); $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
if (!$this->hasReachedEndOfFile) { if (!$this->hasReachedEndOfFile) {
do { do {
$utf8EncodedLineData = $this->getNextUTF8EncodedLine(); $utf8EncodedLineData = $this->getNextUTF8EncodedLine();
if ($utf8EncodedLineData !== false) {
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure); $lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
} while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData))); }
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
} while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
if ($lineData !== false && $lineData !== null) { if ($lineData !== false) {
$this->rowDataBuffer = $lineData; $this->rowDataBuffer = $lineData;
$this->numReadRows++; $this->numReadRows++;
} else {
// If we reach this point, it means end of file was reached.
// This happens when the last lines are empty lines.
$this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
} }
} }
} }
@ -128,28 +138,45 @@ class RowIterator implements IteratorInterface
* Returns the next line, converted if necessary to UTF-8. * Returns the next line, converted if necessary to UTF-8.
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually. * Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
* *
* @return string The next line for the current file pointer, encoded in UTF-8 * @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/ */
protected function getNextUTF8EncodedLine() protected function getNextUTF8EncodedLine()
{ {
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding. // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
$encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding); $encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter); $encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, 0, $encodedEOLDelimiter);
// Once the line has been read, it can be converted to UTF-8 // If the line could have been read, it can be converted to UTF-8
$utf8EncodedLineData = $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding); $utf8EncodedLineData = ($encodedLineData !== false) ?
$this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
false;
return $utf8EncodedLineData; return $utf8EncodedLineData;
} }
/**
 * Gives back the "\n" end of line delimiter expressed in the encoding of the CSV being read.
 * The conversion is performed on the first call only; the converted value is then kept
 * in $this->encodedEOLDelimiter and reused by later calls.
 *
 * @return string
 */
protected function getEncodedEOLDelimiter()
{
    // Fast path: the delimiter has already been converted and stored.
    if (isset($this->encodedEOLDelimiter)) {
        return $this->encodedEOLDelimiter;
    }

    $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);

    return $this->encodedEOLDelimiter;
}
/** /**
* @param array $lineData Array containing the cells value for the line * @param array $lineData Array containing the cells value for the line
* @return bool Whether the given line is empty * @return bool Whether the given line is empty
*/ */
protected function isEmptyLine($lineData) protected function isEmptyLine($lineData)
{ {
return (count($lineData) === 1 && $lineData[0] === null); return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
} }
/** /**

View File

@ -115,11 +115,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
} }
/** /**
* @return array
*/
public function dataProviderForTestReadShouldSkipEmptyLines()
{
    // Each data set carries a single argument: the name of a CSV fixture file.
    $dataSets = [];
    foreach (['csv_with_empty_line.csv', 'csv_with_empty_last_line.csv'] as $fixtureName) {
        $dataSets[] = [$fixtureName];
    }

    return $dataSets;
}
/**
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
*
* @param string $fileName
* @return void * @return void
*/ */
public function testReadShouldSkipEmptyLines() public function testReadShouldSkipEmptyLines($fileName)
{ {
$allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); $allRows = $this->getAllRowsForFile($fileName);
$expectedRows = [ $expectedRows = [
['csv--11', 'csv--12', 'csv--13'], ['csv--11', 'csv--12', 'csv--13'],

View File

@ -0,0 +1,2 @@
csv--11,csv--12,csv--13
csv--31,csv--32,csv--33
1 csv--11 csv--12 csv--13
2 csv--31 csv--32 csv--33