Add option to preserve empty rows when reading a CSV file
This commit is contained in:
parent
503809eb53
commit
dd7cb1b04e
@ -119,8 +119,9 @@ class Reader extends AbstractReader
|
||||
$this->filePointer,
|
||||
$this->fieldDelimiter,
|
||||
$this->fieldEnclosure,
|
||||
$this->encoding,
|
||||
$this->endOfLineCharacter,
|
||||
$this->encoding,
|
||||
$this->shouldPreserveEmptyRows,
|
||||
$this->globalFunctionsHelper
|
||||
);
|
||||
}
|
||||
|
@ -52,21 +52,26 @@ class RowIterator implements IteratorInterface
|
||||
/** @var string End of line delimiter, given by the user as input. */
|
||||
protected $inputEOLDelimiter;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/**
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that encloses fields
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param string $endOfLineDelimiter End of line delimiter
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper)
|
||||
{
|
||||
$this->filePointer = $filePointer;
|
||||
$this->fieldDelimiter = $fieldDelimiter;
|
||||
$this->fieldEnclosure = $fieldEnclosure;
|
||||
$this->encoding = $encoding;
|
||||
$this->inputEOLDelimiter = $endOfLineDelimiter;
|
||||
$this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
|
||||
$this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
|
||||
@ -114,7 +119,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Move forward to next element. Empty rows are skipped.
|
||||
* Move forward to next element. Reads data for the next unprocessed row.
|
||||
* @link http://php.net/manual/en/iterator.next.php
|
||||
*
|
||||
* @return void
|
||||
@ -124,25 +129,48 @@ class RowIterator implements IteratorInterface
|
||||
{
|
||||
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
|
||||
if ($this->hasReachedEndOfFile) {
|
||||
return;
|
||||
if (!$this->hasReachedEndOfFile) {
|
||||
$this->readDataForNextRow();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||
*/
|
||||
protected function readDataForNextRow()
|
||||
{
|
||||
do {
|
||||
$rowData = $this->getNextUTF8EncodedRow();
|
||||
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
} while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
|
||||
} while ($this->shouldReadNextRow($rowData));
|
||||
|
||||
if ($rowData !== false) {
|
||||
$this->rowDataBuffer = $rowData;
|
||||
// str_replace will replace NULL values with empty strings
|
||||
$this->rowDataBuffer = str_replace(null, null, $rowData);
|
||||
$this->numReadRows++;
|
||||
} else {
|
||||
// If we reach this point, it means end of file was reached.
|
||||
// This happens when the last lines are empty lines.
|
||||
$this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
|
||||
$this->hasReachedEndOfFile = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array|bool $currentRowData
|
||||
* @return bool TRUE if we need to keep reading (no row data fetched yet and end of file not reached, or an empty line that must be skipped), FALSE if the data for the current row can be returned
|
||||
*/
|
||||
protected function shouldReadNextRow($currentRowData)
|
||||
{
|
||||
$hasSuccessfullyFetchedRowData = ($currentRowData !== false);
|
||||
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
$isEmptyLine = $this->isEmptyLine($currentRowData);
|
||||
|
||||
return (
|
||||
(!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
|
||||
(!$this->shouldPreserveEmptyRows && $isEmptyLine)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next row, converted if necessary to UTF-8.
|
||||
* As fgetcsv() does not manage correctly encoding for non UTF-8 data,
|
||||
@ -154,7 +182,7 @@ class RowIterator implements IteratorInterface
|
||||
protected function getNextUTF8EncodedRow()
|
||||
{
|
||||
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
if (false === $encodedRowData) {
|
||||
if ($encodedRowData === false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -195,7 +223,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $lineData Array containing the cells value for the line
|
||||
* @param array|bool $lineData Array containing the cells value for the line
|
||||
* @return bool Whether the given line is empty
|
||||
*/
|
||||
protected function isEmptyLine($lineData)
|
||||
|
@ -18,12 +18,21 @@ class Sheet implements SheetInterface
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that encloses fields
|
||||
* @param string $endOfLineCharacter Character defining the end of a line
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
|
||||
public function __construct(
|
||||
$filePointer, $fieldDelimiter, $fieldEnclosure,
|
||||
$endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
|
||||
$globalFunctionsHelper)
|
||||
{
|
||||
$this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
|
||||
$this->rowIterator = new RowIterator(
|
||||
$filePointer, $fieldDelimiter, $fieldEnclosure,
|
||||
$endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
|
||||
$globalFunctionsHelper
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -22,12 +22,21 @@ class SheetIterator implements IteratorInterface
|
||||
* @param resource $filePointer
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that encloses fields
|
||||
* @param string $endOfLineCharacter Character defining the end of a line
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
|
||||
public function __construct(
|
||||
$filePointer, $fieldDelimiter, $fieldEnclosure,
|
||||
$endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
|
||||
$globalFunctionsHelper)
|
||||
{
|
||||
$this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
|
||||
$this->sheet = new Sheet(
|
||||
$filePointer, $fieldDelimiter, $fieldEnclosure,
|
||||
$endOfLineCharacter, $encoding, $shouldPreserveEmptyRows,
|
||||
$globalFunctionsHelper
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -115,29 +115,40 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
* @return void
|
||||
*/
|
||||
public function dataProviderForTestReadShouldSkipEmptyLines()
|
||||
public function testReadShouldSkipEmptyLinesIfShouldPreserveEmptyRowsNotSet()
|
||||
{
|
||||
return [
|
||||
['csv_with_empty_line.csv'],
|
||||
['csv_with_empty_last_line.csv'],
|
||||
$allRows = $this->getAllRowsForFile('csv_with_multiple_empty_lines.csv');
|
||||
|
||||
$expectedRows = [
|
||||
// skipped row here
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
// skipped row here
|
||||
['csv--41', 'csv--42', 'csv--43'],
|
||||
// skipped row here
|
||||
// last row empty
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
|
||||
*
|
||||
* @param string $fileName
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyLines($fileName)
|
||||
public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile($fileName);
|
||||
$allRows = $this->getAllRowsForFile(
|
||||
'csv_with_multiple_empty_lines.csv',
|
||||
',', '"', "\n", EncodingHelper::ENCODING_UTF8,
|
||||
$shouldPreserveEmptyRows = true
|
||||
);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
[''],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
[''],
|
||||
['csv--41', 'csv--42', 'csv--43'],
|
||||
[''],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
@ -204,6 +215,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals('This is, a comma', $allRows[0][0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadCustomEOLs()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('csv_with_CR_EOL.csv', ',', '"', "\r");
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
@ -236,7 +262,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testReadShouldSkipBom($fileName, $fileEncoding)
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding);
|
||||
$allRows = $this->getAllRowsForFile($fileName, ',', '"', "\n", $fileEncoding);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
@ -275,6 +301,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper|\PHPUnit_Framework_MockObject_MockObject $helperStub */
|
||||
$helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
|
||||
->setMethods(['function_exists'])
|
||||
->getMock();
|
||||
@ -405,14 +432,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
* @param string $fileName
|
||||
* @param string|void $fieldDelimiter
|
||||
* @param string|void $fieldEnclosure
|
||||
* @param string|void $endOfLineCharacter
|
||||
* @param string|void $encoding
|
||||
* @param bool|void $shouldPreserveEmptyRows
|
||||
* @return array All the rows read from the given file
|
||||
*/
|
||||
private function getAllRowsForFile(
|
||||
$fileName,
|
||||
$fieldDelimiter = ',',
|
||||
$fieldEnclosure = '"',
|
||||
$encoding = EncodingHelper::ENCODING_UTF8)
|
||||
$endOfLineCharacter = "\n",
|
||||
$encoding = EncodingHelper::ENCODING_UTF8,
|
||||
$shouldPreserveEmptyRows = false)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
@ -422,7 +453,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$reader
|
||||
->setFieldDelimiter($fieldDelimiter)
|
||||
->setFieldEnclosure($fieldEnclosure)
|
||||
->setEndOfLineCharacter($endOfLineCharacter)
|
||||
->setEncoding($encoding)
|
||||
->setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
@ -436,51 +469,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
return $allRows;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function dataProviderForTestReadCustomEOL()
|
||||
{
|
||||
return [
|
||||
['csv_with_CR_EOL.csv', "\r"],
|
||||
['csv_standard.csv', "\n"],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadCustomEOL
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param string $customEOL
|
||||
* @return void
|
||||
*/
|
||||
public function testReadCustomEOLs($fileName, $customEOL)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\CSV\Reader $reader */
|
||||
$reader = ReaderFactory::create(Type::CSV);
|
||||
$reader
|
||||
->setEndOfLineCharacter($customEOL)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheet) {
|
||||
foreach ($sheet->getRowIterator() as $row) {
|
||||
$allRows[] = $row;
|
||||
}
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
|
@ -1,2 +0,0 @@
|
||||
csv--11,csv--12,csv--13
|
||||
csv--31,csv--32,csv--33
|
|
@ -1,3 +0,0 @@
|
||||
csv--11,csv--12,csv--13
|
||||
|
||||
csv--31,csv--32,csv--33
|
|
5
tests/resources/csv/csv_with_multiple_empty_lines.csv
Normal file
5
tests/resources/csv/csv_with_multiple_empty_lines.csv
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
csv--21,csv--22,csv--23
|
||||
|
||||
csv--41,csv--42,csv--43
|
||||
|
|
Loading…
x
Reference in New Issue
Block a user