From 962f9d03efe1ef17c072c339fd6bf5438fe235be Mon Sep 17 00:00:00 2001 From: madflow Date: Mon, 11 Jun 2018 21:38:30 +0200 Subject: [PATCH] introduce a start and end column index for readers --- .../Common/Manager/OptionsManagerAbstract.php | 2 +- .../Reader/CSV/Manager/OptionsManager.php | 2 + src/Spout/Reader/CSV/RowIterator.php | 38 +++- src/Spout/Reader/Common/Entity/Options.php | 17 +- .../InvalidReaderOptionValueException.php | 7 + src/Spout/Reader/ReaderAbstract.php | 28 +++ tests/Spout/Reader/CSV/ReaderTest.php | 186 +++++++++++++++++- tests/resources/csv/csv_with_headers.csv | 5 + 8 files changed, 265 insertions(+), 20 deletions(-) create mode 100644 src/Spout/Reader/Exception/InvalidReaderOptionValueException.php create mode 100644 tests/resources/csv/csv_with_headers.csv diff --git a/src/Spout/Common/Manager/OptionsManagerAbstract.php b/src/Spout/Common/Manager/OptionsManagerAbstract.php index 20eb14e..3ce00e5 100644 --- a/src/Spout/Common/Manager/OptionsManagerAbstract.php +++ b/src/Spout/Common/Manager/OptionsManagerAbstract.php @@ -7,7 +7,7 @@ namespace Box\Spout\Common\Manager; */ abstract class OptionsManagerAbstract implements OptionsManagerInterface { - const PREFIX_OPTION = 'OPTION_'; + public const PREFIX_OPTION = 'OPTION_'; /** @var string[] List of all supported option names */ private $supportedOptions = []; diff --git a/src/Spout/Reader/CSV/Manager/OptionsManager.php b/src/Spout/Reader/CSV/Manager/OptionsManager.php index befefe3..acb7d5a 100644 --- a/src/Spout/Reader/CSV/Manager/OptionsManager.php +++ b/src/Spout/Reader/CSV/Manager/OptionsManager.php @@ -23,6 +23,8 @@ class OptionsManager extends OptionsManagerAbstract Options::FIELD_DELIMITER, Options::FIELD_ENCLOSURE, Options::ENCODING, + Options::START_COLUMN, + Options::END_COLUMN, ]; } diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index bec3072..85c83f4 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ 
b/src/Spout/Reader/CSV/RowIterator.php @@ -8,6 +8,7 @@ use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; +use Box\Spout\Reader\Exception\InvalidReaderOptionValueException; use Box\Spout\Reader\IteratorInterface; /** @@ -54,6 +55,9 @@ class RowIterator implements IteratorInterface /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; + /** @var OptionsManagerInterface */ + protected $optionsManager; + /** * @param resource $filePointer Pointer to the CSV file to read * @param OptionsManagerInterface $optionsManager @@ -76,6 +80,7 @@ class RowIterator implements IteratorInterface $this->encodingHelper = $encodingHelper; $this->entityFactory = $entityFactory; $this->globalFunctionsHelper = $globalFunctionsHelper; + $this->optionsManager = $optionsManager; } /** @@ -177,9 +182,8 @@ class RowIterator implements IteratorInterface * Returns the next row, converted if necessary to UTF-8. 
* As fgetcsv() does not manage correctly encoding for non UTF-8 data, * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes) - * - * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 - * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read + * @throws InvalidReaderOptionValueException If the start or end column index option is invalid + * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read */ protected function getNextUTF8EncodedRow() { @@ -188,6 +192,33 @@ class RowIterator implements IteratorInterface return false; } + // The start and end column index should be able to be set after the reader has been opened + $startColumnIndex = $this->optionsManager->getOption(Options::START_COLUMN); + $endColumnIndex = $this->optionsManager->getOption(Options::END_COLUMN); + + if ($startColumnIndex < 0) { + throw new InvalidReaderOptionValueException( + 'The start column index has to be a non negative number' + ); + } + + if ($endColumnIndex && $endColumnIndex <= $startColumnIndex) { + throw new InvalidReaderOptionValueException( + 'The end column index has to be a larger number than the start index' + ); + } + + // The range of the cells to be read is determined by the start and end column index + $readerLength = $endColumnIndex ? ($endColumnIndex - $startColumnIndex) + 1 : null; + $encodedRowData = \array_slice($encodedRowData, $startColumnIndex, $readerLength); + + // If there is an end column index - the resulting data is a fixed array + // starting at $startColumnIndex and ending at $endColumnIndex. + // Missing array values are filled with the empty value ''. 
+ if ($endColumnIndex && count($encodedRowData) < $readerLength) { + $encodedRowData = $encodedRowData + \array_fill(0, $readerLength, ''); + } + foreach ($encodedRowData as $cellIndex => $cellValue) { switch ($this->encoding) { case EncodingHelper::ENCODING_UTF16_LE: @@ -202,7 +233,6 @@ class RowIterator implements IteratorInterface $cellValue = rtrim($cellValue); break; } - $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding); } diff --git a/src/Spout/Reader/Common/Entity/Options.php b/src/Spout/Reader/Common/Entity/Options.php index 293d4c0..745bc21 100644 --- a/src/Spout/Reader/Common/Entity/Options.php +++ b/src/Spout/Reader/Common/Entity/Options.php @@ -9,15 +9,18 @@ namespace Box\Spout\Reader\Common\Entity; abstract class Options { // Common options - const SHOULD_FORMAT_DATES = 'shouldFormatDates'; - const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows'; + public const SHOULD_FORMAT_DATES = 'shouldFormatDates'; + public const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows'; + + public const START_COLUMN = 'startColumn'; + public const END_COLUMN = 'endColumn'; // CSV specific options - const FIELD_DELIMITER = 'fieldDelimiter'; - const FIELD_ENCLOSURE = 'fieldEnclosure'; - const ENCODING = 'encoding'; + public const FIELD_DELIMITER = 'fieldDelimiter'; + public const FIELD_ENCLOSURE = 'fieldEnclosure'; + public const ENCODING = 'encoding'; // XLSX specific options - const TEMP_FOLDER = 'tempFolder'; - const SHOULD_USE_1904_DATES = 'shouldUse1904Dates'; + public const TEMP_FOLDER = 'tempFolder'; + public const SHOULD_USE_1904_DATES = 'shouldUse1904Dates'; } diff --git a/src/Spout/Reader/Exception/InvalidReaderOptionValueException.php b/src/Spout/Reader/Exception/InvalidReaderOptionValueException.php new file mode 100644 index 0000000..e96edd8 --- /dev/null +++ b/src/Spout/Reader/Exception/InvalidReaderOptionValueException.php @@ -0,0 +1,7 @@ +optionsManager->setOption(Options::START_COLUMN, 
$startColumnIndex); + + return $this; + } + + /** + * @param int $endColumnIndex + * @return ReaderAbstract + */ + public function setEndColumnIndex(int $endColumnIndex) : ReaderAbstract + { + $this->optionsManager->setOption(Options::END_COLUMN, $endColumnIndex); + + return $this; + } + /** * Prepares the reader to read the given file. It also makes sure * that the file exists and is readable. diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index 684b045..37ff6a4 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -3,13 +3,13 @@ namespace Box\Spout\Reader\CSV; use Box\Spout\Common\Creator\HelperFactory; +use Box\Spout\Common\Entity\Row; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Helper\EncodingHelper; use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; use Box\Spout\Reader\CSV\Manager\OptionsManager; use Box\Spout\Reader\Exception\ReaderNotOpenedException; -use Box\Spout\Reader\ReaderInterface; use Box\Spout\TestUsingResource; use PHPUnit\Framework\TestCase; @@ -474,10 +474,144 @@ class ReaderTest extends TestCase $reader->open('unsupported://foobar'); } + /** + * @return void + */ + public function testReadWithStartAndEndColumn() + { + $fileName = 'csv_with_headers.csv'; + $allRows = $this->getAllRowsForFile($fileName); + + $expectedRows = [ + ['Header-1', 'Header-2', 'Header-3', ''], + ['Test-1', 'Test-2', 'Test-3', ''], + ['Test-1', '', '', ''], + ['Test-1', 'Test-2', 'Test-3', 'Test-4'], + ['', '', 'Test-3', ''], + ]; + + $this->assertEquals($expectedRows, $allRows, 'All columns are respected without starting column'); + + $expectedRowsWithStartAndEnd = [ + ['Header-2', 'Header-3'], + ['Test-2', 'Test-3'], + ['', ''], + ['Test-2', 'Test-3'], + ['', 'Test-3'], + ]; + + $rowsWithRange = $this->getAllRowsForFileWithRange($fileName, 1, 2); + + $this->assertEquals( + $expectedRowsWithStartAndEnd, + 
$rowsWithRange, + 'All columns are read starting at index 1 and ending at index 2' + ); + + $expectedRowsWithStart = [ + ['Header-3', ''], + ['Test-3', ''], + ['', ''], + ['Test-3', 'Test-4'], + ['Test-3', ''], + ]; + + $rowsWithStart = $this->getAllRowsForFileWithRange($fileName, 2); + + $this->assertEquals( + $expectedRowsWithStart, + $rowsWithStart, + 'All columns are read starting at index 2' + ); + + $expectedRowsWithEnd = [ + ['Header-1', 'Header-2', 'Header-3'], + ['Test-1', 'Test-2', 'Test-3'], + ['Test-1', '', ''], + ['Test-1', 'Test-2', 'Test-3'], + ['', '', 'Test-3'], + ]; + + $rowsWithEnd = $this->getAllRowsForFileWithRange($fileName, 0, 2); + + $this->assertEquals( + $expectedRowsWithEnd, + $rowsWithEnd, + 'All columns are read ending at index 2' + ); + } + + /** + * @return void + */ + public function testSetStartAndEndColumnAfterReaderOpened() + { + $fileName = 'csv_with_headers.csv'; + $resourcePath = $this->getResourcePath($fileName); + $allRows = []; + $expectedRowsWithStartAndEnd = [ + ['Header-2', 'Header-3'], + ['Test-2', 'Test-3'], + ['', ''], + ['Test-2', 'Test-3'], + ['', 'Test-3'], + ]; + + /** @var \Box\Spout\Reader\CSV\Reader $reader */ + $reader = $this->createCSVReader(); + $reader->open($resourcePath); + $reader->setStartColumnIndex(1); + $reader->setEndColumnIndex(2); + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + /** + * @var int + * @var Row $row + */ + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row->toArray(); + } + } + $reader->close(); + $this->assertEquals($expectedRowsWithStartAndEnd, $allRows, 'Correct range set after reader was opened'); + } + + public function testDifferentCellsAndRange() + { + $fileName = 'csv_with_different_cells_number.csv'; + $allRows = $this->getAllRowsForFileWithRange($fileName, 0, 2); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', ''], + ['csv--31', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + 
} + + /** + * @return void + * @expectedException \Box\Spout\Reader\Exception\InvalidReaderOptionValueException + */ + public function testNegativeStartColumnIndex() + { + $fileName = 'csv_with_headers.csv'; + $this->getAllRowsForFileWithRange($fileName, -1); + } + + /** + * @return void + * @expectedException \Box\Spout\Reader\Exception\InvalidReaderOptionValueException + */ + public function testEndColumnIndexSmallerThanStartIndex() + { + $fileName = 'csv_with_headers.csv'; + $this->getAllRowsForFileWithRange($fileName, 3, 1); + } + /** * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper|null $optionsManager * @param \Box\Spout\Common\Manager\OptionsManagerInterface|null $globalFunctionsHelper - * @return ReaderInterface + * @return Reader */ private function createCSVReader($optionsManager = null, $globalFunctionsHelper = null) { @@ -494,28 +628,42 @@ class ReaderTest extends TestCase * @param string $fieldEnclosure * @param string $encoding * @param bool $shouldPreserveEmptyRows + * @param int $startColumnIndex + * @param int|null $endColumnIndex * @return array All the read rows the given file */ private function getAllRowsForFile( - $fileName, - $fieldDelimiter = ',', - $fieldEnclosure = '"', - $encoding = EncodingHelper::ENCODING_UTF8, - $shouldPreserveEmptyRows = false - ) { + string $fileName, + string $fieldDelimiter = ',', + string $fieldEnclosure = '"', + string $encoding = EncodingHelper::ENCODING_UTF8, + bool $shouldPreserveEmptyRows = false, + int $startColumnIndex = 0, + int $endColumnIndex = null + ) : array { $allRows = []; $resourcePath = $this->getResourcePath($fileName); /** @var \Box\Spout\Reader\CSV\Reader $reader */ $reader = $this->createCSVReader(); + + if ($endColumnIndex) { + $reader->setEndColumnIndex($endColumnIndex); + } + $reader ->setFieldDelimiter($fieldDelimiter) ->setFieldEnclosure($fieldEnclosure) ->setEncoding($encoding) ->setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + ->setStartColumnIndex($startColumnIndex) 
->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + /** + * @var int $rowIndex + * @var Row $row + */ foreach ($sheet->getRowIterator() as $rowIndex => $row) { $allRows[] = $row->toArray(); } @@ -525,4 +673,26 @@ class ReaderTest extends TestCase return $allRows; } + + /** + * @param string $fileName + * @param int $startColumnIndex + * @param int|null $endColumnIndex + * @return array + */ + protected function getAllRowsForFileWithRange( + string $fileName, + int $startColumnIndex = 0, + int $endColumnIndex = null + ) : array { + return $this->getAllRowsForFile( + $fileName, + ',', + '"', + EncodingHelper::ENCODING_UTF8, + false, + $startColumnIndex, + $endColumnIndex + ); + } } diff --git a/tests/resources/csv/csv_with_headers.csv b/tests/resources/csv/csv_with_headers.csv new file mode 100644 index 0000000..a543a26 --- /dev/null +++ b/tests/resources/csv/csv_with_headers.csv @@ -0,0 +1,5 @@ +"Header-1","Header-2","Header-3", +"Test-1","Test-2","Test-3", +"Test-1",,, +"Test-1","Test-2","Test-3","Test-4" +,,"Test-3",