introduce a start and end column index for readers

This commit is contained in:
madflow 2018-06-11 21:38:30 +02:00
parent b105d15f08
commit 962f9d03ef
8 changed files with 265 additions and 20 deletions

View File

@ -7,7 +7,7 @@ namespace Box\Spout\Common\Manager;
*/ */
abstract class OptionsManagerAbstract implements OptionsManagerInterface abstract class OptionsManagerAbstract implements OptionsManagerInterface
{ {
const PREFIX_OPTION = 'OPTION_'; public const PREFIX_OPTION = 'OPTION_';
/** @var string[] List of all supported option names */ /** @var string[] List of all supported option names */
private $supportedOptions = []; private $supportedOptions = [];

View File

@ -23,6 +23,8 @@ class OptionsManager extends OptionsManagerAbstract
Options::FIELD_DELIMITER, Options::FIELD_DELIMITER,
Options::FIELD_ENCLOSURE, Options::FIELD_ENCLOSURE,
Options::ENCODING, Options::ENCODING,
Options::START_COLUMN,
Options::END_COLUMN,
]; ];
} }

View File

@ -8,6 +8,7 @@ use Box\Spout\Common\Helper\GlobalFunctionsHelper;
use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Common\Manager\OptionsManagerInterface;
use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\Common\Entity\Options;
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
use Box\Spout\Reader\Exception\InvalidReaderOptionValueException;
use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\IteratorInterface;
/** /**
@ -54,6 +55,9 @@ class RowIterator implements IteratorInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper; protected $globalFunctionsHelper;
/** @var OptionsManagerInterface */
protected $optionsManager;
/** /**
* @param resource $filePointer Pointer to the CSV file to read * @param resource $filePointer Pointer to the CSV file to read
* @param OptionsManagerInterface $optionsManager * @param OptionsManagerInterface $optionsManager
@ -76,6 +80,7 @@ class RowIterator implements IteratorInterface
$this->encodingHelper = $encodingHelper; $this->encodingHelper = $encodingHelper;
$this->entityFactory = $entityFactory; $this->entityFactory = $entityFactory;
$this->globalFunctionsHelper = $globalFunctionsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper;
$this->optionsManager = $optionsManager;
} }
/** /**
@ -177,9 +182,8 @@ class RowIterator implements IteratorInterface
* Returns the next row, converted if necessary to UTF-8. * Returns the next row, converted if necessary to UTF-8.
* As fgetcsv() does not manage correctly encoding for non UTF-8 data, * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
* we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes) * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
* * @throws InvalidReaderOptionValueException
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 * @return array|false If unable to convert data to UTF-8
* @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
*/ */
protected function getNextUTF8EncodedRow() protected function getNextUTF8EncodedRow()
{ {
@ -188,6 +192,33 @@ class RowIterator implements IteratorInterface
return false; return false;
} }
// The start and end column index should be able to be set after the reader has been opened
$startColumnIndex = $this->optionsManager->getOption(Options::START_COLUMN);
$endColumnIndex = $this->optionsManager->getOption(Options::END_COLUMN);
if ($startColumnIndex < 0) {
throw new InvalidReaderOptionValueException(
'The start column index has to be a non negative number'
);
}
if ($endColumnIndex && $endColumnIndex <= $startColumnIndex) {
throw new InvalidReaderOptionValueException(
'The end column index has to be a larger number than the start index'
);
}
// The range of the cells to be read is determined by the start and end column index
$readerLength = $endColumnIndex ? ($endColumnIndex - $startColumnIndex) + 1 : null;
$encodedRowData = \array_slice($encodedRowData, $startColumnIndex, $readerLength);
// If there is an end column index - the resulting data is a fixed array
// starting at $startColumnIndex and ending at $endColumnIndex.
// Missing array values are filled with the empty value ''.
if ($endColumnIndex && count($encodedRowData) < $readerLength) {
$encodedRowData = $encodedRowData + \array_fill(0, $readerLength, '');
}
foreach ($encodedRowData as $cellIndex => $cellValue) { foreach ($encodedRowData as $cellIndex => $cellValue) {
switch ($this->encoding) { switch ($this->encoding) {
case EncodingHelper::ENCODING_UTF16_LE: case EncodingHelper::ENCODING_UTF16_LE:
@ -202,7 +233,6 @@ class RowIterator implements IteratorInterface
$cellValue = rtrim($cellValue); $cellValue = rtrim($cellValue);
break; break;
} }
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding); $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
} }

View File

@ -9,15 +9,18 @@ namespace Box\Spout\Reader\Common\Entity;
abstract class Options abstract class Options
{ {
// Common options // Common options
const SHOULD_FORMAT_DATES = 'shouldFormatDates'; public const SHOULD_FORMAT_DATES = 'shouldFormatDates';
const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows'; public const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows';
public const START_COLUMN = 'startColumn';
public const END_COLUMN = 'endColumn';
// CSV specific options // CSV specific options
const FIELD_DELIMITER = 'fieldDelimiter'; public const FIELD_DELIMITER = 'fieldDelimiter';
const FIELD_ENCLOSURE = 'fieldEnclosure'; public const FIELD_ENCLOSURE = 'fieldEnclosure';
const ENCODING = 'encoding'; public const ENCODING = 'encoding';
// XLSX specific options // XLSX specific options
const TEMP_FOLDER = 'tempFolder'; public const TEMP_FOLDER = 'tempFolder';
const SHOULD_USE_1904_DATES = 'shouldUse1904Dates'; public const SHOULD_USE_1904_DATES = 'shouldUse1904Dates';
} }

View File

@ -0,0 +1,7 @@
<?php
namespace Box\Spout\Reader\Exception;
/**
 * Exception signalling that a reader option holds a value that cannot be used.
 *
 * NOTE(review): in the code visible alongside this class it is raised by the CSV
 * row iterator for a negative start column index, or for an end column index
 * that is not larger than the start column index - confirm other usages.
 */
class InvalidReaderOptionValueException extends ReaderException
{
}

View File

@ -28,6 +28,12 @@ abstract class ReaderAbstract implements ReaderInterface
/** @var OptionsManagerInterface Writer options manager */ /** @var OptionsManagerInterface Writer options manager */
protected $optionsManager; protected $optionsManager;
/** @var int The column index where the reader should start */
protected $startColumnIndex;
/** @var int The column index where the reader should stop */
protected $endColumnIndex;
/** /**
* Returns whether stream wrappers are supported * Returns whether stream wrappers are supported
* *
@ -98,6 +104,28 @@ abstract class ReaderAbstract implements ReaderInterface
return $this; return $this;
} }
/**
 * Stores the index of the first column to read under Options::START_COLUMN
 * in the options manager. The value is not validated here.
 *
 * @param int $startColumnIndex Zero-based index of the first column to read
 * @return ReaderAbstract This reader, so that calls can be chained
 */
public function setStartColumnIndex(int $startColumnIndex) : ReaderAbstract
{
    $manager = $this->optionsManager;
    $manager->setOption(Options::START_COLUMN, $startColumnIndex);

    return $this;
}
/**
 * Stores the index of the last column to read under Options::END_COLUMN
 * in the options manager. The value is not validated here.
 *
 * @param int $endColumnIndex Index of the last column to read
 *                            (NOTE(review): the CSV row iterator appears to treat it as
 *                            zero-based and inclusive - confirm for other readers)
 * @return ReaderAbstract This reader, so that calls can be chained
 */
public function setEndColumnIndex(int $endColumnIndex) : ReaderAbstract
{
    $manager = $this->optionsManager;
    $manager->setOption(Options::END_COLUMN, $endColumnIndex);

    return $this;
}
/** /**
* Prepares the reader to read the given file. It also makes sure * Prepares the reader to read the given file. It also makes sure
* that the file exists and is readable. * that the file exists and is readable.

View File

@ -3,13 +3,13 @@
namespace Box\Spout\Reader\CSV; namespace Box\Spout\Reader\CSV;
use Box\Spout\Common\Creator\HelperFactory; use Box\Spout\Common\Creator\HelperFactory;
use Box\Spout\Common\Entity\Row;
use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Helper\EncodingHelper; use Box\Spout\Common\Helper\EncodingHelper;
use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Helper\GlobalFunctionsHelper;
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
use Box\Spout\Reader\CSV\Manager\OptionsManager; use Box\Spout\Reader\CSV\Manager\OptionsManager;
use Box\Spout\Reader\Exception\ReaderNotOpenedException; use Box\Spout\Reader\Exception\ReaderNotOpenedException;
use Box\Spout\Reader\ReaderInterface;
use Box\Spout\TestUsingResource; use Box\Spout\TestUsingResource;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
@ -474,10 +474,144 @@ class ReaderTest extends TestCase
$reader->open('unsupported://foobar'); $reader->open('unsupported://foobar');
} }
/**
 * Reads csv_with_headers.csv four times - without a range, with columns 1 to 2,
 * from column 2 onwards and with columns 0 to 2 - and compares every result
 * with the cells expected for that range.
 *
 * @return void
 */
public function testReadWithStartAndEndColumn()
{
    $csvFile = 'csv_with_headers.csv';

    // Baseline: no column range configured, every cell is returned.
    $this->assertEquals(
        [
            ['Header-1', 'Header-2', 'Header-3', ''],
            ['Test-1', 'Test-2', 'Test-3', ''],
            ['Test-1', '', '', ''],
            ['Test-1', 'Test-2', 'Test-3', 'Test-4'],
            ['', '', 'Test-3', ''],
        ],
        $this->getAllRowsForFile($csvFile),
        'All columns are respected without starting column'
    );

    // Both bounds set: only columns 1 and 2 are expected.
    $this->assertEquals(
        [
            ['Header-2', 'Header-3'],
            ['Test-2', 'Test-3'],
            ['', ''],
            ['Test-2', 'Test-3'],
            ['', 'Test-3'],
        ],
        $this->getAllRowsForFileWithRange($csvFile, 1, 2),
        'All columns are read starting at index 1 and ending at index 2'
    );

    // Only the start bound set: everything from column 2 on is expected.
    $this->assertEquals(
        [
            ['Header-3', ''],
            ['Test-3', ''],
            ['', ''],
            ['Test-3', 'Test-4'],
            ['Test-3', ''],
        ],
        $this->getAllRowsForFileWithRange($csvFile, 2),
        'All columns are read starting at index 2'
    );

    // Start at the first column and stop at column 2.
    $this->assertEquals(
        [
            ['Header-1', 'Header-2', 'Header-3'],
            ['Test-1', 'Test-2', 'Test-3'],
            ['Test-1', '', ''],
            ['Test-1', 'Test-2', 'Test-3'],
            ['', '', 'Test-3'],
        ],
        $this->getAllRowsForFileWithRange($csvFile, 0, 2),
        'All columns are read ending at index 2'
    );
}
/**
 * Checks that a column range configured after the reader has been opened
 * is still applied to the rows that are read afterwards.
 *
 * @return void
 */
public function testSetStartAndEndColumnAfterReaderOpened()
{
    $csvPath = $this->getResourcePath('csv_with_headers.csv');

    /** @var \Box\Spout\Reader\CSV\Reader $csvReader */
    $csvReader = $this->createCSVReader();
    $csvReader->open($csvPath);

    // The range is deliberately configured only once the file is already open.
    $csvReader->setStartColumnIndex(1);
    $csvReader->setEndColumnIndex(2);

    $readRows = [];
    foreach ($csvReader->getSheetIterator() as $sheetPosition => $currentSheet) {
        /** @var Row $currentRow */
        foreach ($currentSheet->getRowIterator() as $rowPosition => $currentRow) {
            $readRows[] = $currentRow->toArray();
        }
    }

    $csvReader->close();

    $this->assertEquals(
        [
            ['Header-2', 'Header-3'],
            ['Test-2', 'Test-3'],
            ['', ''],
            ['Test-2', 'Test-3'],
            ['', 'Test-3'],
        ],
        $readRows,
        'Correct range set after reader was opened'
    );
}
/**
 * Reads columns 0 to 2 of a file whose rows hold a different number of cells
 * and expects the shorter rows to be completed with empty strings.
 */
public function testDifferentCellsAndRange()
{
    $readRows = $this->getAllRowsForFileWithRange('csv_with_different_cells_number.csv', 0, 2);

    $this->assertEquals(
        [
            ['csv--11', 'csv--12', 'csv--13'],
            ['csv--21', 'csv--22', ''],
            ['csv--31', '', ''],
        ],
        $readRows
    );
}
/**
 * Reading with a negative start column index is expected to fail
 * with an InvalidReaderOptionValueException.
 *
 * @return void
 */
public function testNegativeStartColumnIndex()
{
    // The expectation is declared in code instead of the @expectedException
    // annotation, which PHPUnit deprecated in 8.0 and removed in 9.0.
    $this->expectException(\Box\Spout\Reader\Exception\InvalidReaderOptionValueException::class);

    $this->getAllRowsForFileWithRange('csv_with_headers.csv', -1);
}
/**
 * Reading with an end column index (1) below the start column index (3)
 * is expected to fail with an InvalidReaderOptionValueException.
 *
 * @return void
 */
public function testEndColumnIndexSmallerThanStartIndex()
{
    // The expectation is declared in code instead of the @expectedException
    // annotation, which PHPUnit deprecated in 8.0 and removed in 9.0.
    $this->expectException(\Box\Spout\Reader\Exception\InvalidReaderOptionValueException::class);

    $this->getAllRowsForFileWithRange('csv_with_headers.csv', 3, 1);
}
/** /**
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper|null $optionsManager * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper|null $optionsManager
* @param \Box\Spout\Common\Manager\OptionsManagerInterface|null $globalFunctionsHelper * @param \Box\Spout\Common\Manager\OptionsManagerInterface|null $globalFunctionsHelper
* @return ReaderInterface * @return Reader
*/ */
private function createCSVReader($optionsManager = null, $globalFunctionsHelper = null) private function createCSVReader($optionsManager = null, $globalFunctionsHelper = null)
{ {
@ -494,28 +628,42 @@ class ReaderTest extends TestCase
* @param string $fieldEnclosure * @param string $fieldEnclosure
* @param string $encoding * @param string $encoding
* @param bool $shouldPreserveEmptyRows * @param bool $shouldPreserveEmptyRows
* @param int $startColumnIndex
* @param int|null $endColumnIndex
* @return array All the read rows the given file * @return array All the read rows the given file
*/ */
private function getAllRowsForFile( private function getAllRowsForFile(
$fileName, string $fileName,
$fieldDelimiter = ',', string $fieldDelimiter = ',',
$fieldEnclosure = '"', string $fieldEnclosure = '"',
$encoding = EncodingHelper::ENCODING_UTF8, string $encoding = EncodingHelper::ENCODING_UTF8,
$shouldPreserveEmptyRows = false bool $shouldPreserveEmptyRows = false,
) { int $startColumnIndex = 0,
int $endColumnIndex = null
) : array {
$allRows = []; $allRows = [];
$resourcePath = $this->getResourcePath($fileName); $resourcePath = $this->getResourcePath($fileName);
/** @var \Box\Spout\Reader\CSV\Reader $reader */ /** @var \Box\Spout\Reader\CSV\Reader $reader */
$reader = $this->createCSVReader(); $reader = $this->createCSVReader();
if ($endColumnIndex) {
$reader->setEndColumnIndex($endColumnIndex);
}
$reader $reader
->setFieldDelimiter($fieldDelimiter) ->setFieldDelimiter($fieldDelimiter)
->setFieldEnclosure($fieldEnclosure) ->setFieldEnclosure($fieldEnclosure)
->setEncoding($encoding) ->setEncoding($encoding)
->setShouldPreserveEmptyRows($shouldPreserveEmptyRows) ->setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
->setStartColumnIndex($startColumnIndex)
->open($resourcePath); ->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
/**
* @var int
* @var Row $row
*/
foreach ($sheet->getRowIterator() as $rowIndex => $row) { foreach ($sheet->getRowIterator() as $rowIndex => $row) {
$allRows[] = $row->toArray(); $allRows[] = $row->toArray();
} }
@ -525,4 +673,26 @@ class ReaderTest extends TestCase
return $allRows; return $allRows;
} }
/**
 * Shortcut for getAllRowsForFile() that only varies the column range: the
 * delimiter (','), the enclosure ('"'), the encoding (UTF-8) and the
 * "preserve empty rows" flag (false) are passed with fixed values.
 *
 * @param string $fileName Name of the resource file to read
 * @param int $startColumnIndex Index of the first column to read
 * @param int|null $endColumnIndex Index of the last column to read, or null for no end column
 * @return array All the rows read from the given file
 */
protected function getAllRowsForFileWithRange(
    string $fileName,
    int $startColumnIndex = 0,
    // Explicitly nullable: an "int $x = null" default relies on implicit
    // nullability, which is deprecated as of PHP 8.4.
    ?int $endColumnIndex = null
) : array {
    return $this->getAllRowsForFile(
        $fileName,
        ',',
        '"',
        EncodingHelper::ENCODING_UTF8,
        false,
        $startColumnIndex,
        $endColumnIndex
    );
}
} }

View File

@ -0,0 +1,5 @@
"Header-1","Header-2","Header-3",
"Test-1","Test-2","Test-3",
"Test-1",,,
"Test-1","Test-2","Test-3","Test-4"
,,"Test-3",
1 Header-1 Header-2 Header-3
2 Test-1 Test-2 Test-3
3 Test-1
4 Test-1 Test-2 Test-3 Test-4
5 Test-3