introduce a start and end column index for readers
This commit is contained in:
parent
b105d15f08
commit
962f9d03ef
@ -7,7 +7,7 @@ namespace Box\Spout\Common\Manager;
|
||||
*/
|
||||
abstract class OptionsManagerAbstract implements OptionsManagerInterface
|
||||
{
|
||||
const PREFIX_OPTION = 'OPTION_';
|
||||
public const PREFIX_OPTION = 'OPTION_';
|
||||
|
||||
/** @var string[] List of all supported option names */
|
||||
private $supportedOptions = [];
|
||||
|
@ -23,6 +23,8 @@ class OptionsManager extends OptionsManagerAbstract
|
||||
Options::FIELD_DELIMITER,
|
||||
Options::FIELD_ENCLOSURE,
|
||||
Options::ENCODING,
|
||||
Options::START_COLUMN,
|
||||
Options::END_COLUMN,
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@ use Box\Spout\Common\Helper\GlobalFunctionsHelper;
|
||||
use Box\Spout\Common\Manager\OptionsManagerInterface;
|
||||
use Box\Spout\Reader\Common\Entity\Options;
|
||||
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
|
||||
use Box\Spout\Reader\Exception\InvalidReaderOptionValueException;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
|
||||
/**
|
||||
@ -54,6 +55,9 @@ class RowIterator implements IteratorInterface
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var OptionsManagerInterface */
|
||||
protected $optionsManager;
|
||||
|
||||
/**
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param OptionsManagerInterface $optionsManager
|
||||
@ -76,6 +80,7 @@ class RowIterator implements IteratorInterface
|
||||
$this->encodingHelper = $encodingHelper;
|
||||
$this->entityFactory = $entityFactory;
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
$this->optionsManager = $optionsManager;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -177,9 +182,8 @@ class RowIterator implements IteratorInterface
|
||||
* Returns the next row, converted if necessary to UTF-8.
|
||||
* As fgetcsv() does not manage correctly encoding for non UTF-8 data,
|
||||
* we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
|
||||
*
|
||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||
* @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
|
||||
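* @throws InvalidReaderOptionValueException If the start column index is negative or the end column index is not larger than the start column index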
|
||||
* @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
|
||||
*/
|
||||
protected function getNextUTF8EncodedRow()
|
||||
{
|
||||
@ -188,6 +192,33 @@ class RowIterator implements IteratorInterface
|
||||
return false;
|
||||
}
|
||||
|
||||
// The start and end column indexes are read for every row, so they can still be changed after the reader has been opened
|
||||
$startColumnIndex = $this->optionsManager->getOption(Options::START_COLUMN);
|
||||
$endColumnIndex = $this->optionsManager->getOption(Options::END_COLUMN);
|
||||
|
||||
if ($startColumnIndex < 0) {
|
||||
throw new InvalidReaderOptionValueException(
|
||||
'The start column index has to be a non negative number'
|
||||
);
|
||||
}
|
||||
|
||||
if ($endColumnIndex && $endColumnIndex <= $startColumnIndex) {
|
||||
throw new InvalidReaderOptionValueException(
|
||||
'The end column index has to be a larger number than the start index'
|
||||
);
|
||||
}
|
||||
|
||||
// The range of the cells to be read is determined by the start and end column index
|
||||
$readerLength = $endColumnIndex ? ($endColumnIndex - $startColumnIndex) + 1 : null;
|
||||
$encodedRowData = \array_slice($encodedRowData, $startColumnIndex, $readerLength);
|
||||
|
||||
// If there is an end column index - the resulting data is a fixed array
|
||||
// starting at $startColumnIndex and ending at $endColumnIndex.
|
||||
// Missing array values are filled with the empty value ''.
|
||||
if ($endColumnIndex && count($encodedRowData) < $readerLength) {
|
||||
$encodedRowData = $encodedRowData + \array_fill(0, $readerLength, '');
|
||||
}
|
||||
|
||||
foreach ($encodedRowData as $cellIndex => $cellValue) {
|
||||
switch ($this->encoding) {
|
||||
case EncodingHelper::ENCODING_UTF16_LE:
|
||||
@ -202,7 +233,6 @@ class RowIterator implements IteratorInterface
|
||||
$cellValue = rtrim($cellValue);
|
||||
break;
|
||||
}
|
||||
|
||||
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
|
||||
}
|
||||
|
||||
|
@ -9,15 +9,18 @@ namespace Box\Spout\Reader\Common\Entity;
|
||||
abstract class Options
|
||||
{
|
||||
// Common options
|
||||
const SHOULD_FORMAT_DATES = 'shouldFormatDates';
|
||||
const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows';
|
||||
public const SHOULD_FORMAT_DATES = 'shouldFormatDates';
|
||||
public const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows';
|
||||
|
||||
public const START_COLUMN = 'startColumn';
|
||||
public const END_COLUMN = 'endColumn';
|
||||
|
||||
// CSV specific options
|
||||
const FIELD_DELIMITER = 'fieldDelimiter';
|
||||
const FIELD_ENCLOSURE = 'fieldEnclosure';
|
||||
const ENCODING = 'encoding';
|
||||
public const FIELD_DELIMITER = 'fieldDelimiter';
|
||||
public const FIELD_ENCLOSURE = 'fieldEnclosure';
|
||||
public const ENCODING = 'encoding';
|
||||
|
||||
// XLSX specific options
|
||||
const TEMP_FOLDER = 'tempFolder';
|
||||
const SHOULD_USE_1904_DATES = 'shouldUse1904Dates';
|
||||
public const TEMP_FOLDER = 'tempFolder';
|
||||
public const SHOULD_USE_1904_DATES = 'shouldUse1904Dates';
|
||||
}
|
||||
|
@ -0,0 +1,7 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Exception;
|
||||
|
||||
/**
 * Exception thrown when a reader option holds an invalid value, e.g. a negative start column index
 * or an end column index that is not larger than the start column index.
 */
class InvalidReaderOptionValueException extends ReaderException
|
||||
{
|
||||
}
|
@ -28,6 +28,12 @@ abstract class ReaderAbstract implements ReaderInterface
|
||||
/** @var OptionsManagerInterface Reader options manager */
|
||||
protected $optionsManager;
|
||||
|
||||
/** @var int The column index where the reader should start */
|
||||
protected $startColumnIndex;
|
||||
|
||||
/** @var int The column index where the reader should stop */
|
||||
protected $endColumnIndex;
|
||||
|
||||
/**
|
||||
* Returns whether stream wrappers are supported
|
||||
*
|
||||
@ -98,6 +104,28 @@ abstract class ReaderAbstract implements ReaderInterface
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Stores the index of the first column to read in the reader options.
 *
 * @param int $startColumnIndex The 0 based start column index
 * @return ReaderAbstract The current reader instance, so that calls can be chained
 */
public function setStartColumnIndex(int $startColumnIndex) : ReaderAbstract
{
    $optionName = Options::START_COLUMN;
    $this->optionsManager->setOption($optionName, $startColumnIndex);

    return $this;
}
|
||||
|
||||
/**
 * Stores the index of the last column to read in the reader options.
 *
 * @param int $endColumnIndex The 0 based end column index (the column at this index is still read)
 * @return ReaderAbstract The current reader instance, so that calls can be chained
 */
public function setEndColumnIndex(int $endColumnIndex) : ReaderAbstract
{
    $optionName = Options::END_COLUMN;
    $this->optionsManager->setOption($optionName, $endColumnIndex);

    return $this;
}
|
||||
|
||||
/**
|
||||
* Prepares the reader to read the given file. It also makes sure
|
||||
* that the file exists and is readable.
|
||||
|
@ -3,13 +3,13 @@
|
||||
namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Common\Creator\HelperFactory;
|
||||
use Box\Spout\Common\Entity\Row;
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
|
||||
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
|
||||
use Box\Spout\Reader\CSV\Manager\OptionsManager;
|
||||
use Box\Spout\Reader\Exception\ReaderNotOpenedException;
|
||||
use Box\Spout\Reader\ReaderInterface;
|
||||
use Box\Spout\TestUsingResource;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
@ -474,10 +474,144 @@ class ReaderTest extends TestCase
|
||||
$reader->open('unsupported://foobar');
|
||||
}
|
||||
|
||||
/**
 * Reads the same CSV resource four times: without any column range, with a start and an end column,
 * with a start column only and with an end column only, and checks the returned cells each time.
 *
 * @return void
 */
public function testReadWithStartAndEndColumn()
{
    $resource = 'csv_with_headers.csv';

    // No range at all: every column of every row is returned
    $unrestrictedRows = $this->getAllRowsForFile($resource);
    $this->assertEquals(
        [
            ['Header-1', 'Header-2', 'Header-3', ''],
            ['Test-1', 'Test-2', 'Test-3', ''],
            ['Test-1', '', '', ''],
            ['Test-1', 'Test-2', 'Test-3', 'Test-4'],
            ['', '', 'Test-3', ''],
        ],
        $unrestrictedRows,
        'All columns are respected without starting column'
    );

    // Start and end column: only the columns at index 1 and 2 are returned
    $boundedRows = $this->getAllRowsForFileWithRange($resource, 1, 2);
    $this->assertEquals(
        [
            ['Header-2', 'Header-3'],
            ['Test-2', 'Test-3'],
            ['', ''],
            ['Test-2', 'Test-3'],
            ['', 'Test-3'],
        ],
        $boundedRows,
        'All columns are read starting at index 1 and ending at index 2'
    );

    // Start column only: everything from index 2 up to the end of the row is returned
    $rowsFromStart = $this->getAllRowsForFileWithRange($resource, 2);
    $this->assertEquals(
        [
            ['Header-3', ''],
            ['Test-3', ''],
            ['', ''],
            ['Test-3', 'Test-4'],
            ['Test-3', ''],
        ],
        $rowsFromStart,
        'All columns are read starting at index 2'
    );

    // End column only: the columns at index 0 to 2 are returned
    $rowsUpToEnd = $this->getAllRowsForFileWithRange($resource, 0, 2);
    $this->assertEquals(
        [
            ['Header-1', 'Header-2', 'Header-3'],
            ['Test-1', 'Test-2', 'Test-3'],
            ['Test-1', '', ''],
            ['Test-1', 'Test-2', 'Test-3'],
            ['', '', 'Test-3'],
        ],
        $rowsUpToEnd,
        'All columns are read ending at index 2'
    );
}
|
||||
|
||||
/**
 * Checks that a column range set after the reader has been opened is still applied to the rows read.
 *
 * @return void
 */
public function testSetStartAndEndColumnAfterReaderOpened()
{
    $path = $this->getResourcePath('csv_with_headers.csv');
    $readRows = [];

    /** @var \Box\Spout\Reader\CSV\Reader $reader */
    $reader = $this->createCSVReader();
    $reader->open($path);

    // The range is configured only once the file is already open
    $reader->setStartColumnIndex(1);
    $reader->setEndColumnIndex(2);

    foreach ($reader->getSheetIterator() as $sheet) {
        /** @var Row $row */
        foreach ($sheet->getRowIterator() as $row) {
            $readRows[] = $row->toArray();
        }
    }

    $reader->close();

    $this->assertEquals(
        [
            ['Header-2', 'Header-3'],
            ['Test-2', 'Test-3'],
            ['', ''],
            ['Test-2', 'Test-3'],
            ['', 'Test-3'],
        ],
        $readRows,
        'Correct range set after reader was opened'
    );
}
|
||||
|
||||
/**
 * Reads columns 0 to 2 of a file whose rows have different numbers of cells
 * and expects the shorter rows to be completed with empty strings.
 *
 * @return void
 */
public function testDifferentCellsAndRange()
{
    $rowsInRange = $this->getAllRowsForFileWithRange('csv_with_different_cells_number.csv', 0, 2);

    $this->assertEquals(
        [
            ['csv--11', 'csv--12', 'csv--13'],
            ['csv--21', 'csv--22', ''],
            ['csv--31', '', ''],
        ],
        $rowsInRange
    );
}
|
||||
|
||||
/**
 * Reading with a negative start column index must fail with an InvalidReaderOptionValueException.
 *
 * @return void
 */
public function testNegativeStartColumnIndex()
{
    // The expectation is declared in code: the @expectedException annotation is deprecated in PHPUnit 8
    // and no longer supported in PHPUnit 9, where the test would otherwise error instead of passing.
    $this->expectException(\Box\Spout\Reader\Exception\InvalidReaderOptionValueException::class);

    $fileName = 'csv_with_headers.csv';
    $this->getAllRowsForFileWithRange($fileName, -1);
}
|
||||
|
||||
/**
 * Reading with an end column index smaller than the start column index
 * must fail with an InvalidReaderOptionValueException.
 *
 * @return void
 */
public function testEndColumnIndexSmallerThanStartIndex()
{
    // The expectation is declared in code: the @expectedException annotation is deprecated in PHPUnit 8
    // and no longer supported in PHPUnit 9, where the test would otherwise error instead of passing.
    $this->expectException(\Box\Spout\Reader\Exception\InvalidReaderOptionValueException::class);

    $fileName = 'csv_with_headers.csv';
    $this->getAllRowsForFileWithRange($fileName, 3, 1);
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Common\Manager\OptionsManagerInterface|null $optionsManager
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper|null $globalFunctionsHelper
|
||||
* @return ReaderInterface
|
||||
* @return Reader
|
||||
*/
|
||||
private function createCSVReader($optionsManager = null, $globalFunctionsHelper = null)
|
||||
{
|
||||
@ -494,28 +628,42 @@ class ReaderTest extends TestCase
|
||||
* @param string $fieldEnclosure
|
||||
* @param string $encoding
|
||||
* @param bool $shouldPreserveEmptyRows
|
||||
* @param int $startColumnIndex
|
||||
* @param int|null $endColumnIndex
|
||||
* @return array All the rows read from the given file
|
||||
*/
|
||||
private function getAllRowsForFile(
|
||||
$fileName,
|
||||
$fieldDelimiter = ',',
|
||||
$fieldEnclosure = '"',
|
||||
$encoding = EncodingHelper::ENCODING_UTF8,
|
||||
$shouldPreserveEmptyRows = false
|
||||
) {
|
||||
string $fileName,
|
||||
string $fieldDelimiter = ',',
|
||||
string $fieldEnclosure = '"',
|
||||
string $encoding = EncodingHelper::ENCODING_UTF8,
|
||||
bool $shouldPreserveEmptyRows = false,
|
||||
int $startColumnIndex = 0,
|
||||
int $endColumnIndex = null
|
||||
) : array {
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\CSV\Reader $reader */
|
||||
$reader = $this->createCSVReader();
|
||||
|
||||
if ($endColumnIndex) {
|
||||
$reader->setEndColumnIndex($endColumnIndex);
|
||||
}
|
||||
|
||||
$reader
|
||||
->setFieldDelimiter($fieldDelimiter)
|
||||
->setFieldEnclosure($fieldEnclosure)
|
||||
->setEncoding($encoding)
|
||||
->setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
|
||||
->setStartColumnIndex($startColumnIndex)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
/**
|
||||
* @var int
|
||||
* @var Row $row
|
||||
*/
|
||||
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
|
||||
$allRows[] = $row->toArray();
|
||||
}
|
||||
@ -525,4 +673,26 @@ class ReaderTest extends TestCase
|
||||
|
||||
return $allRows;
|
||||
}
|
||||
|
||||
/**
 * Reads all the rows of the given file restricted to a column range,
 * using the default CSV settings (comma delimiter, double quote enclosure, UTF-8, empty rows skipped).
 *
 * @param string $fileName
 * @param int $startColumnIndex The 0 based index of the first column to read
 * @param int|null $endColumnIndex The 0 based index of the last column to read, or NULL to pass no end column
 * @return array
 */
protected function getAllRowsForFileWithRange(
    string $fileName,
    int $startColumnIndex = 0,
    ?int $endColumnIndex = null
) : array {
    // Only the range differs from the defaults; the other arguments repeat getAllRowsForFile()'s default values
    return $this->getAllRowsForFile(
        $fileName,
        ',',
        '"',
        EncodingHelper::ENCODING_UTF8,
        false,
        $startColumnIndex,
        $endColumnIndex
    );
}
|
||||
}
|
||||
|
5
tests/resources/csv/csv_with_headers.csv
Normal file
5
tests/resources/csv/csv_with_headers.csv
Normal file
@ -0,0 +1,5 @@
|
||||
"Header-1","Header-2","Header-3",
|
||||
"Test-1","Test-2","Test-3",
|
||||
"Test-1",,,
|
||||
"Test-1","Test-2","Test-3","Test-4"
|
||||
,,"Test-3",
|
|
Loading…
x
Reference in New Issue
Block a user