diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index d768936..04ec4f5 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -27,8 +27,8 @@ class RowIterator implements IteratorInterface /** @var int Number of read rows */ protected $numReadRows = 0; - /** @var Row|null Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer; + /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */ + protected $rowBuffer; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; @@ -89,7 +89,7 @@ class RowIterator implements IteratorInterface $this->rewindAndSkipBom(); $this->numReadRows = 0; - $this->rowDataBuffer = null; + $this->rowBuffer = null; $this->next(); } @@ -148,7 +148,7 @@ class RowIterator implements IteratorInterface if ($rowData !== false) { // str_replace will replace NULL values by empty strings $rowDataBufferAsArray = str_replace(null, null, $rowData); - $this->rowDataBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray); + $this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray); $this->numReadRows++; } else { // If we reach this point, it means end of file was reached. 
@@ -226,7 +226,7 @@ class RowIterator implements IteratorInterface */ public function current() { - return $this->rowDataBuffer; + return $this->rowBuffer; } /** diff --git a/src/Spout/Reader/Common/Entity/Row.php b/src/Spout/Reader/Common/Entity/Row.php index 1663a95..818cfdd 100644 --- a/src/Spout/Reader/Common/Entity/Row.php +++ b/src/Spout/Reader/Common/Entity/Row.php @@ -29,7 +29,7 @@ class Row /** * @param Cell[] $cells - * @return $this + * @return Row */ public function setCells(array $cells) { @@ -41,6 +41,19 @@ class Row return $this; } + /** + * Sets the given cell at the given index, replacing any cell already stored at that index. + * @param Cell $cell + * @param int $cellIndex + * @return Row + */ + public function setCellAtIndex(Cell $cell, $cellIndex) + { + $this->cells[$cellIndex] = $cell; + + return $this; + } + /** * @param Cell $cell * @return Row diff --git a/src/Spout/Reader/XLSX/Creator/InternalEntityFactory.php b/src/Spout/Reader/XLSX/Creator/InternalEntityFactory.php index 98b7bb7..db402ad 100644 --- a/src/Spout/Reader/XLSX/Creator/InternalEntityFactory.php +++ b/src/Spout/Reader/XLSX/Creator/InternalEntityFactory.php @@ -3,7 +3,9 @@ namespace Box\Spout\Reader\XLSX\Creator; use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface; +use Box\Spout\Reader\Common\Entity\Cell; use Box\Spout\Reader\Common\Entity\Options; +use Box\Spout\Reader\Common\Entity\Row; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; @@ -85,6 +87,7 @@ class InternalEntityFactory implements InternalEntityFactoryInterface $xmlProcessor = $this->createXMLProcessor($xmlReader); $styleManager = $this->managerFactory->createStyleManager($filePath, $this); + $rowManager = $this->managerFactory->createRowManager($this); $shouldFormatDates = $optionsManager->getOption(Options::SHOULD_FORMAT_DATES); $shouldUse1904Dates = $optionsManager->getOption(Options::SHOULD_USE_1904_DATES); @@ -103,10 +106,30 @@ class InternalEntityFactory implements 
InternalEntityFactoryInterface $shouldPreserveEmptyRows, $xmlReader, $xmlProcessor, - $cellValueFormatter + $cellValueFormatter, + $rowManager, + $this ); } + /** + * @param Cell[] $cells + * @return Row + */ + public function createRow(array $cells) + { + return new Row($cells); + } + + /** + * @param mixed $cellValue + * @return Cell + */ + public function createCell($cellValue) + { + return new Cell($cellValue); + } + /** * @return \ZipArchive */ diff --git a/src/Spout/Reader/XLSX/Creator/ManagerFactory.php b/src/Spout/Reader/XLSX/Creator/ManagerFactory.php index 194edf7..20207cc 100644 --- a/src/Spout/Reader/XLSX/Creator/ManagerFactory.php +++ b/src/Spout/Reader/XLSX/Creator/ManagerFactory.php @@ -2,6 +2,7 @@ namespace Box\Spout\Reader\XLSX\Creator; +use Box\Spout\Reader\XLSX\Manager\RowManager; use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory; use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; use Box\Spout\Reader\XLSX\Manager\SheetManager; @@ -92,4 +93,13 @@ class ManagerFactory return new StyleManager($filePath, $workbookRelationshipsManager, $entityFactory); } + + /** + * @param InternalEntityFactory $entityFactory Factory to create entities + * @return RowManager + */ + public function createRowManager($entityFactory) + { + return new RowManager($entityFactory); + } } diff --git a/src/Spout/Reader/XLSX/Helper/CellHelper.php b/src/Spout/Reader/XLSX/Helper/CellHelper.php index 9346f16..27b5abf 100644 --- a/src/Spout/Reader/XLSX/Helper/CellHelper.php +++ b/src/Spout/Reader/XLSX/Helper/CellHelper.php @@ -18,30 +18,6 @@ class CellHelper 'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25, ]; - /** - * Fills the missing indexes of an array with a given value. 
- * For instance, $dataArray = []; $a[1] = 1; $a[3] = 3; - * Calling fillMissingArrayIndexes($dataArray, 'FILL') will return this array: ['FILL', 1, 'FILL', 3] - * - * @param array $dataArray The array to fill - * @param string $fillValue optional - * @return array - */ - public static function fillMissingArrayIndexes($dataArray, $fillValue = '') - { - if (empty($dataArray)) { - return []; - } - $existingIndexes = array_keys($dataArray); - - $newIndexes = array_fill_keys(range(0, max($existingIndexes)), $fillValue); - $dataArray += $newIndexes; - - ksort($dataArray); - - return $dataArray; - } - /** * Returns the base 10 column index associated to the cell index (base 26). * Excel uses A to Z letters for column indexing, where A is the 1st column, diff --git a/src/Spout/Reader/XLSX/Manager/RowManager.php b/src/Spout/Reader/XLSX/Manager/RowManager.php new file mode 100644 index 0000000..ec85bdc --- /dev/null +++ b/src/Spout/Reader/XLSX/Manager/RowManager.php @@ -0,0 +1,65 @@ +entityFactory = $entityFactory; + } + + /** + * Detect whether a row is considered empty. + * An empty row has all of its cells empty. + * + * @param Row $row + * @return bool + */ + public function isEmpty(Row $row) + { + foreach ($row->getCells() as $cell) { + if (!$cell->isEmpty()) { + return false; + } + } + + return true; + } + + /** + * Fills the missing indexes of a row with empty cells. 
+ * Cells are put back in index order, because filling a gap appends the new cell at the end of the array. + * @param Row $row Row to complete; returned as is when it has no cells + * @return Row The given row, with a cell at every index up to the highest existing one + */ + public function fillMissingIndexesWithEmptyCells(Row $row) + { + $rowCells = $row->getCells(); + if (count($rowCells) === 0) { + return $row; + } + + $maxCellIndex = max(array_keys($rowCells)); + for ($cellIndex = 0; $cellIndex < $maxCellIndex; $cellIndex++) { + if (!isset($rowCells[$cellIndex])) { + $rowCells[$cellIndex] = $this->entityFactory->createCell(''); + } + } + ksort($rowCells); // PHP arrays keep insertion order: the keys added above sit after the existing ones + $row->setCells($rowCells); + return $row; + } +} diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index b411758..478faf2 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -3,12 +3,16 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; +use Box\Spout\Reader\Common\Entity\Cell; +use Box\Spout\Reader\Common\Entity\Row; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\Wrapper\XMLReader; +use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; use Box\Spout\Reader\XLSX\Helper\CellHelper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; +use Box\Spout\Reader\XLSX\Manager\RowManager; /** * Class RowIterator @@ -42,17 +46,23 @@ class RowIterator implements IteratorInterface /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; + /** @var \Box\Spout\Reader\XLSX\Manager\RowManager Manages rows */ + protected $rowManager; + + /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */ + protected $entityFactory; + /** * TODO: This variable can be deleted when row indices get preserved * @var int Number of read rows */ protected $numReadRows = 0; - /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ - protected $currentlyProcessedRowData = []; + /** @var Row Contains the row currently processed */ + protected 
$currentlyProcessedRow; - /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer; + /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */ + protected $rowBuffer; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; @@ -79,14 +89,26 @@ class RowIterator implements IteratorInterface * @param XMLReader $xmlReader XML Reader * @param XMLProcessor $xmlProcessor Helper to process XML files * @param CellValueFormatter $cellValueFormatter Helper to format cell values + * @param RowManager $rowManager Manages rows + * @param InternalEntityFactory $entityFactory Factory to create entities */ - public function __construct($filePath, $sheetDataXMLFilePath, $shouldPreserveEmptyRows, $xmlReader, $xmlProcessor, $cellValueFormatter) - { + public function __construct( + $filePath, + $sheetDataXMLFilePath, + $shouldPreserveEmptyRows, + $xmlReader, + XMLProcessor $xmlProcessor, + CellValueFormatter $cellValueFormatter, + RowManager $rowManager, + InternalEntityFactory $entityFactory + ) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; $this->xmlReader = $xmlReader; $this->cellValueFormatter = $cellValueFormatter; - $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; + $this->rowManager = $rowManager; + $this->entityFactory = $entityFactory; // Register all callbacks to process different nodes when reading the XML file $this->xmlProcessor = $xmlProcessor; @@ -127,7 +149,7 @@ class RowIterator implements IteratorInterface $this->numReadRows = 0; $this->lastRowIndexProcessed = 0; $this->nextRowIndexToBeProcessed = 0; - $this->rowDataBuffer = null; + $this->rowBuffer = null; $this->hasReachedEndOfFile = false; $this->numColumns = 0; @@ -192,7 +214,7 @@ class RowIterator 
implements IteratorInterface */ protected function readDataForNextRow() { - $this->currentlyProcessedRowData = []; + $this->currentlyProcessedRow = $this->entityFactory->createRow([]); try { $this->xmlProcessor->readUntilStopped(); @@ -200,7 +222,7 @@ class RowIterator implements IteratorInterface throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $this->currentlyProcessedRowData; + $this->rowBuffer = $this->currentlyProcessedRow; } /** @@ -238,7 +260,8 @@ class RowIterator implements IteratorInterface $numberOfColumnsForRow = (int) $numberOfColumnsForRow; } - $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; + $cells = array_fill(0, $numberOfColumnsForRow, $this->entityFactory->createCell('')); + $this->currentlyProcessedRow->setCells($cells); return XMLProcessor::PROCESSING_CONTINUE; } @@ -253,7 +276,9 @@ class RowIterator implements IteratorInterface // NOTE: expand() will automatically decode all XML entities of the child nodes $node = $xmlReader->expand(); - $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node); + $cell = $this->getCell($node); + + $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex); $this->lastColumnIndexProcessed = $currentColumnIndex; return XMLProcessor::PROCESSING_CONTINUE; @@ -265,7 +290,7 @@ class RowIterator implements IteratorInterface protected function processRowEndingNode() { // if the fetched row is empty and we don't want to preserve it.., - if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) { + if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) { // ... 
skip it return XMLProcessor::PROCESSING_CONTINUE; } @@ -274,7 +299,7 @@ class RowIterator implements IteratorInterface // If needed, we fill the empty cells if ($this->numColumns === 0) { - $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData); + $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow); } // at this point, we have all the data we need for the row @@ -324,34 +349,27 @@ class RowIterator implements IteratorInterface } /** - * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. + * Returns a cell holding the (unescaped), correctly marshalled value associated to the given XML node. * * @param \DOMNode $node - * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) + * @return Cell The cell set with the value associated with the XML node */ - protected function getCellValue($node) + protected function getCell($node) { - return $this->cellValueFormatter->extractAndFormatNodeValue($node); - } + $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node); - /** - * @param array $rowData - * @return bool Whether the given row is empty - */ - protected function isEmptyRow($rowData) - { - return (count($rowData) === 1 && reset($rowData) === ''); + return $this->entityFactory->createCell($cellValue); } /** * Return the current element, either an empty row or from the buffer. 
* @see http://php.net/manual/en/iterator.current.php * - * @return array|null + * @return Row|null */ public function current() { - $rowDataForRowToBeProcessed = $this->rowDataBuffer; + $rowToBeProcessed = $this->rowBuffer; if ($this->shouldPreserveEmptyRows) { // when we need to preserve empty rows, we will either return @@ -361,11 +379,11 @@ class RowIterator implements IteratorInterface if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { // return empty row if mismatch between last processed row // and the row that needs to be returned - $rowDataForRowToBeProcessed = ['']; + $rowToBeProcessed = $this->entityFactory->createRow([]); } } - return $rowDataForRowToBeProcessed; + return $rowToBeProcessed; } /** diff --git a/src/Spout/Writer/Common/Manager/RowManager.php b/src/Spout/Writer/Common/Manager/RowManager.php index 7534db5..e709b5e 100644 --- a/src/Spout/Writer/Common/Manager/RowManager.php +++ b/src/Spout/Writer/Common/Manager/RowManager.php @@ -22,15 +22,19 @@ class RowManager /** * Detect whether a row is considered empty. - * An empty row has either no cells at all - or only one empty cell + * An empty row has all of its cells empty. 
* * @param Row $row * @return bool */ public function isEmpty(Row $row) { - $cells = $row->getCells(); + foreach ($row->getCells() as $cell) { + if (!$cell->isEmpty()) { + return false; + } + } - return count($cells) === 0 || (count($cells) === 1 && $cells[0]->isEmpty()); + return true; } } diff --git a/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php index 4ae3dd3..a322bc6 100644 --- a/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php @@ -9,29 +9,6 @@ use Box\Spout\Common\Exception\InvalidArgumentException; */ class CellHelperTest extends \PHPUnit_Framework_TestCase { - /** - * @return array - */ - public function dataProviderForTestFillMissingArrayIndexes() - { - return [ - [null, []], - [[], []], - [[1 => 1, 3 => 3], ['FILL', 1, 'FILL', 3]], - ]; - } - - /** - * @dataProvider dataProviderForTestFillMissingArrayIndexes - * @param array $arrayToFill - * @param array $expectedFilledArray - */ - public function testFillMissingArrayIndexes($arrayToFill, array $expectedFilledArray) - { - $filledArray = CellHelper::fillMissingArrayIndexes($arrayToFill, 'FILL'); - $this->assertEquals($expectedFilledArray, $filledArray); - } - /** * @return array */ diff --git a/tests/Spout/Reader/XLSX/Manager/RowManagerTest.php b/tests/Spout/Reader/XLSX/Manager/RowManagerTest.php new file mode 100644 index 0000000..b0c25f4 --- /dev/null +++ b/tests/Spout/Reader/XLSX/Manager/RowManagerTest.php @@ -0,0 +1,90 @@ + $cell1, 3 => $cell3], [new Cell(''), $cell1, new Cell(''), $cell3]], + ]; + } + + /** + * @dataProvider dataProviderForTestFillMissingIndexesWithEmptyCells + * + * @param Cell[]|null $rowCells + * @param Cell[] $expectedFilledCells + */ + public function testFillMissingIndexesWithEmptyCells($rowCells, $expectedFilledCells) + { + $rowManager = $this->createRowManager(); + + $rowToFill = new Row([]); + foreach ($rowCells as $cellIndex => $cell) { + 
$rowToFill->setCellAtIndex($cell, $cellIndex); + } + + $filledRow = $rowManager->fillMissingIndexesWithEmptyCells($rowToFill); + $this->assertEquals($expectedFilledCells, $filledRow->getCells()); + } + + /** + * @return array + */ + public function dataProviderForTestIsEmptyRow() + { + return [ + // cells, expected isEmpty + [[], true], + [[new Cell('')], true], + [[new Cell(''), new Cell('')], true], + [[new Cell(''), new Cell(''), new Cell('Okay')], false], + ]; + } + + /** + * @dataProvider dataProviderForTestIsEmptyRow + * + * @param array $cells + * @param bool $expectedIsEmpty + * @return void + */ + public function testIsEmptyRow(array $cells, $expectedIsEmpty) + { + $rowManager = $this->createRowManager(); + $row = new Row($cells); + + $this->assertEquals($expectedIsEmpty, $rowManager->isEmpty($row)); + } + + /** + * @return RowManager + */ + private function createRowManager() + { + $entityFactory = new InternalEntityFactory( + $this->createMock(ManagerFactory::class), + $this->createMock(HelperFactory::class) + ); + + return new RowManager($entityFactory); + } +} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 3908257..8b35f0e 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -237,7 +237,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase 'weird string', // valid 'str' string null, // invalid date ], - ['', '', '', '', '', '', '', '', ''], ]; $this->assertEquals($expectedRows, $allRows); } @@ -451,10 +450,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals(6, count($allRows), 'There should be 6 rows'); $expectedRows = [ - [''], + [], ['s1--A2', 's1--B2', 's1--C2'], - [''], - [''], + [], + [], ['s1--A5', 's1--B5', 's1--C5'], ['s1--A6', 's1--B6', 's1--C6'], ]; @@ -580,14 +579,14 @@ class ReaderTest extends \PHPUnit_Framework_TestCase foreach ($reader->getSheetIterator() as $sheet) { // this loop should only add the first row 
of the first sheet foreach ($sheet->getRowIterator() as $row) { - $allRows[] = $row; + $allRows[] = $row->toArray(); break; } // this loop should rewind the iterator and restart reading from the 1st row again // therefore, it should only add the first row of the first sheet foreach ($sheet->getRowIterator() as $row) { - $allRows[] = $row; + $allRows[] = $row->toArray(); break; } @@ -598,7 +597,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase foreach ($reader->getSheetIterator() as $sheet) { // this loop should only add the first row of the current sheet foreach ($sheet->getRowIterator() as $row) { - $allRows[] = $row; + $allRows[] = $row->toArray(); break; } @@ -708,7 +707,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { foreach ($sheet->getRowIterator() as $rowIndex => $row) { - $allRows[] = $row; + $allRows[] = $row->toArray(); } } diff --git a/tests/Spout/Writer/Common/Manager/RowManagerTest.php b/tests/Spout/Writer/Common/Manager/RowManagerTest.php index 56d6128..730f4f8 100644 --- a/tests/Spout/Writer/Common/Manager/RowManagerTest.php +++ b/tests/Spout/Writer/Common/Manager/RowManagerTest.php @@ -19,6 +19,7 @@ class RowManagerTest extends TestCase // cells, expected isEmpty [[], true], [[new Cell('')], true], + [[new Cell(''), new Cell('')], true], [[new Cell(''), new Cell(''), new Cell('Okay')], false], ]; } @@ -34,7 +35,7 @@ class RowManagerTest extends TestCase { $rowManager = new RowManager(new StyleMerger()); - $row = new Row($cells, null, $rowManager); + $row = new Row($cells, null); $this->assertEquals($expectedIsEmpty, $rowManager->isEmpty($row)); } }