Merge e5c57d679457ed0c088439f8f9d1e9ee306d5df1 into b02d13cd406cf3489b490215fa2316b2b7c484ec

This commit is contained in:
Ingmar Runge 2016-07-08 12:30:56 +00:00 committed by GitHub
commit d46045745e
16 changed files with 428 additions and 101 deletions

View File

@ -19,8 +19,16 @@ abstract class AbstractReader implements ReaderInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates = false;
/** @var \Box\Spout\Reader\ReaderOptions */
protected $readerOptions;
/**
* Initializes the reader with a default set of options.
*
* A new ReaderOptions instance is created here; the option setters of this
* class write their values into that instance.
*/
public function __construct()
{
$this->readerOptions = new ReaderOptions();
}
/**
* Returns whether stream wrappers are supported
@ -69,7 +77,19 @@ abstract class AbstractReader implements ReaderInterface
*/
public function setShouldFormatDates($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
$this->readerOptions->setShouldFormatDates($shouldFormatDates);
return $this;
}
/**
* Sets whether "empty" rows should be returned or skipped while reading.
* The value is stored in the reader's ReaderOptions instance.
*
* @param bool $shouldPreserveEmptyRows TRUE to return empty rows, FALSE to skip them
* @return AbstractReader This reader, so that calls can be chained
*/
public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
{
$this->readerOptions->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
return $this;
}

View File

@ -42,7 +42,7 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
$this->sheetIterator = new SheetIterator($filePath, $this->readerOptions);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -8,6 +8,7 @@ use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\ReaderOptions;
/**
* Class RowIterator
@ -21,10 +22,14 @@ class RowIterator implements IteratorInterface
const XML_NODE_ROW = 'table:table-row';
const XML_NODE_CELL = 'table:table-cell';
const MAX_COLUMNS_EXCEL = 16384;
const MAX_ROWS_EXCEL = 1048576;
/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@ -34,23 +39,27 @@ class RowIterator implements IteratorInterface
/** @var bool Whether the iterator has already been rewound once */
protected $hasAlreadyBeenRewound = false;
/** @var int Number of read rows */
protected $numReadRows = 0;
/** @var int Key for iterator */
protected $rowIndex = 0;
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = null;
/** @var array Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = [];
/** @var bool Indicates whether all rows have been read */
protected $hasReachedEndOfFile = false;
/** @var \Box\Spout\Reader\ReaderOptions */
protected $readerOptions;
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
*/
public function __construct($xmlReader, $shouldFormatDates)
public function __construct($xmlReader, ReaderOptions $readerOptions)
{
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
$this->readerOptions = $readerOptions;
$this->cellValueFormatter = new CellValueFormatter($readerOptions->shouldFormatDates());
}
/**
@ -71,8 +80,8 @@ class RowIterator implements IteratorInterface
}
$this->hasAlreadyBeenRewound = true;
$this->numReadRows = 0;
$this->rowDataBuffer = null;
$this->rowIndex = 0;
$this->rowDataBuffer = [];
$this->hasReachedEndOfFile = false;
$this->next();
@ -90,7 +99,7 @@ class RowIterator implements IteratorInterface
}
/**
* Move forward to next element. Empty rows will be skipped.
* Move forward to next element. Empty rows can be skipped.
* @link http://php.net/manual/en/iterator.next.php
*
* @return void
@ -99,15 +108,34 @@ class RowIterator implements IteratorInterface
*/
public function next()
{
$prevRow = null;
if (count($this->rowDataBuffer) > 1) {
array_shift($this->rowDataBuffer);
$this->rowIndex++;
return;
} else {
$prevRow = $this->current();
$this->rowDataBuffer = [];
}
$rowData = [];
$cellValue = null;
$numRowsRepeated = 0;
$numColumnsRepeated = 1;
$numCellsRead = 0;
$hasAlreadyReadOneCell = false;
try {
while ($this->xmlReader->read()) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
// Start of a row description
$this->rowIndex++;
$numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode();
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
@ -127,30 +155,37 @@ class RowIterator implements IteratorInterface
$numCellsRead++;
$hasAlreadyReadOneCell = true;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
$isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
if ($isEmptyRow) {
// skip empty rows
$this->next();
return;
if (!$isEmptyRow) {
// Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
// The current count of read columns is determined by counting the values in $rowData.
// This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
// with a number-columns-repeated value equal to the number of (supported columns - used columns).
// In Excel, the number of supported columns is 16384, but we don't want to return rows that
// always have 16384 cells.
if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
for ($i = 0; $i < $numColumnsRepeated; $i++) {
$rowData[] = $cellValue;
}
}
} elseif ($this->readerOptions->shouldPreserveEmptyRows()) {
// Take number of cells from the previously read line.
$rowData = empty($prevRow) ? [] : array_fill(0, count($prevRow), '');
} else {
return $this->next();
}
// Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
// The current count of read columns is determined by counting the values in $rowData.
// This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
// with a number-columns-repeated value equals to the number of (supported columns - used columns).
// In Excel, the number of supported columns is 16384, but we don't want to returns rows with
// always 16384 cells.
if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
for ($i = 0; $i < $numColumnsRepeated; $i++) {
$rowData[] = $cellValue;
}
$this->numReadRows++;
// see above, now check number of rows...
if ($this->rowIndex - 1 + $numRowsRepeated >= self::MAX_ROWS_EXCEL) {
$numRowsRepeated = 0;
$this->hasReachedEndOfFile = true;
}
break;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
// The closing "</table:table>" marks the end of the file
$this->hasReachedEndOfFile = true;
break;
@ -161,7 +196,9 @@ class RowIterator implements IteratorInterface
throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
}
$this->rowDataBuffer = $rowData;
for ($i = 0; $i < $numRowsRepeated; ++$i) {
$this->rowDataBuffer[] = $rowData;
}
}
/**
@ -173,6 +210,15 @@ class RowIterator implements IteratorInterface
return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
}
/**
 * Reads the "table:number-rows-repeated" attribute of the node the XML reader
 * is currently positioned on.
 *
 * @return int The attribute value converted to an integer, or 1 when the attribute is not set
 */
protected function getNumRowsRepeatedForCurrentNode()
{
    $repeatAttribute = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

    // A missing attribute means the row appears exactly once.
    if ($repeatAttribute === null) {
        return 1;
    }

    return intval($repeatAttribute);
}
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
*
@ -203,7 +249,7 @@ class RowIterator implements IteratorInterface
*/
public function current()
{
return $this->rowDataBuffer;
return isset($this->rowDataBuffer[0]) ? $this->rowDataBuffer[0] : null;
}
/**
@ -214,10 +260,9 @@ class RowIterator implements IteratorInterface
*/
public function key()
{
return $this->numReadRows;
return $this->rowIndex;
}
/**
* Cleans up what was created to iterate over the object.
*

View File

@ -4,6 +4,7 @@ namespace Box\Spout\Reader\ODS;
use Box\Spout\Reader\SheetInterface;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\ReaderOptions;
/**
* Class Sheet
@ -27,13 +28,13 @@ class Sheet implements SheetInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
public function __construct($xmlReader, ReaderOptions $readerOptions, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
$this->rowIterator = new RowIterator($xmlReader, $readerOptions);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -6,6 +6,7 @@ use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\ReaderOptions;
/**
* Class SheetIterator
@ -24,8 +25,8 @@ class SheetIterator implements IteratorInterface
/** @var string $filePath Path of the file to be read */
protected $filePath;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Reader\ReaderOptions */
protected $readerOptions;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@ -41,13 +42,13 @@ class SheetIterator implements IteratorInterface
/**
* @param string $filePath Path of the file to be read
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $shouldFormatDates)
public function __construct($filePath, ReaderOptions $readerOptions)
{
$this->filePath = $filePath;
$this->shouldFormatDates = $shouldFormatDates;
$this->readerOptions = $readerOptions;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
@ -116,7 +117,7 @@ class SheetIterator implements IteratorInterface
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
$sheetName = $this->escaper->unescape($escapedSheetName);
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
return new Sheet($this->xmlReader, $this->readerOptions, $sheetName, $this->currentSheetIndex);
}
/**

View File

@ -0,0 +1,62 @@
<?php
namespace Box\Spout\Reader;
/**
 * Class ReaderOptions
 * Plain value holder for the options shared by the readers.
 * Each option has a setter (which returns the instance, so calls can be chained)
 * and a getter named after the option.
 *
 * @package Box\Spout\Reader
 */
class ReaderOptions
{
    /** @var bool Whether date/time values should be formatted as strings (true) instead of being returned as PHP objects (false) */
    protected $shouldFormatDates = false;

    /** @var bool Whether "empty" rows should be returned (true) or skipped (false). The exact definition of empty may depend on the reader implementation. */
    protected $shouldPreserveEmptyRows = false;

    /**
     * Chooses between formatted strings and PHP objects for date/time values.
     * The given value is cast to a boolean before being stored.
     *
     * @param bool $shouldFormatDates
     * @return ReaderOptions This instance
     */
    public function setShouldFormatDates($shouldFormatDates)
    {
        $this->shouldFormatDates = (bool) $shouldFormatDates;

        return $this;
    }

    /**
     * @see setShouldFormatDates
     * @return bool The stored "format dates" flag
     */
    public function shouldFormatDates()
    {
        return $this->shouldFormatDates;
    }

    /**
     * Chooses between returning and skipping "empty" rows.
     * The given value is cast to a boolean before being stored.
     *
     * @param bool $shouldPreserveEmptyRows
     * @return ReaderOptions This instance
     */
    public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
    {
        $this->shouldPreserveEmptyRows = (bool) $shouldPreserveEmptyRows;

        return $this;
    }

    /**
     * @see setShouldPreserveEmptyRows
     * @return bool The stored "preserve empty rows" flag
     */
    public function shouldPreserveEmptyRows()
    {
        return $this->shouldPreserveEmptyRows;
    }
}

View File

@ -4,6 +4,7 @@ namespace Box\Spout\Reader\XLSX\Helper;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Sheet;
use Box\Spout\Reader\ReaderOptions;
/**
* Class SheetHelper
@ -26,21 +27,21 @@ class SheetHelper
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Reader\ReaderOptions */
protected $readerOptions;
/**
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions)
{
$this->filePath = $filePath;
$this->sharedStringsHelper = $sharedStringsHelper;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->shouldFormatDates = $shouldFormatDates;
$this->readerOptions = $readerOptions;
}
/**
@ -92,7 +93,7 @@ class SheetHelper
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->readerOptions, $sheetIndexZeroBased, $sheetName);
}
/**

View File

@ -69,7 +69,7 @@ class Reader extends AbstractReader
$this->sharedStringsHelper->extractSharedStrings();
}
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->readerOptions);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Helper\CellHelper;
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
use Box\Spout\Reader\ReaderOptions;
/**
* Class RowIterator
@ -43,11 +44,11 @@ class RowIterator implements IteratorInterface
/** @var Helper\StyleHelper $styleHelper Helper to work with styles */
protected $styleHelper;
/** @var int Number of read rows */
protected $numReadRows = 0;
/** @var int Key for iterator */
protected $rowIndex = 0;
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = null;
/** @var array Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = [];
/** @var bool Indicates whether all rows have been read */
protected $hasReachedEndOfFile = false;
@ -55,13 +56,16 @@ class RowIterator implements IteratorInterface
/** @var int The number of columns the sheet has (0 meaning undefined) */
protected $numColumns = 0;
/** @var \Box\Spout\Reader\ReaderOptions */
protected $readerOptions;
/**
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions)
{
$this->filePath = $filePath;
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
@ -69,7 +73,8 @@ class RowIterator implements IteratorInterface
$this->xmlReader = new XMLReader();
$this->styleHelper = new StyleHelper($filePath);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
$this->readerOptions = $readerOptions;
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $readerOptions->shouldFormatDates());
}
/**
@ -101,7 +106,7 @@ class RowIterator implements IteratorInterface
}
$this->numReadRows = 0;
$this->rowDataBuffer = null;
$this->rowDataBuffer = [];
$this->hasReachedEndOfFile = false;
$this->numColumns = 0;
@ -131,6 +136,15 @@ class RowIterator implements IteratorInterface
{
$rowData = [];
if (count($this->rowDataBuffer) > 1) {
array_shift($this->rowDataBuffer);
$this->rowIndex++;
return;
} else {
$this->rowDataBuffer = [];
}
try {
while ($this->xmlReader->read()) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
@ -141,8 +155,10 @@ class RowIterator implements IteratorInterface
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
}
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
// Start of the row description
$prevRowIndex = $this->rowIndex;
$newRowIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
// Read spans info if present
$numberOfColumnsForRow = $this->numColumns;
@ -153,7 +169,15 @@ class RowIterator implements IteratorInterface
}
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
if ($this->readerOptions->shouldPreserveEmptyRows()) {
for ($i = $prevRowIndex + 1; $i < $newRowIndex; ++$i) {
$this->rowDataBuffer[] = $rowData; // fake empty rows
}
}
$this->rowIndex = $newRowIndex - count($this->rowDataBuffer);
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
@ -161,14 +185,14 @@ class RowIterator implements IteratorInterface
$node = $this->xmlReader->expand();
$rowData[$currentColumnIndex] = $this->getCellValue($node);
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
// If needed, we fill the empty cells
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
$this->numReadRows++;
break;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
// The closing "</worksheet>" marks the end of the file
$this->hasReachedEndOfFile = true;
break;
@ -179,7 +203,7 @@ class RowIterator implements IteratorInterface
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
}
$this->rowDataBuffer = $rowData;
$this->rowDataBuffer[] = $rowData;
}
/**
@ -201,7 +225,7 @@ class RowIterator implements IteratorInterface
*/
public function current()
{
return $this->rowDataBuffer;
return isset($this->rowDataBuffer[0]) ? $this->rowDataBuffer[0] : null;
}
/**
@ -212,7 +236,7 @@ class RowIterator implements IteratorInterface
*/
public function key()
{
return $this->numReadRows;
return $this->rowIndex;
}

View File

@ -3,6 +3,7 @@
namespace Box\Spout\Reader\XLSX;
use Box\Spout\Reader\SheetInterface;
use Box\Spout\Reader\ReaderOptions;
/**
* Class Sheet
@ -25,13 +26,13 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $readerOptions);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -5,6 +5,7 @@ namespace Box\Spout\Reader\XLSX;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\XLSX\Helper\SheetHelper;
use Box\Spout\Reader\Exception\NoSheetsFoundException;
use Box\Spout\Reader\ReaderOptions;
/**
* Class SheetIterator
@ -24,13 +25,13 @@ class SheetIterator implements IteratorInterface
* @param string $filePath Path of the file to be read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions)
{
// Fetch all available sheets
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $readerOptions);
$this->sheets = $sheetHelper->getSheets();
if (count($this->sheets) === 0) {

View File

@ -137,6 +137,24 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows);
}
/**
* Reads each file supplied by the data provider with empty rows preserved.
* NOTE(review): the loop body below holds no assertion yet, so this test currently
* checks nothing about the rows that were read.
*
* @dataProvider dataProviderForTestReadWithFilesGeneratedByExternalSoftwares
*
* @param string $fileName
* @param bool $skipLastEmptyValues Supplied by the data provider; not used by this test
* @return void
*/
public function testReadWithFilesGeneratedByExternalSoftwareAndEmptyRowsPreserved($fileName, $skipLastEmptyValues)
{
$reader = ReaderFactory::create(Type::ODS);
$reader->setShouldPreserveEmptyRows(true);
$allRows = $this->getAllRowsForFile($fileName, $reader);
foreach ($allRows as $index => $row) {
// :TODO: write useful test
// $this->assertCount($expectedColumns, $row);
}
}
/**
* @return void
@ -169,8 +187,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
*/
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
{
$shouldFormatDates = true;
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $shouldFormatDates);
$reader = ReaderFactory::create(Type::ODS);
$reader->setShouldFormatDates(true);
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $reader);
$expectedRows = [
['05/19/2016', '5/19/16', '05/19/2016 16:39:00', '05/19/16 04:39 PM', '5/19/2016'],
@ -213,13 +233,48 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
*/
public function testReadShouldSkipEmptyRow()
{
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods');
$allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods');
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
$expectedRows = [
['ods--11', 'ods--12', 'ods--13'],
1 => ['ods--11', 'ods--12', 'ods--13'],
// row skipped here
['ods--21', 'ods--22', 'ods--23'],
3 => ['ods--21', 'ods--22', 'ods--23'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
 * With empty rows preserved, the empty row of the fixture is expected back
 * as a row of empty strings, keyed by its own row number.
 *
 * @return void
 */
public function testReadShouldPreserveEmptyRow()
{
    $expectedRows = [
        1 => ['ods--11', 'ods--12', 'ods--13'],
        2 => ['', '', ''],
        3 => ['ods--21', 'ods--22', 'ods--23'],
    ];

    $odsReader = ReaderFactory::create(Type::ODS);
    $odsReader->setShouldPreserveEmptyRows(true);

    $actualRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods', $odsReader);

    $this->assertEquals($expectedRows, $actualRows);
}
/**
 * With empty rows preserved, every one of several consecutive empty rows
 * is expected back, each keyed by its own row number.
 *
 * @return void
 */
public function testReadShouldPreserveConsecutiveEmptyRows()
{
    $expectedRows = [
        1 => ['First'],
        2 => [''],
        3 => [''],
        4 => ['Second'],
    ];

    $odsReader = ReaderFactory::create(Type::ODS);
    $odsReader->setShouldPreserveEmptyRows(true);

    $actualRows = $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.ods', $odsReader);

    $this->assertEquals($expectedRows, $actualRows);
}
@ -241,6 +296,29 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals([$expectedRow], $allRows);
}
/**
 * Repeated (non-empty) rows are expected once per repetition, and the result
 * must be the same whether empty rows are skipped or preserved.
 *
 * @return void
 */
public function testReadShouldHandleRepeatedRows()
{
    $expectedRows = [
        1 => ['First'],
        2 => ['First'],
        3 => ['First'],
        4 => ['Second'],
        5 => ['Third'],
        6 => ['Third'],
    ];

    $reader = ReaderFactory::create(Type::ODS);

    // Same reader instance for both passes: first skipping, then preserving empty rows.
    foreach ([false, true] as $preserveEmptyRows) {
        $reader->setShouldPreserveEmptyRows($preserveEmptyRows);
        $actualRows = $this->getAllRowsForFirstSheet('sheet_with_repeated_rows.ods', $reader);
        $this->assertEquals($expectedRows, $actualRows);
    }
}
/**
* @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
@ -484,22 +562,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
* @param string $fileName
* @param bool|void $shouldFormatDates
* @return array All the read rows the given file
* @param Reader $reader
* @return array
*/
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
private function getAllRowsForFile($fileName, Reader $reader = null)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
$reader = ReaderFactory::create(Type::ODS);
$reader->setShouldFormatDates($shouldFormatDates);
if (!$reader) {
$reader = ReaderFactory::create(Type::ODS);
}
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
$allRows[] = $row;
}
$allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false));
}
$reader->close();
return $allRows;
}
/**
* @param string $fileName
* @param Reader $reader
* @return array
*/
private function getAllRowsForFirstSheet($fileName, Reader $reader = null)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
if (!$reader) {
$reader = ReaderFactory::create(Type::ODS);
}
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheet) {
$allRows = iterator_to_array($sheet->getRowIterator(), true);
break;
}
$reader->close();

View File

@ -239,8 +239,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
*/
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
{
$shouldFormatDates = true;
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $shouldFormatDates);
$reader = ReaderFactory::create(Type::XLSX);
$reader->setShouldFormatDates(true);
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $reader);
$expectedRows = [
['1/13/2016', '01/13/2016', '13-Jan-16', 'Wednesday January 13, 16', 'Today is 1/13/2016'],
@ -307,16 +309,53 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
* @return void
*/
public function testReadShouldSkipEmptyRows()
public function testReadShouldSkipEmptyRow()
{
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx');
$allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx');
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
$expectedRows = [
['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
// skipped row here
['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
 * With empty rows preserved, the empty row of the fixture is expected back
 * as a row of empty strings, keyed by its own row number.
 *
 * @return void
 */
public function testReadShouldPreserveEmptyRow()
{
    $expectedRows = [
        1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
        2 => ['', '', '', '', ''],
        3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
    ];

    $xlsxReader = ReaderFactory::create(Type::XLSX);
    $xlsxReader->setShouldPreserveEmptyRows(true);

    $actualRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx', $xlsxReader);

    $this->assertEquals($expectedRows, $actualRows);
}
/**
 * With empty rows preserved, every one of several consecutive empty rows
 * is expected back, each keyed by its own row number.
 *
 * @return void
 */
public function testReadShouldPreserveConsecutiveEmptyRows()
{
    $expectedRows = [
        1 => ['First'],
        2 => [''],
        3 => [''],
        4 => [''],
        5 => ['Second'],
        6 => ['Third'],
    ];

    $xlsxReader = ReaderFactory::create(Type::XLSX);
    $xlsxReader->setShouldPreserveEmptyRows(true);

    $actualRows = $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.xlsx', $xlsxReader);

    $this->assertEquals($expectedRows, $actualRows);
}
@ -549,22 +588,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
* @param string $fileName
* @param bool|void $shouldFormatDates
* @return array All the read rows the given file
* @param Reader $reader
* @return array
*/
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
private function getAllRowsForFile($fileName, Reader $reader = null)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
$reader = ReaderFactory::create(Type::XLSX);
$reader->setShouldFormatDates($shouldFormatDates);
if (!$reader) {
$reader = ReaderFactory::create(Type::XLSX);
}
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
$allRows[] = $row;
}
$allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false));
}
$reader->close();
return $allRows;
}
/**
* @param string $fileName
* @param Reader $reader
* @return array
*/
private function getAllRowsForFirstSheet($fileName, Reader $reader = null)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
if (!$reader) {
$reader = ReaderFactory::create(Type::XLSX);
}
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheet) {
$allRows = iterator_to_array($sheet->getRowIterator(), true);
break;
}
$reader->close();

Binary file not shown.