Merge e5c57d679457ed0c088439f8f9d1e9ee306d5df1 into b02d13cd406cf3489b490215fa2316b2b7c484ec
This commit is contained in:
commit
d46045745e
@ -19,8 +19,16 @@ abstract class AbstractReader implements ReaderInterface
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates = false;
|
||||
/** @var \Box\Spout\Reader\ReaderOptions */
|
||||
protected $readerOptions;
|
||||
|
||||
/**
 * Creates the reader and gives it a fresh ReaderOptions instance holding the default option values.
 */
public function __construct()
{
    $this->readerOptions = new ReaderOptions();
}
|
||||
|
||||
/**
|
||||
* Returns whether stream wrappers are supported
|
||||
@ -69,7 +77,19 @@ abstract class AbstractReader implements ReaderInterface
|
||||
*/
|
||||
public function setShouldFormatDates($shouldFormatDates)
{
    // The flag is stored twice: in the options object and in the reader's own property.
    $this->readerOptions->setShouldFormatDates($shouldFormatDates);
    $this->shouldFormatDates = $shouldFormatDates;

    return $this;
}
|
||||
|
||||
/**
 * Configures how "empty" rows are handled while reading.
 * The value is forwarded to the reader options object held by this reader.
 *
 * @param bool $shouldPreserveEmptyRows TRUE to return "empty" rows, FALSE to skip them
 * @return AbstractReader
 */
public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
{
    $this->readerOptions->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);

    return $this;
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ class Reader extends AbstractReader
|
||||
$this->zip = new \ZipArchive();
|
||||
|
||||
if ($this->zip->open($filePath) === true) {
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->readerOptions);
|
||||
} else {
|
||||
throw new IOException("Could not open $filePath for reading.");
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class RowIterator
|
||||
@ -21,10 +22,14 @@ class RowIterator implements IteratorInterface
|
||||
const XML_NODE_ROW = 'table:table-row';
|
||||
const XML_NODE_CELL = 'table:table-cell';
|
||||
const MAX_COLUMNS_EXCEL = 16384;
|
||||
const MAX_ROWS_EXCEL = 1048576;
|
||||
|
||||
/** Definition of XML attribute used to parse data */
|
||||
const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
|
||||
|
||||
/** Definition of XML attribute used to parse data */
|
||||
const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
|
||||
|
||||
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
protected $xmlReader;
|
||||
|
||||
@ -34,23 +39,27 @@ class RowIterator implements IteratorInterface
|
||||
/** @var bool Whether the iterator has already been rewound once */
|
||||
protected $hasAlreadyBeenRewound = false;
|
||||
|
||||
/** @var int Number of read rows */
|
||||
protected $numReadRows = 0;
|
||||
/** @var int Key for iterator */
|
||||
protected $rowIndex = 0;
|
||||
|
||||
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = null;
|
||||
/** @var array Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = [];
|
||||
|
||||
/** @var bool Indicates whether all rows have been read */
|
||||
protected $hasReachedEndOfFile = false;
|
||||
|
||||
/** @var \Box\Spout\Reader\ReaderOptions */
|
||||
protected $readerOptions;
|
||||
|
||||
/**
|
||||
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
*/
|
||||
public function __construct($xmlReader, $shouldFormatDates)
|
||||
public function __construct($xmlReader, ReaderOptions $readerOptions)
|
||||
{
|
||||
$this->xmlReader = $xmlReader;
|
||||
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
|
||||
$this->readerOptions = $readerOptions;
|
||||
$this->cellValueFormatter = new CellValueFormatter($readerOptions->shouldFormatDates());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -71,8 +80,8 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
$this->hasAlreadyBeenRewound = true;
|
||||
$this->numReadRows = 0;
|
||||
$this->rowDataBuffer = null;
|
||||
$this->rowIndex = 0;
|
||||
$this->rowDataBuffer = [];
|
||||
$this->hasReachedEndOfFile = false;
|
||||
|
||||
$this->next();
|
||||
@ -90,7 +99,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Move forward to next element. Empty rows will be skipped.
|
||||
* Move forward to next element. Empty rows can be skipped.
|
||||
* @link http://php.net/manual/en/iterator.next.php
|
||||
*
|
||||
* @return void
|
||||
@ -99,15 +108,34 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function next()
|
||||
{
|
||||
$prevRow = null;
|
||||
|
||||
if (count($this->rowDataBuffer) > 1) {
|
||||
array_shift($this->rowDataBuffer);
|
||||
$this->rowIndex++;
|
||||
|
||||
return;
|
||||
} else {
|
||||
$prevRow = $this->current();
|
||||
$this->rowDataBuffer = [];
|
||||
}
|
||||
|
||||
$rowData = [];
|
||||
$cellValue = null;
|
||||
$numRowsRepeated = 0;
|
||||
$numColumnsRepeated = 1;
|
||||
$numCellsRead = 0;
|
||||
$hasAlreadyReadOneCell = false;
|
||||
|
||||
try {
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
|
||||
// Start of a row description
|
||||
$this->rowIndex++;
|
||||
|
||||
$numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode();
|
||||
|
||||
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
// Start of a cell description
|
||||
$currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
|
||||
|
||||
@ -130,12 +158,8 @@ class RowIterator implements IteratorInterface
|
||||
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
|
||||
// End of the row description
|
||||
$isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
|
||||
if ($isEmptyRow) {
|
||||
// skip empty rows
|
||||
$this->next();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!$isEmptyRow) {
|
||||
// Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
|
||||
// The current count of read columns is determined by counting the values in $rowData.
|
||||
// This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
|
||||
@ -146,7 +170,18 @@ class RowIterator implements IteratorInterface
|
||||
for ($i = 0; $i < $numColumnsRepeated; $i++) {
|
||||
$rowData[] = $cellValue;
|
||||
}
|
||||
$this->numReadRows++;
|
||||
}
|
||||
} elseif ($this->readerOptions->shouldPreserveEmptyRows()) {
|
||||
// Take number of cells from the previously read line.
|
||||
$rowData = empty($prevRow) ? [] : array_fill(0, count($prevRow), '');
|
||||
} else {
|
||||
return $this->next();
|
||||
}
|
||||
|
||||
// see above, now check number of rows...
|
||||
if ($this->rowIndex - 1 + $numRowsRepeated >= self::MAX_ROWS_EXCEL) {
|
||||
$numRowsRepeated = 0;
|
||||
$this->hasReachedEndOfFile = true;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -161,7 +196,9 @@ class RowIterator implements IteratorInterface
|
||||
throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
|
||||
}
|
||||
|
||||
$this->rowDataBuffer = $rowData;
|
||||
for ($i = 0; $i < $numRowsRepeated; ++$i) {
|
||||
$this->rowDataBuffer[] = $rowData;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -173,6 +210,15 @@ class RowIterator implements IteratorInterface
|
||||
return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
|
||||
}
|
||||
|
||||
/**
 * Reads the "table:number-rows-repeated" attribute of the node the XML reader is positioned on.
 *
 * @return int The attribute value converted to an integer, or 1 if the attribute is missing
 */
protected function getNumRowsRepeatedForCurrentNode()
{
    $repeatedAttribute = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

    if ($repeatedAttribute === null) {
        // No attribute on the node: the row is counted once.
        return 1;
    }

    return (int) $repeatedAttribute;
}
|
||||
|
||||
/**
|
||||
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
|
||||
*
|
||||
@ -203,7 +249,7 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function current()
|
||||
{
|
||||
return $this->rowDataBuffer;
|
||||
return isset($this->rowDataBuffer[0]) ? $this->rowDataBuffer[0] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -214,10 +260,9 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function key()
|
||||
{
|
||||
return $this->numReadRows;
|
||||
return $this->rowIndex;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cleans up what was created to iterate over the object.
|
||||
*
|
||||
|
@ -4,6 +4,7 @@ namespace Box\Spout\Reader\ODS;
|
||||
|
||||
use Box\Spout\Reader\SheetInterface;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class Sheet
|
||||
@ -27,13 +28,13 @@ class Sheet implements SheetInterface
|
||||
|
||||
/**
|
||||
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||
* @param string $sheetName Name of the sheet
|
||||
*/
|
||||
public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
|
||||
public function __construct($xmlReader, ReaderOptions $readerOptions, $sheetIndex, $sheetName)
|
||||
{
|
||||
$this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
|
||||
$this->rowIterator = new RowIterator($xmlReader, $readerOptions);
|
||||
$this->index = $sheetIndex;
|
||||
$this->name = $sheetName;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class SheetIterator
|
||||
@ -24,8 +25,8 @@ class SheetIterator implements IteratorInterface
|
||||
/** @var string $filePath Path of the file to be read */
|
||||
protected $filePath;
|
||||
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates;
|
||||
/** @var \Box\Spout\Reader\ReaderOptions */
|
||||
protected $readerOptions;
|
||||
|
||||
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
protected $xmlReader;
|
||||
@ -41,13 +42,13 @@ class SheetIterator implements IteratorInterface
|
||||
|
||||
/**
|
||||
* @param string $filePath Path of the file to be read
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
|
||||
*/
|
||||
public function __construct($filePath, $shouldFormatDates)
|
||||
public function __construct($filePath, ReaderOptions $readerOptions)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->shouldFormatDates = $shouldFormatDates;
|
||||
$this->readerOptions = $readerOptions;
|
||||
$this->xmlReader = new XMLReader();
|
||||
|
||||
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
|
||||
@ -116,7 +117,7 @@ class SheetIterator implements IteratorInterface
|
||||
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
|
||||
$sheetName = $this->escaper->unescape($escapedSheetName);
|
||||
|
||||
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
|
||||
return new Sheet($this->xmlReader, $this->readerOptions, $sheetName, $this->currentSheetIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
|
62
src/Spout/Reader/ReaderOptions.php
Normal file
62
src/Spout/Reader/ReaderOptions.php
Normal file
@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
/**
 * Class ReaderOptions
 * Holds the options shared by a reader and the sheet/row iterators it creates.
 *
 * @package Box\Spout\Reader
 */
class ReaderOptions
{
    /** @var bool TRUE to format date/time values as strings, FALSE to return them as PHP objects */
    protected $shouldFormatDates = false;

    /** @var bool TRUE to return "empty" rows, FALSE to skip them. The exact definition of empty may depend on the reader implementation. */
    protected $shouldPreserveEmptyRows = false;

    /**
     * Sets whether date/time values should be formatted as strings or returned as PHP objects.
     * The given value is converted to a boolean before being stored.
     *
     * @param bool $shouldFormatDates
     * @return ReaderOptions
     */
    public function setShouldFormatDates($shouldFormatDates)
    {
        $this->shouldFormatDates = (bool) $shouldFormatDates;

        return $this;
    }

    /**
     * @see setShouldFormatDates
     * @return bool
     */
    public function shouldFormatDates()
    {
        return $this->shouldFormatDates;
    }

    /**
     * Sets whether "empty" rows should be returned or skipped.
     * The given value is converted to a boolean before being stored.
     *
     * @param bool $shouldPreserveEmptyRows
     * @return ReaderOptions
     */
    public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
    {
        $this->shouldPreserveEmptyRows = (bool) $shouldPreserveEmptyRows;

        return $this;
    }

    /**
     * @see setShouldPreserveEmptyRows
     * @return bool
     */
    public function shouldPreserveEmptyRows()
    {
        return $this->shouldPreserveEmptyRows;
    }
}
|
@ -4,6 +4,7 @@ namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Sheet;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class SheetHelper
|
||||
@ -26,21 +27,21 @@ class SheetHelper
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates;
|
||||
/** @var \Box\Spout\Reader\ReaderOptions */
|
||||
protected $readerOptions;
|
||||
|
||||
/**
|
||||
* @param string $filePath Path of the XLSX file being read
|
||||
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
*/
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->sharedStringsHelper = $sharedStringsHelper;
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
$this->shouldFormatDates = $shouldFormatDates;
|
||||
$this->readerOptions = $readerOptions;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -92,7 +93,7 @@ class SheetHelper
|
||||
|
||||
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
|
||||
|
||||
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
|
||||
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->readerOptions, $sheetIndexZeroBased, $sheetName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -69,7 +69,7 @@ class Reader extends AbstractReader
|
||||
$this->sharedStringsHelper->extractSharedStrings();
|
||||
}
|
||||
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->readerOptions);
|
||||
} else {
|
||||
throw new IOException("Could not open $filePath for reading.");
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\CellHelper;
|
||||
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
|
||||
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class RowIterator
|
||||
@ -43,11 +44,11 @@ class RowIterator implements IteratorInterface
|
||||
/** @var Helper\StyleHelper $styleHelper Helper to work with styles */
|
||||
protected $styleHelper;
|
||||
|
||||
/** @var int Number of read rows */
|
||||
protected $numReadRows = 0;
|
||||
/** @var int Key for iterator */
|
||||
protected $rowIndex = 0;
|
||||
|
||||
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = null;
|
||||
/** @var array Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = [];
|
||||
|
||||
/** @var bool Indicates whether all rows have been read */
|
||||
protected $hasReachedEndOfFile = false;
|
||||
@ -55,13 +56,16 @@ class RowIterator implements IteratorInterface
|
||||
/** @var int The number of columns the sheet has (0 meaning undefined) */
|
||||
protected $numColumns = 0;
|
||||
|
||||
/** @var \Box\Spout\Reader\ReaderOptions */
|
||||
protected $readerOptions;
|
||||
|
||||
/**
|
||||
* @param string $filePath Path of the XLSX file being read
|
||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
*/
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
|
||||
@ -69,7 +73,8 @@ class RowIterator implements IteratorInterface
|
||||
$this->xmlReader = new XMLReader();
|
||||
|
||||
$this->styleHelper = new StyleHelper($filePath);
|
||||
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
|
||||
$this->readerOptions = $readerOptions;
|
||||
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $readerOptions->shouldFormatDates());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -101,7 +106,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
$this->numReadRows = 0;
|
||||
$this->rowDataBuffer = null;
|
||||
$this->rowDataBuffer = [];
|
||||
$this->hasReachedEndOfFile = false;
|
||||
$this->numColumns = 0;
|
||||
|
||||
@ -131,6 +136,15 @@ class RowIterator implements IteratorInterface
|
||||
{
|
||||
$rowData = [];
|
||||
|
||||
if (count($this->rowDataBuffer) > 1) {
|
||||
array_shift($this->rowDataBuffer);
|
||||
$this->rowIndex++;
|
||||
|
||||
return;
|
||||
} else {
|
||||
$this->rowDataBuffer = [];
|
||||
}
|
||||
|
||||
try {
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
|
||||
@ -143,6 +157,8 @@ class RowIterator implements IteratorInterface
|
||||
|
||||
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
|
||||
// Start of the row description
|
||||
$prevRowIndex = $this->rowIndex;
|
||||
$newRowIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||
|
||||
// Read spans info if present
|
||||
$numberOfColumnsForRow = $this->numColumns;
|
||||
@ -153,6 +169,14 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||
|
||||
if ($this->readerOptions->shouldPreserveEmptyRows()) {
|
||||
for ($i = $prevRowIndex + 1; $i < $newRowIndex; ++$i) {
|
||||
$this->rowDataBuffer[] = $rowData; // fake empty rows
|
||||
}
|
||||
}
|
||||
|
||||
$this->rowIndex = $newRowIndex - count($this->rowDataBuffer);
|
||||
|
||||
} elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
// Start of a cell description
|
||||
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||
@ -165,7 +189,7 @@ class RowIterator implements IteratorInterface
|
||||
// End of the row description
|
||||
// If needed, we fill the empty cells
|
||||
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||
$this->numReadRows++;
|
||||
|
||||
break;
|
||||
|
||||
} elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
|
||||
@ -179,7 +203,7 @@ class RowIterator implements IteratorInterface
|
||||
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
|
||||
}
|
||||
|
||||
$this->rowDataBuffer = $rowData;
|
||||
$this->rowDataBuffer[] = $rowData;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -201,7 +225,7 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function current()
|
||||
{
|
||||
return $this->rowDataBuffer;
|
||||
return isset($this->rowDataBuffer[0]) ? $this->rowDataBuffer[0] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -212,7 +236,7 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function key()
|
||||
{
|
||||
return $this->numReadRows;
|
||||
return $this->rowIndex;
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Reader\SheetInterface;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class Sheet
|
||||
@ -25,13 +26,13 @@ class Sheet implements SheetInterface
|
||||
* @param string $filePath Path of the XLSX file being read
|
||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||
* @param Helper\SharedStringsHelper Helper to work with shared strings
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||
* @param string $sheetName Name of the sheet
|
||||
*/
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, ReaderOptions $readerOptions, $sheetIndex, $sheetName)
|
||||
{
|
||||
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
|
||||
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $readerOptions);
|
||||
$this->index = $sheetIndex;
|
||||
$this->name = $sheetName;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ namespace Box\Spout\Reader\XLSX;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\XLSX\Helper\SheetHelper;
|
||||
use Box\Spout\Reader\Exception\NoSheetsFoundException;
|
||||
use Box\Spout\Reader\ReaderOptions;
|
||||
|
||||
/**
|
||||
* Class SheetIterator
|
||||
@ -24,13 +25,13 @@ class SheetIterator implements IteratorInterface
|
||||
* @param string $filePath Path of the file to be read
|
||||
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param \Box\Spout\Reader\ReaderOptions $readerOptions
|
||||
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
|
||||
*/
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, ReaderOptions $readerOptions)
|
||||
{
|
||||
// Fetch all available sheets
|
||||
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
|
||||
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $readerOptions);
|
||||
$this->sheets = $sheetHelper->getSheets();
|
||||
|
||||
if (count($this->sheets) === 0) {
|
||||
|
@ -137,6 +137,24 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
 * Reads each file of the data provider with empty rows preserved.
 * No assertion is made on the rows yet (see the TODO in the loop).
 *
 * @dataProvider dataProviderForTestReadWithFilesGeneratedByExternalSoftwares
 *
 * @param string $fileName
 * @param bool $skipLastEmptyValues
 * @return void
 */
public function testReadWithFilesGeneratedByExternalSoftwareAndEmptyRowsPreserved($fileName, $skipLastEmptyValues)
{
    $reader = ReaderFactory::create(Type::ODS);
    $reader->setShouldPreserveEmptyRows(true);

    foreach ($this->getAllRowsForFile($fileName, $reader) as $row) {
        // :TODO: write useful test
        // $this->assertCount($expectedColumns, $row);
    }
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
@ -169,8 +187,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
|
||||
{
|
||||
$shouldFormatDates = true;
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $shouldFormatDates);
|
||||
$reader = ReaderFactory::create(Type::ODS);
|
||||
$reader->setShouldFormatDates(true);
|
||||
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $reader);
|
||||
|
||||
$expectedRows = [
|
||||
['05/19/2016', '5/19/16', '05/19/2016 16:39:00', '05/19/16 04:39 PM', '5/19/2016'],
|
||||
@ -213,13 +233,48 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testReadShouldSkipEmptyRow()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods');
|
||||
$allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods');
|
||||
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
|
||||
|
||||
$expectedRows = [
|
||||
['ods--11', 'ods--12', 'ods--13'],
|
||||
1 => ['ods--11', 'ods--12', 'ods--13'],
|
||||
// row skipped here
|
||||
['ods--21', 'ods--22', 'ods--23'],
|
||||
3 => ['ods--21', 'ods--22', 'ods--23'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
 * With empty rows preserved, the empty row of the sheet must be returned
 * as a row of empty strings, under its own key.
 *
 * @return void
 */
public function testReadShouldPreserveEmptyRow()
{
    $reader = ReaderFactory::create(Type::ODS);
    $reader->setShouldPreserveEmptyRows(true);

    $this->assertEquals(
        [
            1 => ['ods--11', 'ods--12', 'ods--13'],
            2 => ['', '', ''],
            3 => ['ods--21', 'ods--22', 'ods--23'],
        ],
        $this->getAllRowsForFirstSheet('sheet_with_empty_row.ods', $reader)
    );
}
|
||||
|
||||
/**
 * With empty rows preserved, two consecutive empty rows must both be returned,
 * each under its own key.
 *
 * @return void
 */
public function testReadShouldPreserveConsecutiveEmptyRows()
{
    $reader = ReaderFactory::create(Type::ODS);
    $reader->setShouldPreserveEmptyRows(true);

    $this->assertEquals(
        [
            1 => ['First'],
            2 => [''],
            3 => [''],
            4 => ['Second'],
        ],
        $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.ods', $reader)
    );
}
|
||||
@ -241,6 +296,29 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals([$expectedRow], $allRows);
|
||||
}
|
||||
|
||||
/**
 * Repeated rows must be returned once per repetition, whether or not
 * empty rows are preserved.
 *
 * @return void
 */
public function testReadShouldHandleRepeatedRows()
{
    $expectedRows = [
        1 => ['First'],
        2 => ['First'],
        3 => ['First'],
        4 => ['Second'],
        5 => ['Third'],
        6 => ['Third'],
    ];

    $reader = ReaderFactory::create(Type::ODS);

    // The same reader instance is reused for both passes: first skipping, then preserving empty rows.
    foreach ([false, true] as $shouldPreserveEmptyRows) {
        $reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
        $allRows = $this->getAllRowsForFirstSheet('sheet_with_repeated_rows.ods', $reader);
        $this->assertEquals($expectedRows, $allRows);
    }
}
|
||||
|
||||
/**
|
||||
* @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
|
||||
@ -484,22 +562,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param bool|void $shouldFormatDates
|
||||
* @return array All the read rows the given file
|
||||
* @param Reader $reader
|
||||
* @return array
|
||||
*/
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
|
||||
private function getAllRowsForFile($fileName, Reader $reader = null)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
if (!$reader) {
|
||||
$reader = ReaderFactory::create(Type::ODS);
|
||||
$reader->setShouldFormatDates($shouldFormatDates);
|
||||
}
|
||||
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
|
||||
$allRows[] = $row;
|
||||
$allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false));
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
||||
return $allRows;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param Reader $reader
|
||||
* @return array
|
||||
*/
|
||||
private function getAllRowsForFirstSheet($fileName, Reader $reader = null)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
if (!$reader) {
|
||||
$reader = ReaderFactory::create(Type::ODS);
|
||||
}
|
||||
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheet) {
|
||||
$allRows = iterator_to_array($sheet->getRowIterator(), true);
|
||||
break;
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
@ -239,8 +239,10 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
|
||||
{
|
||||
$shouldFormatDates = true;
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $shouldFormatDates);
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->setShouldFormatDates(true);
|
||||
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $reader);
|
||||
|
||||
$expectedRows = [
|
||||
['1/13/2016', '01/13/2016', '13-Jan-16', 'Wednesday January 13, 16', 'Today is 1/13/2016'],
|
||||
@ -307,16 +309,53 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyRows()
|
||||
public function testReadShouldSkipEmptyRow()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx');
|
||||
$allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx');
|
||||
|
||||
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
|
||||
|
||||
$expectedRows = [
|
||||
['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
|
||||
1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
|
||||
// skipped row here
|
||||
['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
|
||||
3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldPreserveEmptyRow()
|
||||
{
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->setShouldPreserveEmptyRows(true);
|
||||
$allRows = $this->getAllRowsForFirstSheet('sheet_with_empty_row.xlsx', $reader);
|
||||
|
||||
$expectedRows = [
|
||||
1 => ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
|
||||
2 => ['', '', '', '', ''],
|
||||
3 => ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldPreserveConsecutiveEmptyRows()
|
||||
{
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->setShouldPreserveEmptyRows(true);
|
||||
$allRows = $this->getAllRowsForFirstSheet('sheet_with_consecutive_empty_rows.xlsx', $reader);
|
||||
|
||||
$expectedRows = [
|
||||
1 => ['First'],
|
||||
2 => [''],
|
||||
3 => [''],
|
||||
4 => [''],
|
||||
5 => ['Second'],
|
||||
6 => ['Third'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
@ -549,22 +588,49 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param bool|void $shouldFormatDates
|
||||
* @return array All the read rows the given file
|
||||
* @param Reader $reader
|
||||
* @return array
|
||||
*/
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
|
||||
private function getAllRowsForFile($fileName, Reader $reader = null)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
if (!$reader) {
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->setShouldFormatDates($shouldFormatDates);
|
||||
}
|
||||
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
|
||||
$allRows[] = $row;
|
||||
$allRows = array_merge($allRows, iterator_to_array($sheet->getRowIterator(), false));
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
||||
return $allRows;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param Reader $reader
|
||||
* @return array
|
||||
*/
|
||||
private function getAllRowsForFirstSheet($fileName, Reader $reader = null)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
if (!$reader) {
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
}
|
||||
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheet) {
|
||||
$allRows = iterator_to_array($sheet->getRowIterator(), true);
|
||||
break;
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
BIN
tests/resources/ods/sheet_with_consecutive_empty_rows.ods
Normal file
BIN
tests/resources/ods/sheet_with_consecutive_empty_rows.ods
Normal file
Binary file not shown.
BIN
tests/resources/ods/sheet_with_repeated_rows.ods
Normal file
BIN
tests/resources/ods/sheet_with_repeated_rows.ods
Normal file
Binary file not shown.
BIN
tests/resources/xlsx/sheet_with_consecutive_empty_rows.xlsx
Normal file
BIN
tests/resources/xlsx/sheet_with_consecutive_empty_rows.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user