Option to keep empty rows (#331)
* Add option to preserve empty rows when reading an XLSX file
* Add option to preserve empty rows when reading a CSV file
* Add option to preserve empty rows when reading an ODS file
This commit is contained in:
parent
77178122c3
commit
0978d340f0
@ -22,6 +22,9 @@ abstract class AbstractReader implements ReaderInterface
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates = false;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows = false;
|
||||
|
||||
/**
|
||||
* Returns whether stream wrappers are supported
|
||||
*
|
||||
@ -64,6 +67,7 @@ abstract class AbstractReader implements ReaderInterface
|
||||
/**
|
||||
* Sets whether date/time values should be returned as PHP objects or be formatted as strings.
|
||||
*
|
||||
* @api
|
||||
* @param bool $shouldFormatDates
|
||||
* @return AbstractReader
|
||||
*/
|
||||
@ -73,6 +77,19 @@ abstract class AbstractReader implements ReaderInterface
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Configures how rows without data are handled: pass true to have
 * them returned, false to have them skipped.
 *
 * @api
 * @param bool $shouldPreserveEmptyRows
 * @return AbstractReader The reader itself, to allow chaining
 */
public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
{
    $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

    return $this;
}
|
||||
|
||||
/**
|
||||
* Prepares the reader to read the given file. It also makes sure
|
||||
* that the file exists and is readable.
|
||||
|
@ -119,8 +119,9 @@ class Reader extends AbstractReader
|
||||
$this->filePointer,
|
||||
$this->fieldDelimiter,
|
||||
$this->fieldEnclosure,
|
||||
$this->encoding,
|
||||
$this->endOfLineCharacter,
|
||||
$this->encoding,
|
||||
$this->shouldPreserveEmptyRows,
|
||||
$this->globalFunctionsHelper
|
||||
);
|
||||
}
|
||||
|
@ -52,21 +52,26 @@ class RowIterator implements IteratorInterface
|
||||
/** @var string End of line delimiter, given by the user as input. */
|
||||
protected $inputEOLDelimiter;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/**
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that enclose fields
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param string $endOfLineDelimiter End of line delimiter
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
|
||||
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper)
|
||||
{
|
||||
$this->filePointer = $filePointer;
|
||||
$this->fieldDelimiter = $fieldDelimiter;
|
||||
$this->fieldEnclosure = $fieldEnclosure;
|
||||
$this->encoding = $encoding;
|
||||
$this->inputEOLDelimiter = $endOfLineDelimiter;
|
||||
$this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
|
||||
$this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
|
||||
@ -114,7 +119,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Move forward to next element. Empty rows are skipped.
|
||||
* Move forward to next element. Reads data for the next unprocessed row.
|
||||
* @link http://php.net/manual/en/iterator.next.php
|
||||
*
|
||||
* @return void
|
||||
@ -124,25 +129,48 @@ class RowIterator implements IteratorInterface
|
||||
{
|
||||
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
|
||||
|
||||
if ($this->hasReachedEndOfFile) {
|
||||
return;
|
||||
if (!$this->hasReachedEndOfFile) {
|
||||
$this->readDataForNextRow();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Keeps fetching rows until shouldReadNextRow() reports that the fetched
 * data can be returned, then stores that data in the row buffer.
 * When no row could be fetched, the iterator is flagged as having
 * reached the end of the file instead.
 *
 * @return void
 * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 */
protected function readDataForNextRow()
{
    do {
        $rowData = $this->getNextUTF8EncodedRow();
    } while ($this->shouldReadNextRow($rowData));

    if ($rowData !== false) {
        // NULL cell values are replaced by empty strings and every other value is cast to a string.
        // This is what "str_replace(null, null, $rowData)" used to do, without passing NULL
        // to str_replace(), which is deprecated as of PHP 8.1.
        $this->rowDataBuffer = array_map(static function ($cellValue) {
            return ($cellValue === null) ? '' : (string) $cellValue;
        }, $rowData);
        $this->numReadRows++;
    } else {
        // If we reach this point, it means end of file was reached.
        // This happens when the last lines are empty lines.
        $this->hasReachedEndOfFile = true;
    }
}
|
||||
|
||||
/**
 * Decides whether the row that was just fetched must be discarded and another one read.
 * Reading continues when the fetch failed although the end of the file has not been
 * reached, or when the line is empty and empty rows are not preserved.
 *
 * @param array|bool $currentRowData
 * @return bool Whether the data for the current row can be returned or if we need to keep reading
 */
protected function shouldReadNextRow($currentRowData)
{
    $fetchFailed = ($currentRowData === false);
    $isAtEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
    $lineIsEmpty = $this->isEmptyLine($currentRowData);

    if ($fetchFailed && !$isAtEndOfFile) {
        return true;
    }

    return ($lineIsEmpty && !$this->shouldPreserveEmptyRows);
}
|
||||
|
||||
/**
|
||||
* Returns the next row, converted if necessary to UTF-8.
|
||||
* As fgetcsv() does not manage correctly encoding for non UTF-8 data,
|
||||
@ -154,7 +182,7 @@ class RowIterator implements IteratorInterface
|
||||
protected function getNextUTF8EncodedRow()
|
||||
{
|
||||
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
if (false === $encodedRowData) {
|
||||
if ($encodedRowData === false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -195,7 +223,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $lineData Array containing the cells value for the line
|
||||
* @param array|bool $lineData Array containing the cells value for the line
|
||||
* @return bool Whether the given line is empty
|
||||
*/
|
||||
protected function isEmptyLine($lineData)
|
||||
|
@ -18,12 +18,21 @@ class Sheet implements SheetInterface
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that enclose fields
|
||||
* @param string $endOfLineCharacter Character defining the end of a line
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct(
    $filePointer,
    $fieldDelimiter,
    $fieldEnclosure,
    $endOfLineCharacter,
    $encoding,
    $shouldPreserveEmptyRows,
    $globalFunctionsHelper
) {
    // All arguments are forwarded, in the same order, to the row iterator of this sheet
    $this->rowIterator = new RowIterator(
        $filePointer,
        $fieldDelimiter,
        $fieldEnclosure,
        $endOfLineCharacter,
        $encoding,
        $shouldPreserveEmptyRows,
        $globalFunctionsHelper
    );
}
|
||||
|
||||
/**
|
||||
|
@ -22,12 +22,21 @@ class SheetIterator implements IteratorInterface
|
||||
* @param resource $filePointer
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
* @param string $fieldEnclosure Character that enclose fields
|
||||
* @param string $endOfLineCharacter Character defining the end of a line
|
||||
* @param string $encoding Encoding of the CSV file to be read
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct(
    $filePointer,
    $fieldDelimiter,
    $fieldEnclosure,
    $endOfLineCharacter,
    $encoding,
    $shouldPreserveEmptyRows,
    $globalFunctionsHelper
) {
    // All arguments are forwarded, in the same order, to the sheet held by this iterator
    $this->sheet = new Sheet(
        $filePointer,
        $fieldDelimiter,
        $fieldEnclosure,
        $endOfLineCharacter,
        $encoding,
        $shouldPreserveEmptyRows,
        $globalFunctionsHelper
    );
}
|
||||
|
||||
/**
|
||||
|
@ -42,7 +42,7 @@ class Reader extends AbstractReader
|
||||
$this->zip = new \ZipArchive();
|
||||
|
||||
if ($this->zip->open($filePath) === true) {
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates, $this->shouldPreserveEmptyRows);
|
||||
} else {
|
||||
throw new IOException("Could not open $filePath for reading.");
|
||||
}
|
||||
|
@ -23,33 +23,55 @@ class RowIterator implements IteratorInterface
|
||||
const MAX_COLUMNS_EXCEL = 16384;
|
||||
|
||||
/** Definition of XML attribute used to parse data */
|
||||
const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
|
||||
const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
|
||||
|
||||
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
protected $xmlReader;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/** @var Helper\CellValueFormatter Helper to format cell values */
|
||||
protected $cellValueFormatter;
|
||||
|
||||
/** @var bool Whether the iterator has already been rewound once */
|
||||
protected $hasAlreadyBeenRewound = false;
|
||||
|
||||
/** @var int Number of read rows */
|
||||
protected $numReadRows = 0;
|
||||
|
||||
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = null;
|
||||
|
||||
/** @var bool Indicates whether all rows have been read */
|
||||
protected $hasReachedEndOfFile = false;
|
||||
|
||||
/** @var int Last row index processed (one-based) */
|
||||
protected $lastRowIndexProcessed = 0;
|
||||
|
||||
/** @var int Row index to be processed next (one-based) */
|
||||
protected $nextRowIndexToBeProcessed = 1;
|
||||
|
||||
/** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
|
||||
protected $lastProcessedCellValue = null;
|
||||
|
||||
/** @var int Number of times the last processed row should be repeated */
|
||||
protected $numRowsRepeated = 1;
|
||||
|
||||
/** @var int Number of times the last cell value should be copied to the cells on its right */
|
||||
protected $numColumnsRepeated = 1;
|
||||
|
||||
/** @var bool Whether at least one cell has been read for the row currently being processed */
|
||||
protected $hasAlreadyReadOneCellInCurrentRow = false;
|
||||
|
||||
|
||||
/**
 * Stores the XML reader and the empty rows setting, and creates
 * the helper used to format cell values.
 *
 * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
 * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
 */
public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows)
{
    $this->xmlReader = $xmlReader;
    $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

    // The dates setting is only needed by the cell value formatter
    $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
}
|
||||
|
||||
@ -71,7 +93,8 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
$this->hasAlreadyBeenRewound = true;
|
||||
$this->numReadRows = 0;
|
||||
$this->lastRowIndexProcessed = 0;
|
||||
$this->nextRowIndexToBeProcessed = 1;
|
||||
$this->rowDataBuffer = null;
|
||||
$this->hasReachedEndOfFile = false;
|
||||
|
||||
@ -98,61 +121,72 @@ class RowIterator implements IteratorInterface
|
||||
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
|
||||
*/
|
||||
public function next()
{
    $mustReadRowData = $this->doesNeedDataForNextRowToBeProcessed();

    if ($mustReadRowData) {
        $this->readDataForNextRow($this->xmlReader);
    }

    // One more row has been processed, whether or not new data had to be read for it
    $this->lastRowIndexProcessed += 1;
}
|
||||
|
||||
/**
 * Tells whether new data must be read before the next row can be processed.
 * Data must be read when:
 *  - no row has been processed yet
 *  - OR empty rows are not preserved
 *  - OR the last processed row is the one right before the next row to be processed
 *
 * @return bool Whether we need data for the next row to be processed.
 */
protected function doesNeedDataForNextRowToBeProcessed()
{
    if ($this->lastRowIndexProcessed === 0) {
        // nothing has been processed so far
        return true;
    }

    if (!$this->shouldPreserveEmptyRows) {
        return true;
    }

    return ($this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1);
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
|
||||
* @return void
|
||||
* @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
|
||||
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
|
||||
*/
|
||||
protected function readDataForNextRow($xmlReader)
|
||||
{
|
||||
$rowData = [];
|
||||
$cellValue = null;
|
||||
$numColumnsRepeated = 1;
|
||||
$numCellsRead = 0;
|
||||
$hasAlreadyReadOneCell = false;
|
||||
|
||||
try {
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
// Start of a cell description
|
||||
$currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
|
||||
while ($xmlReader->read()) {
|
||||
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
|
||||
$this->processRowStartingNode($xmlReader);
|
||||
|
||||
$node = $this->xmlReader->expand();
|
||||
$currentCellValue = $this->getCellValue($node);
|
||||
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
$rowData = $this->processCellStartingNode($xmlReader, $rowData);
|
||||
|
||||
// process cell N only after having read cell N+1 (see below why)
|
||||
if ($hasAlreadyReadOneCell) {
|
||||
for ($i = 0; $i < $numColumnsRepeated; $i++) {
|
||||
$rowData[] = $cellValue;
|
||||
}
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
|
||||
$isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue);
|
||||
|
||||
// if the fetched row is empty and we don't want to preserve it...
|
||||
if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
|
||||
// ... skip it
|
||||
continue;
|
||||
}
|
||||
|
||||
$cellValue = $currentCellValue;
|
||||
$numColumnsRepeated = $currentNumColumnsRepeated;
|
||||
$rowData = $this->processRowEndingNode($rowData, $isEmptyRow);
|
||||
|
||||
$numCellsRead++;
|
||||
$hasAlreadyReadOneCell = true;
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
|
||||
// End of the row description
|
||||
$isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
|
||||
if ($isEmptyRow) {
|
||||
// skip empty rows
|
||||
$this->next();
|
||||
return;
|
||||
}
|
||||
|
||||
// Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
|
||||
// The current count of read columns is determined by counting the values in $rowData.
|
||||
// This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
|
||||
// with a number-columns-repeated value equals to the number of (supported columns - used columns).
|
||||
// In Excel, the number of supported columns is 16384, but we don't want to returns rows with
|
||||
// always 16384 cells.
|
||||
if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
|
||||
for ($i = 0; $i < $numColumnsRepeated; $i++) {
|
||||
$rowData[] = $cellValue;
|
||||
}
|
||||
$this->numReadRows++;
|
||||
}
|
||||
// at this point, we have all the data we need for the row
|
||||
// so that we can populate the buffer
|
||||
break;
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
|
||||
// The closing "</table:table>" marks the end of the file
|
||||
$this->hasReachedEndOfFile = true;
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
|
||||
$this->processTableEndingNode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -165,11 +199,99 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
 * Clears the state kept for the cells of a row and stores how many times
 * the row that is starting is repeated.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 * @return void
 */
protected function processRowStartingNode($xmlReader)
{
    // Repetition count declared on the row node
    $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

    // No cell has been read yet for this row
    $this->numColumnsRepeated = 1;
    $this->lastProcessedCellValue = null;
    $this->hasAlreadyReadOneCellInCurrentRow = false;
}
|
||||
|
||||
/**
 * Reads the value and repetition count of the current cell and remembers them.
 * The previously remembered cell, if any, is appended to the row data,
 * as many times as it is repeated.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 * @param array $rowData Data of all cells read so far
 * @return array Original row data + data for the previously remembered cell, if any
 */
protected function processCellStartingNode($xmlReader, $rowData)
{
    $repeatCountForThisCell = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
    $valueOfThisCell = $this->getCellValue($xmlReader->expand());

    // Cell N is only added once cell N+1 is read: the last cell of a row
    // is added by processRowEndingNode() instead.
    if ($this->hasAlreadyReadOneCellInCurrentRow) {
        $numCopiesLeft = $this->numColumnsRepeated;
        while ($numCopiesLeft-- > 0) {
            $rowData[] = $this->lastProcessedCellValue;
        }
    }

    // The cell that was just read becomes the pending one
    $this->numColumnsRepeated = $repeatCountForThisCell;
    $this->lastProcessedCellValue = $valueOfThisCell;
    $this->hasAlreadyReadOneCellInCurrentRow = true;

    return $rowData;
}
|
||||
|
||||
/**
 * Appends the last read cell to the row data, unless it is a trailing cell repeater,
 * and moves the index of the next row to be processed past the repetitions of this row.
 *
 * @param array $rowData Data of all cells read so far
 * @param bool $isEmptyRow Whether the given row is empty
 * @return array Row data, with the last read cell appended when applicable
 */
protected function processRowEndingNode($rowData, $isEmptyRow)
{
    // An empty row never gets more than one cell
    $numCopiesToAdd = $isEmptyRow ? 1 : $this->numColumnsRepeated;

    // Excel adds a last empty "<table:table-cell>" with a number-columns-repeated value equal to
    // (supported columns - used columns), 16384 columns being supported. Such a cell makes the
    // total reach exactly 16384 and is left out, so that rows don't always contain 16384 cells.
    // The number of used columns is the number of values currently in $rowData.
    $isTrailingCellRepeater = ((count($rowData) + $numCopiesToAdd) === self::MAX_COLUMNS_EXCEL);

    if (!$isTrailingCellRepeater) {
        $numCopiesLeft = $numCopiesToAdd;
        while ($numCopiesLeft-- > 0) {
            $rowData[] = $this->lastProcessedCellValue;
        }
    }

    // Row N repeated M times means that the next row to be processed is row (N+M)
    $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

    return $rowData;
}
|
||||
|
||||
/**
 * Flags the iterator as having reached the end of the file.
 *
 * @return void
 */
protected function processTableEndingNode()
{
    // There is nothing left to read after the closing "</table:table>"
    $this->hasReachedEndOfFile = true;
}
|
||||
|
||||
/**
 * Reads the "table:number-rows-repeated" attribute of the current node.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
 */
protected function getNumRowsRepeatedForCurrentNode($xmlReader)
{
    $attributeValue = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

    if ($attributeValue === null) {
        // attribute missing: the row is not repeated
        return 1;
    }

    return intval($attributeValue);
}
|
||||
|
||||
/**
 * Reads the "table:number-columns-repeated" attribute of the current node.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
 */
protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
{
    $attributeValue = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

    if ($attributeValue === null) {
        // attribute missing: the cell is not repeated
        return 1;
    }

    return intval($attributeValue);
}
|
||||
|
||||
@ -185,14 +307,21 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
 * After finishing processing each cell, a row is considered empty if it contains
 * no cells or if the value of the last read cell is an empty string.
 * After finishing processing each cell, the last read cell is not part of the
 * row data yet (as we still need to apply the "num-columns-repeated" attribute).
 *
 * @param array $rowData
 * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
 * @return bool Whether the row is empty
 */
protected function isEmptyRow($rowData, $lastReadCellValue)
{
    if (count($rowData) !== 0) {
        return false;
    }

    if ($lastReadCellValue === null) {
        return true;
    }

    // Non-scalar values (e.g. \DateTime or \DateInterval objects) can't be passed to trim().
    // Such a value is actual data, so the row is not empty.
    if (!is_scalar($lastReadCellValue)) {
        return false;
    }

    return (trim((string) $lastReadCellValue) === '');
}
|
||||
|
||||
/**
|
||||
@ -214,7 +343,7 @@ class RowIterator implements IteratorInterface
|
||||
*/
|
||||
public function key()
{
    // The key is the one-based index of the last processed row
    return $this->lastRowIndexProcessed;
}
|
||||
|
||||
|
||||
|
@ -28,12 +28,13 @@ class Sheet implements SheetInterface
|
||||
/**
 * Stores the sheet's index and name, and creates the iterator used to read its rows.
 *
 * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
 * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
 * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
 * @param string $sheetName Name of the sheet
 */
public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName)
{
    $this->index = $sheetIndex;
    $this->name = $sheetName;

    // The reading options are only needed by the row iterator
    $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows);
}
|
||||
|
@ -27,6 +27,9 @@ class SheetIterator implements IteratorInterface
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
protected $xmlReader;
|
||||
|
||||
@ -42,12 +45,14 @@ class SheetIterator implements IteratorInterface
|
||||
/**
|
||||
* @param string $filePath Path of the file to be read
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
|
||||
*/
|
||||
public function __construct($filePath, $shouldFormatDates)
|
||||
public function __construct($filePath, $shouldFormatDates, $shouldPreserveEmptyRows)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->shouldFormatDates = $shouldFormatDates;
|
||||
$this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
|
||||
$this->xmlReader = new XMLReader();
|
||||
|
||||
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
|
||||
@ -116,7 +121,7 @@ class SheetIterator implements IteratorInterface
|
||||
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
|
||||
$sheetName = $this->escaper->unescape($escapedSheetName);
|
||||
|
||||
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
|
||||
return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $sheetName, $this->currentSheetIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -29,18 +29,23 @@ class SheetHelper
|
||||
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
|
||||
protected $shouldFormatDates;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/**
 * Stores the file path, the helpers and the reading options given to the helper.
 *
 * @param string $filePath Path of the XLSX file being read
 * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
 * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
 * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
 */
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows)
{
    // File being read
    $this->filePath = $filePath;

    // Helpers
    $this->sharedStringsHelper = $sharedStringsHelper;
    $this->globalFunctionsHelper = $globalFunctionsHelper;

    // Reading options
    $this->shouldFormatDates = $shouldFormatDates;
    $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
}
|
||||
|
||||
/**
|
||||
@ -92,7 +97,7 @@ class SheetHelper
|
||||
|
||||
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
|
||||
|
||||
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
|
||||
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows, $sheetIndexZeroBased, $sheetName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -69,7 +69,7 @@ class Reader extends AbstractReader
|
||||
$this->sharedStringsHelper->extractSharedStrings();
|
||||
}
|
||||
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates, $this->shouldPreserveEmptyRows);
|
||||
} else {
|
||||
throw new IOException("Could not open $filePath for reading.");
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ class RowIterator implements IteratorInterface
|
||||
/** Definition of XML attributes used to parse data */
|
||||
const XML_ATTRIBUTE_REF = 'ref';
|
||||
const XML_ATTRIBUTE_SPANS = 'spans';
|
||||
const XML_ATTRIBUTE_ROW_INDEX = 'r';
|
||||
const XML_ATTRIBUTE_CELL_INDEX = 'r';
|
||||
|
||||
/** @var string Path of the XLSX file being read */
|
||||
@ -43,7 +44,10 @@ class RowIterator implements IteratorInterface
|
||||
/** @var Helper\StyleHelper $styleHelper Helper to work with styles */
|
||||
protected $styleHelper;
|
||||
|
||||
/** @var int Number of read rows */
|
||||
/**
|
||||
* TODO: This variable can be deleted when row indices get preserved
|
||||
* @var int Number of read rows
|
||||
*/
|
||||
protected $numReadRows = 0;
|
||||
|
||||
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
|
||||
@ -55,6 +59,15 @@ class RowIterator implements IteratorInterface
|
||||
/** @var int The number of columns the sheet has (0 meaning undefined) */
|
||||
protected $numColumns = 0;
|
||||
|
||||
/** @var bool Whether empty rows should be returned or skipped */
|
||||
protected $shouldPreserveEmptyRows;
|
||||
|
||||
/** @var int Last row index processed (one-based) */
|
||||
protected $lastRowIndexProcessed = 0;
|
||||
|
||||
/** @var int Row index to be processed next (one-based) */
|
||||
protected $nextRowIndexToBeProcessed = 0;
|
||||
|
||||
/** @var int Last column index processed (zero-based) */
|
||||
protected $lastColumnIndexProcessed = -1;
|
||||
|
||||
@ -63,8 +76,9 @@ class RowIterator implements IteratorInterface
|
||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
*/
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
|
||||
@ -73,6 +87,8 @@ class RowIterator implements IteratorInterface
|
||||
|
||||
$this->styleHelper = new StyleHelper($filePath);
|
||||
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
|
||||
|
||||
$this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -104,6 +120,8 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
$this->numReadRows = 0;
|
||||
$this->lastRowIndexProcessed = 0;
|
||||
$this->nextRowIndexToBeProcessed = 0;
|
||||
$this->rowDataBuffer = null;
|
||||
$this->hasReachedEndOfFile = false;
|
||||
$this->numColumns = 0;
|
||||
@ -123,7 +141,7 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Move forward to next element. Empty rows will be skipped.
|
||||
* Move forward to next element. Reads data describing the next unprocessed row.
|
||||
* @link http://php.net/manual/en/iterator.next.php
|
||||
*
|
||||
* @return void
|
||||
@ -131,53 +149,73 @@ class RowIterator implements IteratorInterface
|
||||
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
|
||||
*/
|
||||
public function next()
|
||||
{
|
||||
$this->nextRowIndexToBeProcessed++;
|
||||
|
||||
if ($this->doesNeedDataForNextRowToBeProcessed()) {
|
||||
$this->readDataForNextRow($this->xmlReader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether we need data for the next row to be processed.
|
||||
* We don't need to read data if:
|
||||
* we have already read at least one row
|
||||
* AND
|
||||
* we need to preserve empty rows
|
||||
* AND
|
||||
* the last row that was read is at or beyond the row that needs to be processed
|
||||
* (i.e. an empty row or the already-read row can be returned without reading more data)
|
||||
*
|
||||
* @return bool Whether we need data for the next row to be processed.
|
||||
*/
|
||||
protected function doesNeedDataForNextRowToBeProcessed()
|
||||
{
|
||||
$hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);
|
||||
|
|
||||
return (
|
||||
!$hasReadAtLeastOneRow ||
|
||||
!$this->shouldPreserveEmptyRows ||
|
||||
$this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
|
||||
* @return void
|
||||
* @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
|
||||
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
|
||||
*/
|
||||
protected function readDataForNextRow($xmlReader)
|
||||
{
|
||||
$rowData = [];
|
||||
|
||||
try {
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
|
||||
// Read dimensions of the sheet
|
||||
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
||||
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
||||
$lastCellIndex = $matches[1];
|
||||
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
||||
while ($xmlReader->read()) {
|
||||
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
|
||||
$this->processDimensionStartingNode($xmlReader);
|
||||
|
||||
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
|
||||
$rowData = $this->processRowStartingNode($xmlReader);
|
||||
|
||||
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
$rowData = $this->processCellStartingNode($xmlReader, $rowData);
|
||||
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
|
||||
// if the fetched row is empty and we don't want to preserve it...
|
||||
if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($rowData)) {
|
||||
// ... skip it
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
|
||||
// Start of the row description
|
||||
$rowData = $this->processRowEndingNode($rowData);
|
||||
|
||||
// Reset index of the last processed column
|
||||
$this->lastColumnIndexProcessed = -1;
|
||||
|
||||
// Read spans info if present
|
||||
$numberOfColumnsForRow = $this->numColumns;
|
||||
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
||||
if ($spans) {
|
||||
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
||||
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
||||
}
|
||||
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
|
||||
// Start of a cell description
|
||||
$currentColumnIndex = $this->getCellIndex($this->xmlReader);
|
||||
|
||||
$node = $this->xmlReader->expand();
|
||||
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
||||
|
||||
$this->lastColumnIndexProcessed = $currentColumnIndex;
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
|
||||
// End of the row description
|
||||
// If needed, we fill the empty cells
|
||||
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||
$this->numReadRows++;
|
||||
// at this point, we have all the data we need for the row
|
||||
// so that we can populate the buffer
|
||||
break;
|
||||
|
||||
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
|
||||
// The closing "</worksheet>" marks the end of the file
|
||||
$this->hasReachedEndOfFile = true;
|
||||
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
|
||||
$this->processWorksheetEndingNode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -190,11 +228,101 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" tag
|
||||
* @return int
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
|
||||
* @return void
|
||||
*/
|
||||
protected function processDimensionStartingNode($xmlReader)
|
||||
{
|
||||
// Read dimensions of the sheet
|
||||
$dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
||||
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
||||
$lastCellIndex = $matches[1];
|
||||
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
|
||||
* @return array Row data pre-filled with one empty string per expected column, or an empty array when the number of columns is unknown (0)
|
||||
*/
|
||||
protected function processRowStartingNode($xmlReader)
|
||||
{
|
||||
// Reset index of the last processed column
|
||||
$this->lastColumnIndexProcessed = -1;
|
||||
|
|
||||
// Mark the last processed row as the one currently being read
|
||||
$this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);
|
||||
|
|
||||
// Read spans info if present; its upper bound overrides the sheet-wide number of columns for this row
|
||||
$numberOfColumnsForRow = $this->numColumns;
|
||||
$spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
||||
if ($spans) {
|
||||
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
||||
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
||||
}
|
||||
|
|
||||
return ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" starting node
|
||||
* @param array $rowData Data of all cells read so far (key = cell index, value = cell value)
|
||||
* @return array Original row data + data for the cell that was just read (key = cell index, value = cell value)
|
||||
*/
|
||||
protected function processCellStartingNode($xmlReader, $rowData)
|
||||
{
|
||||
$currentColumnIndex = $this->getColumnIndex($xmlReader);
|
||||
|
|
||||
$node = $xmlReader->expand();
|
||||
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
||||
|
|
||||
$this->lastColumnIndexProcessed = $currentColumnIndex;
|
||||
|
|
||||
return $rowData;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $rowData Data of all cells read so far (key = cell index, value = cell value)
|
||||
* @return array The given row data, passed through CellHelper::fillMissingArrayIndexes() when the number of columns is unknown (0)
|
||||
*/
|
||||
protected function processRowEndingNode($rowData)
|
||||
{
|
||||
$this->numReadRows++;
|
||||
|
|
||||
// If needed (i.e. the sheet did not declare its number of columns), we fill the empty cells
|
||||
return ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
protected function processWorksheetEndingNode()
|
||||
{
|
||||
// The closing "</worksheet>" marks the end of the file
|
||||
$this->hasReachedEndOfFile = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
|
||||
* @return int Row index
|
||||
* @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
|
||||
*/
|
||||
protected function getCellIndex($xmlReader)
|
||||
protected function getRowIndex($xmlReader)
|
||||
{
|
||||
// Get "r" attribute if present (from something like <row r="3"...>); when absent, fall back to the index following the last processed row
|
||||
$currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);
|
||||
|
|
||||
return ($currentRowIndex !== null) ?
|
||||
intval($currentRowIndex) :
|
||||
$this->lastRowIndexProcessed + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
|
||||
* @return int Column index
|
||||
* @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
|
||||
*/
|
||||
protected function getColumnIndex($xmlReader)
|
||||
{
|
||||
// Get "r" attribute if present (from something like <c r="A1"...>)
|
||||
$currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||
@ -216,25 +344,53 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the current element, from the buffer.
|
||||
* @param array $rowData
|
||||
* @return bool Whether the given row is empty
|
||||
*/
|
||||
protected function isEmptyRow($rowData)
|
||||
{
// An empty row is represented as a single cell holding an empty string, i.e. ['']
// NOTE(review): row data pre-filled with several '' cells (count > 1) does not match this check - confirm this is intended
return (count($rowData) === 1 && $rowData[0] === '');
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the current element, either an empty row or from the buffer.
|
||||
* @link http://php.net/manual/en/iterator.current.php
|
||||
*
|
||||
* @return array|null
|
||||
*/
|
||||
public function current()
|
||||
{
|
||||
return $this->rowDataBuffer;
|
||||
$rowDataForRowToBeProcessed = $this->rowDataBuffer;
|
||||
|
||||
if ($this->shouldPreserveEmptyRows) {
|
||||
// when we need to preserve empty rows, we will either return
|
||||
// an empty row or the last row read. This depends on whether the
|
||||
// index of last row that was read matches the index of the last
|
||||
// row whose value should be returned.
|
||||
if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
|
||||
// return empty row if mismatch between last processed row
|
||||
// and the row that needs to be returned
|
||||
$rowDataForRowToBeProcessed = [''];
|
||||
}
|
||||
}
|
||||
|
||||
return $rowDataForRowToBeProcessed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the key of the current element
|
||||
* Return the key of the current element. Here, the row index.
|
||||
* @link http://php.net/manual/en/iterator.key.php
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
public function key()
|
||||
{
|
||||
return $this->numReadRows;
|
||||
// TODO: This should return $this->nextRowIndexToBeProcessed
|
||||
// but to avoid a breaking change, the return value for
|
||||
// this function has been kept as the number of rows read.
|
||||
return $this->shouldPreserveEmptyRows ?
|
||||
$this->nextRowIndexToBeProcessed :
|
||||
$this->numReadRows;
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,12 +26,13 @@ class Sheet implements SheetInterface
|
||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||
* @param Helper\SharedStringsHelper Helper to work with shared strings
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||
* @param string $sheetName Name of the sheet
|
||||
*/
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
|
||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows, $sheetIndex, $sheetName)
|
||||
{
|
||||
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
|
||||
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows);
|
||||
$this->index = $sheetIndex;
|
||||
$this->name = $sheetName;
|
||||
}
|
||||
|
@ -25,12 +25,13 @@ class SheetIterator implements IteratorInterface
|
||||
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
|
||||
* @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
|
||||
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
|
||||
*/
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
|
||||
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows)
|
||||
{
|
||||
// Fetch all available sheets
|
||||
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
|
||||
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates, $shouldPreserveEmptyRows);
|
||||
$this->sheets = $sheetHelper->getSheets();
|
||||
|
||||
if (count($this->sheets) === 0) {
|
||||
|
@ -115,29 +115,40 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
* @return void
|
||||
*/
|
||||
public function dataProviderForTestReadShouldSkipEmptyLines()
|
||||
public function testReadShouldSkipEmptyLinesIfShouldPreserveEmptyRowsNotSet()
|
||||
{
|
||||
return [
|
||||
['csv_with_empty_line.csv'],
|
||||
['csv_with_empty_last_line.csv'],
|
||||
$allRows = $this->getAllRowsForFile('csv_with_multiple_empty_lines.csv');
|
||||
|
||||
$expectedRows = [
|
||||
// skipped row here
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
// skipped row here
|
||||
['csv--41', 'csv--42', 'csv--43'],
|
||||
// skipped row here
|
||||
// last row empty
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadShouldSkipEmptyLines
|
||||
*
|
||||
* @param string $fileName
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyLines($fileName)
|
||||
public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile($fileName);
|
||||
$allRows = $this->getAllRowsForFile(
|
||||
'csv_with_multiple_empty_lines.csv',
|
||||
',', '"', "\n", EncodingHelper::ENCODING_UTF8,
|
||||
$shouldPreserveEmptyRows = true
|
||||
);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
[''],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
[''],
|
||||
['csv--41', 'csv--42', 'csv--43'],
|
||||
[''],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
@ -204,6 +215,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals('This is, a comma', $allRows[0][0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadCustomEOLs()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('csv_with_CR_EOL.csv', ',', '"', "\r");
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
@ -236,7 +262,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testReadShouldSkipBom($fileName, $fileEncoding)
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding);
|
||||
$allRows = $this->getAllRowsForFile($fileName, ',', '"', "\n", $fileEncoding);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
@ -275,6 +301,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper|\PHPUnit_Framework_MockObject_MockObject $helperStub */
|
||||
$helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
|
||||
->setMethods(['function_exists'])
|
||||
->getMock();
|
||||
@ -405,14 +432,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
* @param string $fileName
|
||||
* @param string|void $fieldDelimiter
|
||||
* @param string|void $fieldEnclosure
|
||||
* @param string|void $endOfLineCharacter
|
||||
* @param string|void $encoding
|
||||
* @param bool|void $shouldPreserveEmptyRows
|
||||
* @return array All the rows read from the given file
|
||||
*/
|
||||
private function getAllRowsForFile(
|
||||
$fileName,
|
||||
$fieldDelimiter = ',',
|
||||
$fieldEnclosure = '"',
|
||||
$encoding = EncodingHelper::ENCODING_UTF8)
|
||||
$endOfLineCharacter = "\n",
|
||||
$encoding = EncodingHelper::ENCODING_UTF8,
|
||||
$shouldPreserveEmptyRows = false)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
@ -422,7 +453,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$reader
|
||||
->setFieldDelimiter($fieldDelimiter)
|
||||
->setFieldEnclosure($fieldEnclosure)
|
||||
->setEndOfLineCharacter($endOfLineCharacter)
|
||||
->setEncoding($encoding)
|
||||
->setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
@ -436,51 +469,6 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
return $allRows;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function dataProviderForTestReadCustomEOL()
|
||||
{
|
||||
return [
|
||||
['csv_with_CR_EOL.csv', "\r"],
|
||||
['csv_standard.csv', "\n"],
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadCustomEOL
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param string $customEOL
|
||||
* @return void
|
||||
*/
|
||||
public function testReadCustomEOLs($fileName, $customEOL)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\CSV\Reader $reader */
|
||||
$reader = ReaderFactory::create(Type::CSV);
|
||||
$reader
|
||||
->setEndOfLineCharacter($customEOL)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheet) {
|
||||
foreach ($sheet->getRowIterator() as $row) {
|
||||
$allRows[] = $row;
|
||||
}
|
||||
}
|
||||
|
||||
$reader->close();
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
|
@ -211,15 +211,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyRow()
|
||||
public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods');
|
||||
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods');
|
||||
|
||||
$this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped');
|
||||
|
||||
$expectedRows = [
|
||||
['ods--11', 'ods--12', 'ods--13'],
|
||||
// row skipped here
|
||||
// skipped row here
|
||||
['ods--21', 'ods--22', 'ods--23'],
|
||||
// skipped row here
|
||||
// skipped row here
|
||||
['ods--51', 'ods--52', 'ods--53'],
|
||||
['ods--61', 'ods--62', 'ods--63'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_rows.ods', false, true);
|
||||
|
||||
$this->assertEquals(6, count($allRows), 'There should be 6 rows');
|
||||
|
||||
$expectedRows = [
|
||||
[''],
|
||||
['ods--21', 'ods--22', 'ods--23'],
|
||||
[''],
|
||||
[''],
|
||||
['ods--51', 'ods--52', 'ods--53'],
|
||||
['ods--61', 'ods--62', 'ods--63'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
@ -485,15 +509,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param bool|void $shouldFormatDates
|
||||
* @param bool|void $shouldPreserveEmptyRows
|
||||
* @return array All the rows read from the given file
|
||||
*/
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\ODS\Reader $reader */
|
||||
$reader = ReaderFactory::create(Type::ODS);
|
||||
$reader->setShouldFormatDates($shouldFormatDates);
|
||||
$reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
|
@ -352,16 +352,39 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipEmptyRows()
|
||||
public function testReadShouldSkipEmptyRowsIfShouldPreserveEmptyRowsNotSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx');
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx');
|
||||
|
||||
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
|
||||
$this->assertEquals(3, count($allRows), 'There should be only 3 rows, because the empty rows are skipped');
|
||||
|
||||
$expectedRows = [
|
||||
['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
|
||||
// skipped row here
|
||||
['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
|
||||
['s1--A2', 's1--B2', 's1--C2'],
|
||||
// skipped row here
|
||||
// skipped row here
|
||||
['s1--A5', 's1--B5', 's1--C5'],
|
||||
['s1--A6', 's1--B6', 's1--C6'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldReturnEmptyLinesIfShouldPreserveEmptyRowsSet()
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('sheet_with_empty_rows_and_missing_row_index.xlsx', false, true);
|
||||
|
||||
$this->assertEquals(6, count($allRows), 'There should be 6 rows');
|
||||
|
||||
$expectedRows = [
|
||||
[''],
|
||||
['s1--A2', 's1--B2', 's1--C2'],
|
||||
[''],
|
||||
[''],
|
||||
['s1--A5', 's1--B5', 's1--C5'],
|
||||
['s1--A6', 's1--B6', 's1--C6'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
@ -595,15 +618,18 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @param bool|void $shouldFormatDates
|
||||
* @param bool|void $shouldPreserveEmptyRows
|
||||
* @return array All the rows read from the given file
|
||||
*/
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
|
||||
private function getAllRowsForFile($fileName, $shouldFormatDates = false, $shouldPreserveEmptyRows = false)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\XLSX\Reader $reader */
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->setShouldFormatDates($shouldFormatDates);
|
||||
$reader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
|
@ -1,2 +0,0 @@
|
||||
csv--11,csv--12,csv--13
|
||||
csv--31,csv--32,csv--33
|
|
@ -1,3 +0,0 @@
|
||||
csv--11,csv--12,csv--13
|
||||
|
||||
csv--31,csv--32,csv--33
|
|
5
tests/resources/csv/csv_with_multiple_empty_lines.csv
Normal file
5
tests/resources/csv/csv_with_multiple_empty_lines.csv
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
csv--21,csv--22,csv--23
|
||||
|
||||
csv--41,csv--42,csv--43
|
||||
|
|
Binary file not shown.
BIN
tests/resources/ods/sheet_with_empty_rows.ods
Normal file
BIN
tests/resources/ods/sheet_with_empty_rows.ods
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user