ODS Reader

Spout can now read ODS files.
It's on par with the XLSX reader. The only difference is that the row iterator cannot be rewound.
It supports the different output formats from LibreOffice and Excel, skipping extra rows/cells if needed.
This commit is contained in:
Adrien Loison 2015-08-28 23:15:42 -07:00
parent 3f0016f753
commit e4154dfdc3
35 changed files with 1025 additions and 65 deletions

View File

@ -0,0 +1,12 @@
<?php
namespace Box\Spout\Reader\Exception;
/**
* Class IteratorNotRewindableException
* Raised when an iterator that can only be traversed once is asked to rewind again.
* The ODS row iterator throws it from rewind() on every call after the first one,
* because the underlying XML stream cannot be read backwards.
*
* @package Box\Spout\Reader\Exception
*/
class IteratorNotRewindableException extends ReaderException
{
}

View File

@ -0,0 +1,62 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\AbstractReader;
/**
 * Class Reader
 * Entry point used to read the content of an ODS file, sheet by sheet.
 *
 * @package Box\Spout\Reader\ODS
 */
class Reader extends AbstractReader
{
    /** @var \ZipArchive Handle on the ODS archive; opening it is how the file is checked to be readable */
    protected $zip;

    /** @var SheetIterator Iterator over the sheets of the ODS file */
    protected $sheetIterator;

    /**
     * Gets the reader ready for the file located at the given path:
     * the archive is opened first and, only if that succeeded, the sheet iterator is created.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the archive at the given path cannot be opened
     */
    protected function openReader($filePath)
    {
        $this->zip = new \ZipArchive();
        $hasOpenedArchive = ($this->zip->open($filePath) === true);

        if (!$hasOpenedArchive) {
            throw new IOException("Could not open $filePath for reading.");
        }

        $this->sheetIterator = new SheetIterator($filePath);
    }

    /**
     * Gives access to the iterator created when the file was opened.
     *
     * @return SheetIterator To iterate over sheets
     */
    public function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Releases the archive handle. Does nothing if no archive was ever created.
     *
     * @return void
     */
    protected function closeReader()
    {
        if (!$this->zip) {
            return;
        }

        $this->zip->close();
    }
}

View File

@ -0,0 +1,314 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
 * Class RowIterator
 * Forward-only iterator over the rows of one ODS sheet.
 * It consumes the XML reader it is given (positioned on a "<table:table>" element)
 * and stops when the matching "</table:table>" is met or when the stream ends.
 * Rows are read one step ahead and kept in a buffer, so that valid() knows whether a row is available.
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of all possible cell types */
    const CELL_TYPE_STRING = 'string';
    const CELL_TYPE_BOOLEAN = 'boolean';
    const CELL_TYPE_FLOAT = 'float';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';
    const XML_NODE_P = 'p';
    const XML_NODE_S = 'text:s';
    const XML_NODE_A = 'text:a';
    const XML_NODE_SPAN = 'text:span';

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_TYPE = 'office:value-type';
    const XML_ATTRIBUTE_VALUE = 'office:value';
    const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
    const XML_ATTRIBUTE_C = 'text:c';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */
    protected $escaper;

    /** @var int Number of (non empty) rows returned so far; also used as the iterator key */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     */
    public function __construct($xmlReader)
    {
        $this->xmlReader = $xmlReader;

        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
        $this->escaper = new \Box\Spout\Common\Escaper\ODS();
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // The XML reader is received from the outside and can only move forward.
        // The first "rewind" therefore just starts reading from the current position;
        // any further attempt is refused since the rows already consumed cannot be read again.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->numReadRows = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Pre-load the first row in the buffer
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows will be skipped.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $cellValue = null;
        $numColumnsRepeated = 1;
        $numCellsRead = 0;
        $hasReadCompleteRow = false;

        try {
            while ($this->xmlReader->read()) {
                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
                    $node = $this->xmlReader->expand();
                    $currentCellValue = $this->getCellValue($node);

                    // Cell N is only added to the row once cell N+1 has been read: this way the very last cell
                    // of the row can be handled separately when the end of the row is reached (see below).
                    if ($numCellsRead > 0) {
                        $rowData = $this->appendRepeatedValue($rowData, $cellValue, $numColumnsRepeated);
                    }

                    $cellValue = $currentCellValue;
                    $numColumnsRepeated = $currentNumColumnsRepeated;
                    $numCellsRead++;
                } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    $isLastCellEmpty = $this->isEmptyCellValue($cellValue);

                    if ($numCellsRead <= 1 && $isLastCellEmpty) {
                        // Empty row: forget it and keep reading, in the same loop, until a row with data is found.
                        // (looping instead of recursing keeps the call stack flat when many empty rows follow each other)
                        $rowData = [];
                        $cellValue = null;
                        $numColumnsRepeated = 1;
                        $numCellsRead = 0;
                        continue;
                    }

                    // Only add value if the last read cell is not empty or does not need to repeat cell values.
                    // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
                    // with a number-columns-repeated value equals to the number of (supported columns - used columns).
                    // In Excel, the number of supported columns is 16384, but we don't want to returns rows with always 16384 cells.
                    if (!$isLastCellEmpty || $numColumnsRepeated === 1) {
                        $rowData = $this->appendRepeatedValue($rowData, $cellValue, $numColumnsRepeated);
                    }

                    // The row is returned whether or not its trailing filler cell was kept, so it always counts
                    $this->numReadRows++;
                    $hasReadCompleteRow = true;
                    break;
                } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    // The closing "</table:table>" marks the end of the sheet
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        // No complete row means that either "</table:table>" was reached or the XML stream ran out of nodes.
        // Both cases end the iteration; without this, a truncated document would make valid() return true forever.
        if (!$hasReadCompleteRow) {
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Appends the given value to the row, as many times as requested.
     *
     * @param array $rowData Values already collected for the row
     * @param string|int|float|bool|null $cellValue Value to append
     * @param int $numTimes Number of times the value must be appended (nothing is appended if lower than 1)
     * @return array The row data, with the value appended
     */
    protected function appendRepeatedValue($rowData, $cellValue, $numTimes)
    {
        for ($i = 0; $i < $numTimes; $i++) {
            $rowData[] = $cellValue;
        }

        return $rowData;
    }

    /**
     * Tells whether a cell value represents "no data".
     * Only null (no cell read) and the empty string qualify: 0, 0.0, false and "0" are real values
     * and must neither cause a row to be skipped nor be dropped from the end of a row.
     *
     * @param string|int|float|bool|null $cellValue
     * @return bool Whether the value is null or an empty string
     */
    protected function isEmptyCellValue($cellValue)
    {
        return ($cellValue === null || $cellValue === '');
    }

    /**
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode()
    {
        $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
        return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     * For floats and booleans, the machine readable attribute ("office:value" / "office:boolean-value")
     * is preferred when usable; the displayed text is only used as a fallback, since it depends
     * on the cell formatting and on the locale of the software that created the file.
     * @TODO Add other types !!
     *
     * @param \DOMNode $node
     * @return string|int|float|bool The value associated with the cell (or empty string if cell's type is undefined)
     */
    protected function getCellValue($node)
    {
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);

        switch ($cellType) {
            case self::CELL_TYPE_STRING:
                return $this->formatStringCellValue($node);

            case self::CELL_TYPE_FLOAT:
                $rawValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
                if (!is_numeric($rawValue)) {
                    // attribute missing or unusable: fall back to the displayed text
                    $rawValue = $this->getTextPNodeValue($node);
                }
                return $this->formatFloatCellValue($rawValue);

            case self::CELL_TYPE_BOOLEAN:
                $rawValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);
                if ($rawValue === 'true' || $rawValue === 'false') {
                    // the displayed text would be "TRUE"/"FALSE" (or a translation), which is always truthy in PHP
                    return ($rawValue === 'true');
                }
                return $this->formatBooleanCellValue($this->getTextPNodeValue($node));

            default:
                return '';
        }
    }

    /**
     * Returns the value of the first "<text:p>" node within the given node.
     *
     * @param \DOMNode $node
     * @return string Value for the first "<text:p>" node or empty string if no "<text:p>" found
     */
    protected function getTextPNodeValue($node)
    {
        $nodeValue = '';
        $pNodes = $node->getElementsByTagName(self::XML_NODE_P);

        if ($pNodes->length > 0) {
            $nodeValue = $pNodes->item(0)->nodeValue;
        }

        return $nodeValue;
    }

    /**
     * Returns the cell String value.
     * Each "<text:p>" found in the cell becomes one line of the value. Within a paragraph,
     * plain text is kept as is, "<text:s>" is expanded into the number of spaces it stands for
     * and the text of "<text:a>" (links) and "<text:span>" (styled text) is appended.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell
     */
    protected function formatStringCellValue($node)
    {
        $pNodeValues = [];
        $pNodes = $node->getElementsByTagName(self::XML_NODE_P);

        foreach ($pNodes as $pNode) {
            $currentPValue = '';

            foreach ($pNode->childNodes as $childNode) {
                if ($childNode instanceof \DOMText) {
                    $currentPValue .= $childNode->nodeValue;
                } else if ($childNode->nodeName === self::XML_NODE_S) {
                    // "<text:s text:c="N"/>" stands for N spaces (1 when the attribute is missing)
                    $spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
                    $numSpaces = (!empty($spaceAttribute)) ? intval($spaceAttribute) : 1;
                    $currentPValue .= str_repeat(' ', $numSpaces);
                } else if ($childNode->nodeName === self::XML_NODE_A || $childNode->nodeName === self::XML_NODE_SPAN) {
                    // Links and styled portions of text carry their text as nested content
                    $currentPValue .= $childNode->nodeValue;
                }
            }

            $pNodeValues[] = $currentPValue;
        }

        $escapedCellValue = implode("\n", $pNodeValues);
        $cellValue = $this->escaper->unescape($escapedCellValue);

        return $cellValue;
    }

    /**
     * Returns the cell Numeric value from its string representation.
     * Whole numbers that fit in a PHP integer are returned as int, everything else as float.
     *
     * @param string $pNodeValue String representation of the number
     * @return int|float The value associated with the cell
     */
    protected function formatFloatCellValue($pNodeValue)
    {
        $floatValue = floatval($pNodeValue);

        // is_int() cannot be used here: the value is always received as a string.
        // The range check keeps huge values (and INF) as floats instead of overflowing the int conversion.
        $isWholeNumber = ($floatValue == floor($floatValue));
        if ($isWholeNumber && abs($floatValue) < PHP_INT_MAX) {
            return intval($floatValue);
        }

        return $floatValue;
    }

    /**
     * Returns the cell Boolean value from a specific node's Value.
     * The value is converted using PHP's truthiness rules ('', '0' => false, anything else => true).
     *
     * @param string $pNodeValue
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($pNodeValue)
    {
        return (bool) $pNodeValue;
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}

View File

@ -0,0 +1,63 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Reader\SheetInterface;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
 * Class Sheet
 * Read-only description of one sheet of an ODS file: its position, its name
 * and the iterator giving access to its rows.
 *
 * @package Box\Spout\Reader\ODS
 */
class Sheet implements SheetInterface
{
    /** @var RowIterator Iterator over the rows of this sheet */
    protected $rowIterator;

    /** @var int ID of the sheet (declared but never assigned by this class) */
    protected $id;

    /** @var int Zero-based index of the sheet, following the order of creation */
    protected $index;

    /** @var string Name of the sheet */
    protected $name;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
     * @param string $sheetName Name of the sheet
     */
    public function __construct($xmlReader, $sheetIndex, $sheetName)
    {
        $this->name = $sheetName;
        $this->index = $sheetIndex;

        // The row iterator reads from the XML reader that was handed over
        $this->rowIterator = new RowIterator($xmlReader);
    }

    /**
     * @return RowIterator Iterator over the rows of this sheet
     */
    public function getRowIterator()
    {
        return $this->rowIterator;
    }

    /**
     * @return int Zero-based index of the sheet, following the order of creation
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }
}

View File

@ -0,0 +1,135 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
 * Class SheetIterator
 * Iterate over ODS sheet.
 * Sheets are the "<table:table>" elements of the "content.xml" entry of the ODS archive;
 * they are discovered one after the other while moving forward in that XML document.
 *
 * @package Box\Spout\Reader\ODS
 */
class SheetIterator implements IteratorInterface
{
    /** Definition of XML nodes name and attribute used to parse sheet data */
    const XML_NODE_TABLE = 'table:table';
    const XML_ATTRIBUTE_TABLE_NAME = 'table:name';

    /** @var string $filePath Path of the file to be read */
    protected $filePath;

    /** @var XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */
    protected $escaper;

    /** @var bool Whether there is still at least one sheet to be read (false until rewind() has found one) */
    protected $hasFoundSheet = false;

    /** @var int The index of the sheet being read (zero-based) */
    protected $currentSheetIndex = 0;

    /**
     * Only stores the path and creates the helpers: the file is not touched before rewind() is called.
     *
     * @param string $filePath Path of the file to be read
     */
    public function __construct($filePath)
    {
        $this->filePath = $filePath;
        $this->xmlReader = new XMLReader();

        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
        $this->escaper = new \Box\Spout\Common\Escaper\ODS();
    }

    /**
     * Rewind the Iterator to the first element
     * The XML document is closed and opened again, then read up to the first sheet.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to open the XML file containing sheets' data
     */
    public function rewind()
    {
        // An XML stream cannot be read backwards: start over from a freshly opened document
        $this->xmlReader->close();

        $contentXmlFilePath = $this->filePath . '#content.xml';
        if ($this->xmlReader->open('zip://' . $contentXmlFilePath) === false) {
            throw new IOException("Could not open \"{$contentXmlFilePath}\".");
        }

        try {
            $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE);
        } catch (XMLProcessingException $exception) {
            throw new IOException("The content.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
        }

        $this->currentSheetIndex = 0;
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return $this->hasFoundSheet;
    }

    /**
     * Move forward to next element
     * NOTE(review): unlike rewind(), an XMLProcessingException raised while looking for
     * the next sheet is not converted into an IOException here - confirm this is intended.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     */
    public function next()
    {
        $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE);

        // The index only moves when there really is another sheet
        if ($this->hasFoundSheet) {
            $this->currentSheetIndex++;
        }
    }

    /**
     * Return the current element
     * The sheet shares this iterator's XML reader, which is positioned on the sheet's "<table:table>" element.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return Sheet
     */
    public function current()
    {
        $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
        $sheetName = $this->escaper->unescape($escapedSheetName);

        // Argument order must follow Sheet::__construct($xmlReader, $sheetIndex, $sheetName)
        return new Sheet($this->xmlReader, $this->currentSheetIndex, $sheetName);
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int The one-based position of the current sheet
     */
    public function key()
    {
        return $this->currentSheetIndex + 1;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}

View File

@ -33,6 +33,9 @@ class ReaderFactory
case Type::XLSX:
$reader = new XLSX\Reader();
break;
case Type::ODS:
$reader = new ODS\Reader();
break;
default:
throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType);
}

View File

@ -138,4 +138,22 @@ class XMLReader extends \XMLReader
return $wasNextSuccessful;
}
/**
 * Tells whether the reader currently sits on the opening tag of the element with the given name.
 *
 * @param string $nodeName Name of the element, as reported by the reader
 * @return bool Whether the XML Reader is currently positioned on the starting node with given name
 */
public function isPositionedOnStartingNode($nodeName)
{
    // Anything that is not an opening tag can be ruled out right away
    if ($this->nodeType !== XMLReader::ELEMENT) {
        return false;
    }

    return ($this->name === $nodeName);
}
/**
 * Tells whether the reader currently sits on the closing tag of the element with the given name.
 *
 * @param string $nodeName Name of the element, as reported by the reader
 * @return bool Whether the XML Reader is currently positioned on the ending node with given name
 */
public function isPositionedOnEndingNode($nodeName)
{
    // Anything that is not a closing tag can be ruled out right away
    if ($this->nodeType !== XMLReader::END_ELEMENT) {
        return false;
    }

    return ($this->name === $nodeName);
}
}

View File

@ -83,7 +83,7 @@ class SharedStringsHelper
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$sharedStringsFilePath = $this->getSharedStringsFilePath();
if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) {
if ($xmlReader->open($sharedStringsFilePath) === false) {
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
}

View File

@ -101,7 +101,6 @@ class SheetHelper
*/
protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased)
{
$sheetId = $sheetIndexZeroBased + 1;
$sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);
/*
@ -123,7 +122,6 @@ class SheetHelper
if (count($sheetNodes) === 1) {
$sheetNode = $sheetNodes[0];
$sheetId = (int) $sheetNode->getAttribute('sheetId');
$escapedSheetName = $sheetNode->getAttribute('name');
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
@ -132,7 +130,7 @@ class SheetHelper
}
}
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetId, $sheetIndexZeroBased, $sheetName);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
}
/**

View File

@ -77,6 +77,7 @@ class RowIterator implements IteratorInterface
$this->sharedStringsHelper = $sharedStringsHelper;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
}
@ -143,7 +144,7 @@ class RowIterator implements IteratorInterface
try {
while ($this->xmlReader->read()) {
if ($this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
// Read dimensions of the sheet
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
@ -151,7 +152,7 @@ class RowIterator implements IteratorInterface
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
}
} else if ($this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
// Start of the row description
$isInsideRowTag = true;
@ -164,7 +165,7 @@ class RowIterator implements IteratorInterface
}
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
} else if ($isInsideRowTag && $this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
} else if ($isInsideRowTag && $this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
@ -172,16 +173,17 @@ class RowIterator implements IteratorInterface
$node = $this->xmlReader->expand();
$rowData[$currentColumnIndex] = $this->getCellValue($node);
} else if ($this->xmlReader->nodeType === XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
// If needed, we fill the empty cells
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
$this->numReadRows++;
break;
} else if ($this->xmlReader->nodeType === XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
// The closing "</worksheet>" marks the end of the file
$this->hasReachedEndOfFile = true;
break;
}
}
@ -192,6 +194,40 @@ class RowIterator implements IteratorInterface
$this->rowDataBuffer = $rowData;
}
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
* The cell type is read from the node's type attribute and decides which formatter is applied
* to the content of the cell's value node.
*
* @param \DOMNode $node
* @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell type is not one of the handled types)
*/
protected function getCellValue($node)
{
// Default cell type is "n"
$cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
$vNodeValue = $this->getVNodeValue($node);
// An empty value is returned untouched (as an empty string) instead of being formatted.
// Inline strings are the exception: they are formatted from the node itself, not from the value read above.
if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
return $vNodeValue;
}
switch ($cellType) {
case self::CELL_TYPE_INLINE_STRING:
return $this->formatInlineStringCellValue($node);
case self::CELL_TYPE_SHARED_STRING:
return $this->formatSharedStringCellValue($vNodeValue);
case self::CELL_TYPE_STR:
return $this->formatStrCellValue($vNodeValue);
case self::CELL_TYPE_BOOLEAN:
return $this->formatBooleanCellValue($vNodeValue);
case self::CELL_TYPE_NUMERIC:
return $this->formatNumericCellValue($vNodeValue);
case self::CELL_TYPE_DATE:
return $this->formatDateCellValue($vNodeValue);
default:
// Any other type is not supported
return null;
}
}
/**
* Returns the cell's string value from a node's nested value node
*
@ -203,10 +239,7 @@ class RowIterator implements IteratorInterface
// for cell types having a "v" tag containing the value.
// if not, the returned value should be empty string.
$vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
if ($vNode !== null) {
return $vNode->nodeValue;
}
return "";
return ($vNode !== null) ? $vNode->nodeValue : '';
}
/**
@ -296,40 +329,6 @@ class RowIterator implements IteratorInterface
}
}
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
*
* @param \DOMNode $node
* @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
*/
protected function getCellValue($node)
{
// Default cell type is "n"
$cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
$vNodeValue = $this->getVNodeValue($node);
if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
return $vNodeValue;
}
switch ($cellType) {
case self::CELL_TYPE_INLINE_STRING:
return $this->formatInlineStringCellValue($node);
case self::CELL_TYPE_SHARED_STRING:
return $this->formatSharedStringCellValue($vNodeValue);
case self::CELL_TYPE_STR:
return $this->formatStrCellValue($vNodeValue);
case self::CELL_TYPE_BOOLEAN:
return $this->formatBooleanCellValue($vNodeValue);
case self::CELL_TYPE_NUMERIC:
return $this->formatNumericCellValue($vNodeValue);
case self::CELL_TYPE_DATE:
return $this->formatDateCellValue($vNodeValue);
default:
return null;
}
}
/**
* Return the current element, from the buffer.
* @link http://php.net/manual/en/iterator.current.php

View File

@ -15,9 +15,6 @@ class Sheet implements SheetInterface
/** @var RowIterator To iterate over sheet's rows */
protected $rowIterator;
/** @var int ID of the sheet */
protected $id;
/** @var int Index of the sheet, based on order of creation (zero-based) */
protected $index;
@ -28,14 +25,12 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param int $sheetId ID of the sheet
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetId, $sheetIndex, $sheetName)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper);
$this->id = $sheetId;
$this->index = $sheetIndex;
$this->name = $sheetName;
}
@ -48,14 +43,6 @@ class Sheet implements SheetInterface
return $this->rowIterator;
}
/**
* @return int ID of the sheet
*/
public function getId()
{
return $this->id;
}
/**
* @return int Index of the sheet, based on order of creation (zero-based)
*/

View File

@ -0,0 +1,371 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Type;
use Box\Spout\Reader\ReaderFactory;
use Box\Spout\TestUsingResource;
/**
* Class ReaderTest
* Functional tests of the ODS reader: each test reads a resource file through the
* ReaderFactory and compares the rows that were read with the expected ones.
*
* @package Box\Spout\Reader\ODS
*/
class ReaderTest extends \PHPUnit_Framework_TestCase
{
use TestUsingResource;
/**
* Files that cannot be read: a path that does not exist and a corrupted file.
*
* @return array
*/
public function dataProviderForTestReadShouldThrowException()
{
return [
['/path/to/fake/file.ods'],
['file_corrupted.ods'],
];
}
/**
* Reading an unreadable file must fail with an IOException.
*
* @dataProvider dataProviderForTestReadShouldThrowException
* @expectedException \Box\Spout\Common\Exception\IOException
*
* @param string $filePath
* @return void
*/
public function testReadShouldThrowException($filePath)
{
// using @ to prevent warnings/errors from being displayed
@$this->getAllRowsForFile($filePath);
}
/**
* Each data set is: resource name, expected total number of rows (all sheets included),
* expected number of cells in every row.
*
* @return array
*/
public function dataProviderForTestReadForAllWorksheets()
{
return [
['one_sheet_with_strings.ods', 2, 3],
['two_sheets_with_strings.ods', 4, 3],
];
}
/**
* The rows of all the sheets must be read, with the expected number of cells.
*
* @dataProvider dataProviderForTestReadForAllWorksheets
*
* @param string $resourceName
* @param int $expectedNumOfRows
* @param int $expectedNumOfCellsPerRow
* @return void
*/
public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow)
{
$allRows = $this->getAllRowsForFile($resourceName);
$this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows");
foreach ($allRows as $row) {
$this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row");
}
}
/**
* A row made of a single non empty cell must not be mistaken for an empty row.
*
* @return void
*/
public function testReadShouldSupportRowWithOnlyOneCell()
{
$allRows = $this->getAllRowsForFile('sheet_with_only_one_cell.ods');
$this->assertEquals([['foo']], $allRows);
}
/**
* Cells flagged as repeated must appear as many times as requested in the row.
*
* @return void
*/
public function testReadShouldSupportNumberColumnsRepeated()
{
$allRows = $this->getAllRowsForFile('sheet_with_number_columns_repeated.ods');
$expectedRows = [
[
'foo', 'foo', 'foo',
'', '',
true, true,
10.43, 10.43, 10.43, 10.43,
],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* Each data set is: resource name, whether a trailing empty value is expected at the end of the last row.
*
* @return array
*/
public function dataProviderForTestReadWithFilesGeneratedByExternalSoftwares()
{
return [
['file_generated_by_libre_office.ods', true],
['file_generated_by_excel_2010_windows.ods', false],
['file_generated_by_excel_office_online.ods', false],
];
}
/**
* @dataProvider dataProviderForTestReadWithFilesGeneratedByExternalSoftwares
* The files contain styles, different value types, gaps between cells,
* repeated values, empty row, different number of cells per row.
*
* @param string $fileName
* @param bool $skipLastEmptyValues When true, one extra empty value is expected at the end of the last row
* @return void
*/
public function testReadWithFilesGeneratedByExternalSoftwares($fileName, $skipLastEmptyValues)
{
$allRows = $this->getAllRowsForFile($fileName);
$expectedRows = [
['header1','header2','header3','header4'],
['val11','val12','val13','val14'],
['val21','','val23','val23'],
['', 10.43, 29.11],
];
// In the description of the last cell, Excel specifies that the empty value needs to be repeated
// a lot of times (16384 - number of cells used in the row). To avoid creating 16384 cells all the time,
// this cell is skipped altogether.
if ($skipLastEmptyValues) {
$expectedRows[3][] = '';
}
$this->assertEquals($expectedRows, $allRows);
}
/**
* Strings, booleans and numbers must be read; the last cell of the row is expected to be an empty string.
*
* @return void
*/
public function testReadShouldSupportAllCellTypes()
{
$allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.ods');
$expectedRows = [
[
'ods--11', 'ods--12',
true, false,
0, 10.43,
'',
],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* A cell whose value type is not defined must be read as an empty string.
*
* @return void
*/
public function testReadShouldReturnEmptyStringOnUndefinedCellType()
{
$allRows = $this->getAllRowsForFile('sheet_with_undefined_value_type.ods');
$this->assertEquals([['ods--11', '', 'ods--13']], $allRows);
}
/**
* The lines of a multiline string must be joined with "\n".
*
* @return void
*/
public function testReadShouldSupportMultilineStrings()
{
$allRows = $this->getAllRowsForFile('sheet_with_multiline_string.ods');
$expectedRows = [["string\non multiple\nlines!"]];
$this->assertEquals($expectedRows, $allRows);
}
/**
* An empty row located between two rows with data must not be returned.
*
* @return void
*/
public function testReadShouldSkipEmptyRow()
{
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods');
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
$expectedRows = [
['ods--11', 'ods--12', 'ods--13'],
// row skipped here
['ods--21', 'ods--22', 'ods--23'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* Leading, trailing and inner spaces of string cells must be kept.
*
* @return void
*/
public function testReadShouldPreserveSpacing()
{
$allRows = $this->getAllRowsForFile('sheet_with_various_spaces.ods');
$expectedRow = [
' 4 spaces before and after ',
' 1 space before and after ',
'2 spaces after ',
' 2 spaces before',
"3 spaces in the middle\nand 2 spaces in the middle",
];
$this->assertEquals([$expectedRow], $allRows);
}
/**
* Reading the attack file must fail with an IOException, quickly and without exhausting the memory.
* The time and memory assertions only run once the IOException has been caught.
*
* @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
*
* @return void
*/
public function testReadShouldBeProtectedAgainstBillionLaughAttack()
{
$startTime = microtime(true);
$fileName = 'attack_billion_laughs.ods';
try {
// using @ to prevent warnings/errors from being displayed
@$this->getAllRowsForFile($fileName);
$this->fail('An exception should have been thrown');
} catch (IOException $exception) {
$duration = microtime(true) - $startTime;
$this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.');
$expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB
$this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.');
}
}
/**
* The attack file must be readable, with the first cell read as an empty string (entities left unexpanded),
* quickly and without exhausting the memory.
*
* @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
*
* @return void
*/
public function testReadShouldBeProtectedAgainstQuadraticBlowupAttack()
{
$startTime = microtime(true);
$fileName = 'attack_quadratic_blowup.ods';
$allRows = $this->getAllRowsForFile($fileName);
$this->assertEquals('', $allRows[0][0], 'Entities should not have been expanded');
$duration = microtime(true) - $startTime;
$this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.');
$expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB
$this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.');
}
/**
* A sheet without any cell must produce no row at all.
*
* @return void
*/
public function testReadShouldBeAbleToProcessEmptySheets()
{
$allRows = $this->getAllRowsForFile('sheet_with_no_cells.ods');
$this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.');
}
/**
* Cells holding formulas must be read as values; the formulas themselves are not returned.
*
* @return void
*/
public function testReadShouldSkipFormulas()
{
$allRows = $this->getAllRowsForFile('sheet_with_formulas.ods');
$expectedRows = [
['val1', 'val2', 'total1', 'total2'],
[10, 20, 30, 21],
[11, 21, 32, 41],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* Iterating twice over the rows of the same sheet must be refused.
*
* @expectedException \Box\Spout\Reader\Exception\IteratorNotRewindableException
*
* @return void
*/
public function testReadShouldThrowIfTryingToRewindRowIterator()
{
$resourcePath = $this->getResourcePath('one_sheet_with_strings.ods');
$reader = ReaderFactory::create(Type::ODS);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheet) {
// start looping through the rows
foreach ($sheet->getRowIterator() as $row) {
break;
}
// this will rewind the row iterator
foreach ($sheet->getRowIterator() as $row) {
break;
}
}
}
/**
* Unlike the row iterator, the sheet iterator can be traversed several times:
* every new loop must start again from the first sheet.
*
* @return void
*/
public function testReadMultipleTimesShouldRewindReader()
{
$allRows = [];
$resourcePath = $this->getResourcePath('two_sheets_with_strings.ods');
$reader = ReaderFactory::create(Type::ODS);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheet) {
// do nothing
}
// this loop should only add the first row of each sheet
foreach ($reader->getSheetIterator() as $sheet) {
foreach ($sheet->getRowIterator() as $row) {
$allRows[] = $row;
break;
}
}
// this loop should only add the first row of the first sheet
foreach ($reader->getSheetIterator() as $sheet) {
foreach ($sheet->getRowIterator() as $row) {
$allRows[] = $row;
break;
}
// stop reading more sheets
break;
}
$reader->close();
$expectedRows = [
['ods--sheet1--11', 'ods--sheet1--12', 'ods--sheet1--13'], // 1st row, 1st sheet
['ods--sheet2--11', 'ods--sheet2--12', 'ods--sheet2--13'], // 1st row, 2nd sheet
['ods--sheet1--11', 'ods--sheet1--12', 'ods--sheet1--13'], // 1st row, 1st sheet
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* Reads the given resource file with an ODS reader and collects the rows of all its sheets,
* in reading order, in one flat list.
*
* @param string $fileName
* @return array All the rows read from the given file
*/
private function getAllRowsForFile($fileName)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
$reader = ReaderFactory::create(Type::ODS);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
$allRows[] = $row;
}
}
$reader->close();
return $allRows;
}
}

View File

@ -173,12 +173,13 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
*/
public function testReadShouldSkipEmptyRows()
{
$allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx');
$allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx');
$this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped');
$expectedRows = [
['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
// skipped row here
['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
];
$this->assertEquals($expectedRows, $allRows);

View File

@ -24,11 +24,9 @@ class SheetTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('CustomName1', $sheets[0]->getName());
$this->assertEquals(0, $sheets[0]->getIndex());
$this->assertEquals(1, $sheets[0]->getId());
$this->assertEquals('CustomName2', $sheets[1]->getName());
$this->assertEquals(1, $sheets[1]->getIndex());
$this->assertEquals(2, $sheets[1]->getId());
}
/**

View File

@ -5,7 +5,6 @@ namespace Box\Spout\Writer\ODS;
use Box\Spout\Common\Type;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\TestUsingResource;
use Box\Spout\Writer\Style\StyleBuilder;
use Box\Spout\Writer\WriterFactory;
/**

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.