diff --git a/src/Spout/Reader/Exception/IteratorNotRewindableException.php b/src/Spout/Reader/Exception/IteratorNotRewindableException.php new file mode 100644 index 0000000..0277fa3 --- /dev/null +++ b/src/Spout/Reader/Exception/IteratorNotRewindableException.php @@ -0,0 +1,12 @@ +zip = new \ZipArchive(); + + if ($this->zip->open($filePath) === true) { + $this->sheetIterator = new SheetIterator($filePath); + } else { + throw new IOException("Could not open $filePath for reading."); + } + } + + /** + * Returns an iterator to iterate over sheets. + * + * @return SheetIterator To iterate over sheets + */ + public function getConcreteSheetIterator() + { + return $this->sheetIterator; + } + + /** + * Closes the reader. To be used after reading the file. + * + * @return void + */ + protected function closeReader() + { + if ($this->zip) { + $this->zip->close(); + } + } +} diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php new file mode 100644 index 0000000..1130226 --- /dev/null +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -0,0 +1,314 @@ +" element + */ + public function __construct($xmlReader) + { + $this->xmlReader = $xmlReader; + + /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ + $this->escaper = new \Box\Spout\Common\Escaper\ODS(); + } + + /** + * Rewind the Iterator to the first element. + * NOTE: It can only be done once, as it is not possible to read an XML file backwards. + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once + */ + public function rewind() + { + // Because sheet and row data is located in the file, we can't rewind both the + // sheet iterator and the row iterator, as XML file cannot be read backwards. + // Therefore, rewinding the row iterator has been disabled. 
+ if ($this->hasAlreadyBeenRewound) { + throw new IteratorNotRewindableException(); + } + + $this->hasAlreadyBeenRewound = true; + $this->numReadRows = 0; + $this->rowDataBuffer = null; + $this->hasReachedEndOfFile = false; + + $this->next(); + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return (!$this->hasReachedEndOfFile); + } + + /** + * Move forward to next element. Empty rows will be skipped. + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + public function next() + { + $rowData = []; + $cellValue = null; + $numColumnsRepeated = 1; + $numCellsRead = 0; + $hasAlreadyReadOneCell = false; + + try { + while ($this->xmlReader->read()) { + if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { + // Start of a cell description + $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); + + $node = $this->xmlReader->expand(); + $currentCellValue = $this->getCellValue($node); + + // process cell N only after having read cell N+1 (see below why) + if ($hasAlreadyReadOneCell) { + for ($i = 0; $i < $numColumnsRepeated; $i++) { + $rowData[] = $cellValue; + } + } + + $cellValue = $currentCellValue; + $numColumnsRepeated = $currentNumColumnsRepeated; + + $numCellsRead++; + $hasAlreadyReadOneCell = true; + + } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { + // End of the row description + $isEmptyRow = ($numCellsRead <= 1 && empty($cellValue)); + if ($isEmptyRow) { + // skip empty rows + $this->next(); + return; + } + + // Only add value if the last read cell is not empty or does not need to repeat cell values. 
+ // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equals to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to returns rows with always 16384 cells. + if (!empty($cellValue) || $numColumnsRepeated === 1) { + for ($i = 0; $i < $numColumnsRepeated; $i++) { + $rowData[] = $cellValue; + } + + $this->numReadRows++; + } + break; + + } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + break; + } + } + + } catch (XMLProcessingException $exception) { + throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]"); + } + + $this->rowDataBuffer = $rowData; + } + + /** + * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumColumnsRepeatedForCurrentNode() + { + $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); + return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1; + } + + /** + * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. + * @TODO Add other types !! 
+ * + * @param \DOMNode $node + * @return string|int|float|bool The value associated with the cell (or empty string if cell's type is undefined) + */ + protected function getCellValue($node) + { + $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE); + $pNodeValue = $this->getTextPNodeValue($node); + + switch ($cellType) { + case self::CELL_TYPE_STRING: + return $this->formatStringCellValue($node); + case self::CELL_TYPE_FLOAT: + return $this->formatFloatCellValue($pNodeValue); + case self::CELL_TYPE_BOOLEAN: + return $this->formatBooleanCellValue($pNodeValue); + default: + return ''; + } + } + + /** + * Returns the value of the first "" node within the given node. + * + * @param \DOMNode $node + * @return string Value for the first "" node or empty string if no "" found + */ + protected function getTextPNodeValue($node) + { + $nodeValue = ''; + $pNodes = $node->getElementsByTagName(self::XML_NODE_P); + + if ($pNodes->length > 0) { + $nodeValue = $pNodes->item(0)->nodeValue; + } + + return $nodeValue; + } + + /** + * Returns the cell String value. + * + * @param \DOMNode $node + * @return string The value associated with the cell + */ + protected function formatStringCellValue($node) + { + $pNodeValues = []; + $pNodes = $node->getElementsByTagName(self::XML_NODE_P); + + foreach ($pNodes as $pNode) { + $currentPValue = ''; + + foreach ($pNode->childNodes as $childNode) { + if ($childNode instanceof \DOMText) { + $currentPValue .= $childNode->nodeValue; + } else if ($childNode->nodeName === self::XML_NODE_S) { + $spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C); + $numSpaces = (!empty($spaceAttribute)) ? intval($spaceAttribute) : 1; + $currentPValue .= str_repeat(' ', $numSpaces); + } + } + + $pNodeValues[] = $currentPValue; + } + + $escapedCellValue = implode("\n", $pNodeValues); + $cellValue = $this->escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell Numeric value from string of nodeValue. 
+ * + * @param string $pNodeValue Raw text of the cell, as read from the XML + * @return int|float The value associated with the cell (int when the text has no fractional part, float otherwise) + */ + protected function formatFloatCellValue($pNodeValue) + { + // The value read from the XML is always a string, so is_int() can never be true. + // Compare the integer and float readings of the text to pick the right type instead. + $intValue = intval($pNodeValue); + $floatValue = floatval($pNodeValue); + $cellValue = ($floatValue == $intValue) ? $intValue : $floatValue; + return $cellValue; + } + + /** + * Returns the cell Boolean value from a specific node's Value. + * + * @param string $pNodeValue + * @return bool The value associated with the cell + */ + protected function formatBooleanCellValue($pNodeValue) + { + // A plain boolean cast turns any non-empty string other than "0" into true, including the text "FALSE". + // The textual "false" (any case) is therefore handled explicitly; everything else keeps the boolval() semantics. + $isFalseText = (strcasecmp(trim($pNodeValue), 'false') === 0); + $cellValue = (!$isFalseText && !!$pNodeValue); + return $cellValue; + } + + /** + * Return the current element, from the buffer. + * @link http://php.net/manual/en/iterator.current.php + * + * @return array|null + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + $this->xmlReader->close(); + } +} diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php new file mode 100644 index 0000000..c023182 --- /dev/null +++ b/src/Spout/Reader/ODS/Sheet.php @@ -0,0 +1,63 @@ +" element + * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) + * @param string $sheetName Name of the sheet + */ + public function __construct($xmlReader, $sheetIndex, $sheetName) + { + $this->rowIterator = new RowIterator($xmlReader); + $this->index = $sheetIndex; + $this->name = $sheetName; + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } + + /** + * @return int Index of the sheet, based on order of creation (zero-based) + */ + public function getIndex() + { + return $this->index; + } + + /** + * @return string Name of the sheet + */ + public function getName() + { + return $this->name; + } +} diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php new file mode 100644 index 0000000..f8b9203 --- /dev/null +++ b/src/Spout/Reader/ODS/SheetIterator.php @@ -0,0 +1,135 @@ +filePath = $filePath; + $this->xmlReader = new XMLReader(); + + /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ + $this->escaper = new \Box\Spout\Common\Escaper\ODS(); + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + * @throws \Box\Spout\Common\Exception\IOException If unable to open the XML file containing sheets' data + */ + public function rewind() + { + $this->xmlReader->close(); + + $contentXmlFilePath = $this->filePath . '#content.xml'; + if ($this->xmlReader->open('zip://' . 
$contentXmlFilePath) === false) { + throw new IOException("Could not open \"{$contentXmlFilePath}\"."); + } + + try { + $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE); + } catch (XMLProcessingException $exception) { + throw new IOException("The content.xml file is invalid and cannot be read. [{$exception->getMessage()}]"); + } + + $this->currentSheetIndex = 0; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return $this->hasFoundSheet; + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE); + + if ($this->hasFoundSheet) { + $this->currentSheetIndex++; + } + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet Sheet built from the XML reader, the zero-based sheet index and the unescaped sheet name + */ + public function current() + { + $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); + $sheetName = $this->escaper->unescape($escapedSheetName); + + // Argument order must match Sheet::__construct($xmlReader, $sheetIndex, $sheetName) + return new Sheet($this->xmlReader, $this->currentSheetIndex, $sheetName); + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->currentSheetIndex + 1; + } + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + $this->xmlReader->close(); + } +} diff --git a/src/Spout/Reader/ReaderFactory.php b/src/Spout/Reader/ReaderFactory.php index 0e39f59..3a32094 100644 --- a/src/Spout/Reader/ReaderFactory.php +++ b/src/Spout/Reader/ReaderFactory.php @@ -33,6 +33,9 @@ class ReaderFactory case Type::XLSX: $reader = new XLSX\Reader(); break; + case Type::ODS: + $reader = new ODS\Reader(); + break; default: throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType); } diff --git a/src/Spout/Reader/Wrapper/XMLReader.php b/src/Spout/Reader/Wrapper/XMLReader.php index fd33359..d48013f 100644 --- a/src/Spout/Reader/Wrapper/XMLReader.php +++ b/src/Spout/Reader/Wrapper/XMLReader.php @@ -138,4 +138,22 @@ class XMLReader extends \XMLReader return $wasNextSuccessful; } + + /** + * @param string $nodeName + * @return bool Whether the XML Reader is currently positioned on the starting node with given name + */ + public function isPositionedOnStartingNode($nodeName) + { + return ($this->nodeType === XMLReader::ELEMENT && $this->name === $nodeName); + } + + /** + * @param string $nodeName + * @return bool Whether the XML Reader is currently positioned on the ending node with given name + */ + public function isPositionedOnEndingNode($nodeName) + { + return ($this->nodeType === XMLReader::END_ELEMENT && $this->name === $nodeName); + } } diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 9c4f746..6aafb52 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -83,7 +83,7 @@ class SharedStringsHelper $escaper = new \Box\Spout\Common\Escaper\XLSX(); $sharedStringsFilePath = $this->getSharedStringsFilePath(); - if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) { + if ($xmlReader->open($sharedStringsFilePath) === false) { throw new 
IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); } diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index 577d58d..b1c393e 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -101,7 +101,6 @@ class SheetHelper */ protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased) { - $sheetId = $sheetIndexZeroBased + 1; $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); /* @@ -123,7 +122,6 @@ class SheetHelper if (count($sheetNodes) === 1) { $sheetNode = $sheetNodes[0]; - $sheetId = (int) $sheetNode->getAttribute('sheetId'); $escapedSheetName = $sheetNode->getAttribute('name'); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -132,7 +130,7 @@ class SheetHelper } } - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetId, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index ed9db60..5b266e1 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -77,6 +77,7 @@ class RowIterator implements IteratorInterface $this->sharedStringsHelper = $sharedStringsHelper; $this->xmlReader = new XMLReader(); + /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $this->escaper = new \Box\Spout\Common\Escaper\XLSX(); } @@ -143,7 +144,7 @@ class RowIterator implements IteratorInterface try { while ($this->xmlReader->read()) { - if ($this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) { + if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { // Read dimensions of the sheet $dimensionRef = 
$this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { @@ -151,7 +152,7 @@ class RowIterator implements IteratorInterface $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; } - } else if ($this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) { + } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { // Start of the row description $isInsideRowTag = true; @@ -164,7 +165,7 @@ class RowIterator implements IteratorInterface } $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; - } else if ($isInsideRowTag && $this->xmlReader->nodeType === XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) { + } else if ($isInsideRowTag && $this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { // Start of a cell description $currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); @@ -172,16 +173,17 @@ class RowIterator implements IteratorInterface $node = $this->xmlReader->expand(); $rowData[$currentColumnIndex] = $this->getCellValue($node); - } else if ($this->xmlReader->nodeType === XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) { + } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { // End of the row description // If needed, we fill the empty cells $rowData = ($this->numColumns !== 0) ? 
$rowData : CellHelper::fillMissingArrayIndexes($rowData); $this->numReadRows++; break; - } else if ($this->xmlReader->nodeType === XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) { + } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { // The closing "" marks the end of the file $this->hasReachedEndOfFile = true; + break; } } @@ -192,6 +194,40 @@ class RowIterator implements IteratorInterface $this->rowDataBuffer = $rowData; } + /** + * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. + * + * @param \DOMNode $node + * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) + */ + protected function getCellValue($node) + { + // Default cell type is "n" + $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; + $vNodeValue = $this->getVNodeValue($node); + + if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { + return $vNodeValue; + } + + switch ($cellType) { + case self::CELL_TYPE_INLINE_STRING: + return $this->formatInlineStringCellValue($node); + case self::CELL_TYPE_SHARED_STRING: + return $this->formatSharedStringCellValue($vNodeValue); + case self::CELL_TYPE_STR: + return $this->formatStrCellValue($vNodeValue); + case self::CELL_TYPE_BOOLEAN: + return $this->formatBooleanCellValue($vNodeValue); + case self::CELL_TYPE_NUMERIC: + return $this->formatNumericCellValue($vNodeValue); + case self::CELL_TYPE_DATE: + return $this->formatDateCellValue($vNodeValue); + default: + return null; + } + } + /** * Returns the cell's string value from a node's nested value node * @@ -203,10 +239,7 @@ class RowIterator implements IteratorInterface // for cell types having a "v" tag containing the value. // if not, the returned value should be empty string. 
$vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); - if ($vNode !== null) { - return $vNode->nodeValue; - } - return ""; + return ($vNode !== null) ? $vNode->nodeValue : ''; } /** @@ -296,40 +329,6 @@ class RowIterator implements IteratorInterface } } - /** - * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. - * - * @param \DOMNode $node - * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) - */ - protected function getCellValue($node) - { - // Default cell type is "n" - $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; - $vNodeValue = $this->getVNodeValue($node); - - if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { - return $vNodeValue; - } - - switch ($cellType) { - case self::CELL_TYPE_INLINE_STRING: - return $this->formatInlineStringCellValue($node); - case self::CELL_TYPE_SHARED_STRING: - return $this->formatSharedStringCellValue($vNodeValue); - case self::CELL_TYPE_STR: - return $this->formatStrCellValue($vNodeValue); - case self::CELL_TYPE_BOOLEAN: - return $this->formatBooleanCellValue($vNodeValue); - case self::CELL_TYPE_NUMERIC: - return $this->formatNumericCellValue($vNodeValue); - case self::CELL_TYPE_DATE: - return $this->formatDateCellValue($vNodeValue); - default: - return null; - } - } - /** * Return the current element, from the buffer. 
* @link http://php.net/manual/en/iterator.current.php diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index 9510ecd..ce88212 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -15,9 +15,6 @@ class Sheet implements SheetInterface /** @var RowIterator To iterate over sheet's rows */ protected $rowIterator; - /** @var int ID of the sheet */ - protected $id; - /** @var int Index of the sheet, based on order of creation (zero-based) */ protected $index; @@ -28,14 +25,12 @@ class Sheet implements SheetInterface * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper Helper to work with shared strings - * @param int $sheetId ID of the sheet * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetId, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName) { $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); - $this->id = $sheetId; $this->index = $sheetIndex; $this->name = $sheetName; } @@ -48,14 +43,6 @@ class Sheet implements SheetInterface return $this->rowIterator; } - /** - * @return int ID of the sheet - */ - public function getId() - { - return $this->id; - } - /** * @return int Index of the sheet, based on order of creation (zero-based) */ diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php new file mode 100644 index 0000000..81d808e --- /dev/null +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -0,0 +1,371 @@ +getAllRowsForFile($filePath); + } + + /** + * @return array + */ + public function dataProviderForTestReadForAllWorksheets() + { + return 
[ + ['one_sheet_with_strings.ods', 2, 3], + ['two_sheets_with_strings.ods', 4, 3], + ]; + } + + /** + * @dataProvider dataProviderForTestReadForAllWorksheets + * + * @param string $resourceName + * @param int $expectedNumOfRows + * @param int $expectedNumOfCellsPerRow + * @return void + */ + public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow) + { + $allRows = $this->getAllRowsForFile($resourceName); + + $this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows"); + foreach ($allRows as $row) { + $this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row"); + } + } + + /** + * @return void + */ + public function testReadShouldSupportRowWithOnlyOneCell() + { + $allRows = $this->getAllRowsForFile('sheet_with_only_one_cell.ods'); + $this->assertEquals([['foo']], $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportNumberColumnsRepeated() + { + $allRows = $this->getAllRowsForFile('sheet_with_number_columns_repeated.ods'); + $expectedRows = [ + [ + 'foo', 'foo', 'foo', + '', '', + true, true, + 10.43, 10.43, 10.43, 10.43, + ], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return array + */ + public function dataProviderForTestReadWithFilesGeneratedByExternalSoftwares() + { + return [ + ['file_generated_by_libre_office.ods', true], + ['file_generated_by_excel_2010_windows.ods', false], + ['file_generated_by_excel_office_online.ods', false], + ]; + } + + /** + * @dataProvider dataProviderForTestReadWithFilesGeneratedByExternalSoftwares + * The files contain styles, different value types, gaps between cells, + * repeated values, empty row, different number of cells per row. 
+ * + * @param bool $skipLastEmptyValues + * @param string $fileName + * @return void + */ + public function testReadWithFilesGeneratedByExternalSoftwares($fileName, $skipLastEmptyValues) + { + $allRows = $this->getAllRowsForFile($fileName); + + $expectedRows = [ + ['header1','header2','header3','header4'], + ['val11','val12','val13','val14'], + ['val21','','val23','val23'], + ['', 10.43, 29.11], + ]; + + // In the description of the last cell, Excel specifies that the empty value needs to be repeated + // a lot of times (16384 - number of cells used in the row). To avoid creating 16384 cells all the time, + // this cell is skipped alltogether. + if ($skipLastEmptyValues) { + $expectedRows[3][] = ''; + } + + $this->assertEquals($expectedRows, $allRows); + } + + + /** + * @return void + */ + public function testReadShouldSupportAllCellTypes() + { + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.ods'); + + $expectedRows = [ + [ + 'ods--11', 'ods--12', + true, false, + 0, 10.43, + '', + ], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldReturnEmptyStringOnUndefinedCellType() + { + $allRows = $this->getAllRowsForFile('sheet_with_undefined_value_type.ods'); + $this->assertEquals([['ods--11', '', 'ods--13']], $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportMultilineStrings() + { + $allRows = $this->getAllRowsForFile('sheet_with_multiline_string.ods'); + + $expectedRows = [["string\non multiple\nlines!"]]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyRow() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_row.ods'); + $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + + $expectedRows = [ + ['ods--11', 'ods--12', 'ods--13'], + // row skipped here + ['ods--21', 'ods--22', 'ods--23'], + ]; + 
$this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveSpacing() + { + $allRows = $this->getAllRowsForFile('sheet_with_various_spaces.ods'); + + $expectedRow = [ + ' 4 spaces before and after ', + ' 1 space before and after ', + '2 spaces after ', + ' 2 spaces before', + "3 spaces in the middle\nand 2 spaces in the middle", + ]; + $this->assertEquals([$expectedRow], $allRows); + } + + + /** + * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) + * + * @return void + */ + public function testReadShouldBeProtectedAgainstBillionLaughAttack() + { + $startTime = microtime(true); + $fileName = 'attack_billion_laughs.ods'; + + try { + // using @ to prevent warnings/errors from being displayed + @$this->getAllRowsForFile($fileName); + $this->fail('An exception should have been thrown'); + } catch (IOException $exception) { + $duration = microtime(true) - $startTime; + $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); + + $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB + $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); + } + } + + /** + * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) + * + * @return void + */ + public function testReadShouldBeProtectedAgainstQuadraticBlowupAttack() + { + $startTime = microtime(true); + + $fileName = 'attack_quadratic_blowup.ods'; + $allRows = $this->getAllRowsForFile($fileName); + + $this->assertEquals('', $allRows[0][0], 'Entities should not have been expanded'); + + $duration = microtime(true) - $startTime; + $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); + + $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB + 
$this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); + } + + /** + * @return void + */ + public function testReadShouldBeAbleToProcessEmptySheets() + { + $allRows = $this->getAllRowsForFile('sheet_with_no_cells.ods'); + $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); + } + + /** + * @return void + */ + public function testReadShouldSkipFormulas() + { + $allRows = $this->getAllRowsForFile('sheet_with_formulas.ods'); + + $expectedRows = [ + ['val1', 'val2', 'total1', 'total2'], + [10, 20, 30, 21], + [11, 21, 32, 41], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @expectedException \Box\Spout\Reader\Exception\IteratorNotRewindableException + * + * @return void + */ + public function testReadShouldThrowIfTryingToRewindRowIterator() + { + $resourcePath = $this->getResourcePath('one_sheet_with_strings.ods'); + $reader = ReaderFactory::create(Type::ODS); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // start looping throw the rows + foreach ($sheet->getRowIterator() as $row) { + break; + } + + // this will rewind the row iterator + foreach ($sheet->getRowIterator() as $row) { + break; + } + } + } + + /** + * @return void + */ + public function testReadMultipleTimesShouldRewindReader() + { + $allRows = []; + $resourcePath = $this->getResourcePath('two_sheets_with_strings.ods'); + + $reader = ReaderFactory::create(Type::ODS); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // do nothing + } + + // this loop should only add the first row of each sheet + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + } + + // this loop should only add the first row of the first sheet + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() 
as $row) { + $allRows[] = $row; + break; + } + + // stop reading more sheets + break; + } + + $reader->close(); + + $expectedRows = [ + ['ods--sheet1--11', 'ods--sheet1--12', 'ods--sheet1--13'], // 1st row, 1st sheet + ['ods--sheet2--11', 'ods--sheet2--12', 'ods--sheet2--13'], // 1st row, 2nd sheet + ['ods--sheet1--11', 'ods--sheet1--12', 'ods--sheet1--13'], // 1st row, 1st sheet + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @param string $fileName + * @return array All the read rows the given file + */ + private function getAllRowsForFile($fileName) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + $reader = ReaderFactory::create(Type::ODS); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + return $allRows; + } +} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 1ec4290..eb42b84 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -173,12 +173,13 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSkipEmptyRows() { - $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx'); + $allRows = $this->getAllRowsForFile('sheet_with_empty_row.xlsx'); $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); $expectedRows = [ ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + // skipped row here ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], ]; $this->assertEquals($expectedRows, $allRows); diff --git a/tests/Spout/Reader/XLSX/SheetTest.php b/tests/Spout/Reader/XLSX/SheetTest.php index 8f3f9df..3464819 100644 --- a/tests/Spout/Reader/XLSX/SheetTest.php +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -24,11 +24,9 @@ class SheetTest extends \PHPUnit_Framework_TestCase 
$this->assertEquals('CustomName1', $sheets[0]->getName()); $this->assertEquals(0, $sheets[0]->getIndex()); - $this->assertEquals(1, $sheets[0]->getId()); $this->assertEquals('CustomName2', $sheets[1]->getName()); $this->assertEquals(1, $sheets[1]->getIndex()); - $this->assertEquals(2, $sheets[1]->getId()); } /** diff --git a/tests/Spout/Writer/ODS/WriterTest.php b/tests/Spout/Writer/ODS/WriterTest.php index 731e873..8146bb7 100644 --- a/tests/Spout/Writer/ODS/WriterTest.php +++ b/tests/Spout/Writer/ODS/WriterTest.php @@ -5,7 +5,6 @@ namespace Box\Spout\Writer\ODS; use Box\Spout\Common\Type; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\TestUsingResource; -use Box\Spout\Writer\Style\StyleBuilder; use Box\Spout\Writer\WriterFactory; /** diff --git a/tests/resources/ods/attack_billion_laughs.ods b/tests/resources/ods/attack_billion_laughs.ods new file mode 100644 index 0000000..0c29831 Binary files /dev/null and b/tests/resources/ods/attack_billion_laughs.ods differ diff --git a/tests/resources/ods/attack_quadratic_blowup.ods b/tests/resources/ods/attack_quadratic_blowup.ods new file mode 100644 index 0000000..104ccd4 Binary files /dev/null and b/tests/resources/ods/attack_quadratic_blowup.ods differ diff --git a/tests/resources/ods/file_corrupted.ods b/tests/resources/ods/file_corrupted.ods new file mode 100644 index 0000000..bf012eb Binary files /dev/null and b/tests/resources/ods/file_corrupted.ods differ diff --git a/tests/resources/ods/file_generated_by_excel_2010_windows.ods b/tests/resources/ods/file_generated_by_excel_2010_windows.ods new file mode 100755 index 0000000..955a465 Binary files /dev/null and b/tests/resources/ods/file_generated_by_excel_2010_windows.ods differ diff --git a/tests/resources/ods/file_generated_by_excel_office_online.ods b/tests/resources/ods/file_generated_by_excel_office_online.ods new file mode 100644 index 0000000..6c3f1f0 Binary files /dev/null and b/tests/resources/ods/file_generated_by_excel_office_online.ods differ 
diff --git a/tests/resources/ods/file_generated_by_libre_office.ods b/tests/resources/ods/file_generated_by_libre_office.ods new file mode 100644 index 0000000..dbc3f96 Binary files /dev/null and b/tests/resources/ods/file_generated_by_libre_office.ods differ diff --git a/tests/resources/ods/one_sheet_with_strings.ods b/tests/resources/ods/one_sheet_with_strings.ods new file mode 100644 index 0000000..c3df611 Binary files /dev/null and b/tests/resources/ods/one_sheet_with_strings.ods differ diff --git a/tests/resources/ods/sheet_with_all_cell_types.ods b/tests/resources/ods/sheet_with_all_cell_types.ods new file mode 100644 index 0000000..5843ac8 Binary files /dev/null and b/tests/resources/ods/sheet_with_all_cell_types.ods differ diff --git a/tests/resources/ods/sheet_with_empty_row.ods b/tests/resources/ods/sheet_with_empty_row.ods new file mode 100644 index 0000000..4763df0 Binary files /dev/null and b/tests/resources/ods/sheet_with_empty_row.ods differ diff --git a/tests/resources/ods/sheet_with_formulas.ods b/tests/resources/ods/sheet_with_formulas.ods new file mode 100644 index 0000000..a5bce1f Binary files /dev/null and b/tests/resources/ods/sheet_with_formulas.ods differ diff --git a/tests/resources/ods/sheet_with_multiline_string.ods b/tests/resources/ods/sheet_with_multiline_string.ods new file mode 100644 index 0000000..6913c24 Binary files /dev/null and b/tests/resources/ods/sheet_with_multiline_string.ods differ diff --git a/tests/resources/ods/sheet_with_no_cells.ods b/tests/resources/ods/sheet_with_no_cells.ods new file mode 100644 index 0000000..0d30af9 Binary files /dev/null and b/tests/resources/ods/sheet_with_no_cells.ods differ diff --git a/tests/resources/ods/sheet_with_number_columns_repeated.ods b/tests/resources/ods/sheet_with_number_columns_repeated.ods new file mode 100644 index 0000000..0f4a802 Binary files /dev/null and b/tests/resources/ods/sheet_with_number_columns_repeated.ods differ diff --git 
a/tests/resources/ods/sheet_with_only_one_cell.ods b/tests/resources/ods/sheet_with_only_one_cell.ods new file mode 100644 index 0000000..c678ff6 Binary files /dev/null and b/tests/resources/ods/sheet_with_only_one_cell.ods differ diff --git a/tests/resources/ods/sheet_with_undefined_value_type.ods b/tests/resources/ods/sheet_with_undefined_value_type.ods new file mode 100644 index 0000000..bb8b00d Binary files /dev/null and b/tests/resources/ods/sheet_with_undefined_value_type.ods differ diff --git a/tests/resources/ods/sheet_with_various_spaces.ods b/tests/resources/ods/sheet_with_various_spaces.ods new file mode 100644 index 0000000..30e3ed2 Binary files /dev/null and b/tests/resources/ods/sheet_with_various_spaces.ods differ diff --git a/tests/resources/ods/two_sheets_with_strings.ods b/tests/resources/ods/two_sheets_with_strings.ods new file mode 100644 index 0000000..c2a78e7 Binary files /dev/null and b/tests/resources/ods/two_sheets_with_strings.ods differ diff --git a/tests/resources/xlsx/attack_billion_laughs.xlsx b/tests/resources/xlsx/attack_billion_laughs.xlsx index d6cdc75..735c1f9 100644 Binary files a/tests/resources/xlsx/attack_billion_laughs.xlsx and b/tests/resources/xlsx/attack_billion_laughs.xlsx differ diff --git a/tests/resources/xlsx/attack_quadratic_blowup.xlsx b/tests/resources/xlsx/attack_quadratic_blowup.xlsx index a317c18..7c67c95 100644 Binary files a/tests/resources/xlsx/attack_quadratic_blowup.xlsx and b/tests/resources/xlsx/attack_quadratic_blowup.xlsx differ diff --git a/tests/resources/xlsx/sheet_with_empty_rows.xlsx b/tests/resources/xlsx/sheet_with_empty_row.xlsx similarity index 100% rename from tests/resources/xlsx/sheet_with_empty_rows.xlsx rename to tests/resources/xlsx/sheet_with_empty_row.xlsx