diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php
index adb9544..83bee9d 100644
--- a/src/Spout/Reader/XLSX.php
+++ b/src/Spout/Reader/XLSX.php
@@ -177,6 +177,7 @@ class XLSX extends AbstractReader
             throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()');
         }
 
+        $escaper = new \Box\Spout\Common\Escaper\XLSX();
         $isInsideRowTag = false;
         $rowData = [];
 
@@ -188,6 +189,7 @@ class XLSX extends AbstractReader
                     $lastCellIndex = $matches[1];
                     $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
                 }
+
             } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'row') {
                 // Start of the row description
                 $isInsideRowTag = true;
@@ -200,32 +202,15 @@ class XLSX extends AbstractReader
                     $numberOfColumnsForRow = intval($numberOfColumnsForRow);
                 }
                 $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
+
             } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'c') {
                 // Start of a cell description
                 $currentCellIndex = $this->xmlReader->getAttribute('r');
                 $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
+
                 $node = $this->xmlReader->expand();
+                $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper);
 
-                $hasInlineString = ($this->xmlReader->getAttribute('t') === 'inlineStr');
-                $hasSharedString = ($this->xmlReader->getAttribute('t') === 's');
-
-                if ($hasInlineString) {
-                    // inline strings are formatted this way:
-                    // [INLINE_STRING]
-                    $tNode = $node->getElementsByTagName('t')->item(0);
-                    $rowData[$currentColumnIndex] = trim($tNode->nodeValue);
-                } else if ($hasSharedString) {
-                    // shared strings are formatted this way:
-                    // [SHARED_STRING_INDEX]
-                    $vNode = $node->getElementsByTagName('v')->item(0);
-                    $sharedStringIndex = intval($vNode->nodeValue);
-                    $rowData[$currentColumnIndex] = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
-                } else {
-                    // other values are formatted this way:
-                    // [VALUE]
-                    $vNode = $node->getElementsByTagName('v')->item(0);
-                    $rowData[$currentColumnIndex] = intval($vNode->nodeValue);
-                }
             } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === 'row') {
                 // End of the row description
                 // If needed, we fill the empty cells
@@ -238,6 +223,74 @@ class XLSX extends AbstractReader
         return ($rowData !== []) ? $rowData : null;
     }
 
+    /**
+     * Returns the (unescaped) cell value associated to the given XML node.
+     *
+     * The value is built according to the cell's "t" attribute (defaults to "n"):
+     *  - "inlineStr": unescaped, trimmed content of the first "t" child
+     *  - "s": unescaped shared string found at the index stored in the "v" child
+     *  - "b": boolean cast of the "v" content
+     *  - "n": int when the "v" content is an integer that fits a PHP int, float otherwise
+     *  - "d": \DateTime built from the "v" content
+     *  - "e": null
+     *  - "str": unescaped, trimmed content of the "v" child
+     * Any other type, or a cell missing its value node, yields an empty string.
+     *
+     * @param \DOMNode $node
+     * @param \Box\Spout\Common\Escaper\XLSX $escaper
+     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
+     * @throws \Exception If the value of a date cell cannot be parsed by \DateTime
+     */
+    protected function getCellValue($node, $escaper)
+    {
+        $cellValue = '';
+
+        // Default cell type is "n"
+        $cellType = $node->getAttribute('t') ?: 'n';
+
+        if ($cellType === 'inlineStr') {
+            // inline strings store their text in a "t" node nested inside the cell.
+            // if that node is missing, the returned value should be empty string.
+            $tNode = $node->getElementsByTagName('t')->item(0);
+
+            if ($tNode !== null) {
+                $escapedCellValue = trim($tNode->nodeValue);
+                $cellValue = $escaper->unescape($escapedCellValue);
+            }
+        } else {
+            // all other cell types should have a "v" tag containing the value.
+            // if not, the returned value should be empty string.
+            $vNode = $node->getElementsByTagName('v')->item(0);
+
+            if ($vNode !== null) {
+                if ($cellType === 's') {
+                    // shared strings store the index of the string in their "v" node
+                    $sharedStringIndex = intval($vNode->nodeValue);
+                    $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
+                    $cellValue = $escaper->unescape($escapedCellValue);
+                } else if ($cellType === 'b') {
+                    // !! is similar to boolval()
+                    $cellValue = !!$vNode->nodeValue;
+                } else if ($cellType === 'n') {
+                    // nodeValue is always a string, so is_int() can never tell integers apart.
+                    // FILTER_VALIDATE_INT only accepts integer notations that fit a PHP int;
+                    // everything else (decimals, exponents, overflows) is read as a float.
+                    $intValue = filter_var($vNode->nodeValue, FILTER_VALIDATE_INT);
+                    $cellValue = ($intValue !== false) ? $intValue : floatval($vNode->nodeValue);
+                } else if ($cellType === 'd') {
+                    $cellValue = new \DateTime($vNode->nodeValue);
+                } else if ($cellType === 'e') {
+                    $cellValue = null;
+                } else if ($cellType === 'str') {
+                    $escapedCellValue = trim($vNode->nodeValue);
+                    $cellValue = $escaper->unescape($escapedCellValue);
+                }
+            }
+        }
+
+        return $cellValue;
+    }
+
     /**
      * Closes the reader. To be used after reading the file.
      *
diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php
index 24aa89c..f611db4 100644
--- a/tests/Spout/Reader/XLSXTest.php
+++ b/tests/Spout/Reader/XLSXTest.php
@@ -114,6 +114,27 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
         $this->assertEquals($expectedRows, $allRows);
     }
 
+    /**
+     * @return void
+     */
+    public function testReadShouldSupportAllCellTypes()
+    {
+        $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx');
+
+        $expectedRows = [
+            [
+                's1--A1', 's1--A2',
+                false, true,
+                \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'),
+                \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'),
+                10, 10.43,
+                null,
+            ],
+            ['', '', '', '', '', '', '', '', ''],
+        ];
+        $this->assertEquals($expectedRows, $allRows);
+    }
+
     /**
      * @return void
      */
diff --git a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx
new file mode 100644
index 0000000..17b7303
Binary files /dev/null and b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx differ