diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php
index 5471f85..5865d20 100644
--- a/src/Spout/Reader/XLSX.php
+++ b/src/Spout/Reader/XLSX.php
@@ -19,6 +19,14 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper;
  */
 class XLSX extends AbstractReader
 {
+    const CELL_TYPE_INLINE_STRING = 'inlineStr';
+    const CELL_TYPE_STR = 'str';
+    const CELL_TYPE_SHARED_STRING = 's';
+    const CELL_TYPE_BOOLEAN = 'b';
+    const CELL_TYPE_NUMERIC = 'n';
+    const CELL_TYPE_DATE = 'd';
+    const CELL_TYPE_ERROR = 'e';
+
     /** @var string Real path of the file to read */
     protected $filePath;
 
@@ -224,7 +232,114 @@ class XLSX extends AbstractReader
     }
 
     /**
-     * Returns the (unescaped) cell value associated to the given XML node.
+     * Returns the raw text of the first "v" (value) child element of the given cell node.
+     *
+     * @param \DOMNode $node Cell node to read the value from
+     * @return string The text of the "v" child, or an empty string when the cell has none
+     */
+    protected function getVNodeValue($node)
+    {
+        // Every cell type except inline strings should store its value in a "v" tag.
+        // When that tag is missing, an empty string is returned.
+        $vNode = $node->getElementsByTagName('v')->item(0);
+        if ($vNode !== null) {
+            return $vNode->nodeValue;
+        }
+        return '';
+    }
+
+    /**
+     * Returns the (unescaped) value of an inline string cell.
+     *
+     * @param \DOMNode $node Cell node of type "inlineStr"
+     * @param \Box\Spout\Common\Escaper\XLSX $escaper
+     * @return string The unescaped string, or an empty string when the cell has no "t" child
+     */
+    protected function formatInlineStringCellValue($node, $escaper)
+    {
+        // inline strings keep their text in a nested "t" tag instead of a "v" tag
+        $tNode = $node->getElementsByTagName('t')->item(0);
+        if ($tNode === null) {
+            // malformed or empty inline string: avoid dereferencing a missing node
+            return '';
+        }
+        $escapedCellValue = trim($tNode->nodeValue);
+        return $escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Returns the (unescaped) shared string referenced by the given cell value.
+     *
+     * @param string $nodeValue Text of the cell's "v" tag, holding the shared string index
+     * @param \Box\Spout\Common\Escaper\XLSX $escaper
+     * @return string The unescaped shared string
+     */
+    protected function formatSharedStringCellValue($nodeValue, $escaper)
+    {
+        // for shared strings, the "v" tag holds an index into the shared strings file
+        $sharedStringIndex = intval($nodeValue);
+        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
+        return $escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Returns the (unescaped) string stored directly in the cell's "v" tag.
+     *
+     * @param string $nodeValue Text of the cell's "v" tag
+     * @param \Box\Spout\Common\Escaper\XLSX $escaper
+     * @return string The trimmed, unescaped string
+     */
+    protected function formatStrCellValue($nodeValue, $escaper)
+    {
+        $escapedCellValue = trim($nodeValue);
+        return $escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Converts the text of a numeric cell to a PHP number.
+     *
+     * @param string $nodeValue Text of the cell's "v" tag
+     * @return int|float An int when the text is numerically equal to its integer conversion, a float otherwise
+     */
+    protected function formatNumericCellValue($nodeValue)
+    {
+        // $nodeValue always comes from XML as a string, so is_int() can never be true for it.
+        // Compare it with its integer conversion instead to detect whole numbers.
+        $intValue = intval($nodeValue);
+        return ($intValue == $nodeValue) ? $intValue : floatval($nodeValue);
+    }
+
+    /**
+     * Converts the text of a boolean cell to a PHP boolean.
+     *
+     * @param string $nodeValue Text of the cell's "v" tag
+     * @return bool False when the text is "0" (or empty), true otherwise
+     */
+    protected function formatBooleanCellValue($nodeValue)
+    {
+        // the cast follows PHP truthiness rules, like boolval()
+        return (bool) $nodeValue;
+    }
+
+    /**
+     * Converts the text of a date cell to a PHP DateTime object.
+     *
+     * @param string $nodeValue Text of the cell's "v" tag
+     * @return \DateTime|null The parsed date, or null when the text is not a valid date-time
+     */
+    protected function formatDateCellValue($nodeValue)
+    {
+        try {
+            // the DateTime constructor throws on an invalid date-time format
+            // (http://php.net/manual/en/datetime.construct.php)
+            return new \DateTime($nodeValue);
+        } catch (\Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Returns the (unescaped) cell value associated to the given XML node, converted to the PHP type of its cell type.
      *
      * @param \DOMNode $node
      * @param \Box\Spout\Common\Escaper\XLSX $escaper
@@ -232,47 +347,32 @@ class XLSX extends AbstractReader
      */
     protected function getCellValue($node, $escaper)
     {
-        $cellValue = '';
-
         // Default cell type is "n"
         $cellType = $node->getAttribute('t') ?: 'n';
-
-        if ($cellType === 'inlineStr') {
-            // inline strings are formatted this way:
-            // [INLINE_STRING]
-            $tNode = $node->getElementsByTagName('t')->item(0);
-            $escapedCellValue = trim($tNode->nodeValue);
-            $cellValue = $escaper->unescape($escapedCellValue);
-        } else {
-            // all other cell types should have a "v" tag containing the value.
-            // if not, the returned value should be empty string.
-            $vNode = $node->getElementsByTagName('v')->item(0);
-
-            if ($vNode !== null) {
-                if ($cellType === 's') {
-                    // shared strings are formatted this way:
-                    // [SHARED_STRING_INDEX]
-                    $sharedStringIndex = intval($vNode->nodeValue);
-                    $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
-                    $cellValue = $escaper->unescape($escapedCellValue);
-                } else if ($cellType === 'b') {
-                    // !! is similar to boolval()
-                    $cellValue = !!$vNode->nodeValue;
-                } else if ($cellType === 'n') {
-                    $nodeValue = $vNode->nodeValue;
-                    $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue);
-                } else if ($cellType === 'd') {
-                    $cellValue = new \DateTime($vNode->nodeValue);
-                } else if ($cellType === 'e') {
-                    $cellValue = null;
-                } else if ($cellType === 'str') {
-                    $escapedCellValue = trim($vNode->nodeValue);
-                    $cellValue = $escaper->unescape($escapedCellValue);
-                }
-            }
+        $vNodeValue = $this->getVNodeValue($node);
+        // inline strings do not use a "v" tag, so only the other types can be short-circuited here
+        if ($vNodeValue === '' && $cellType !== self::CELL_TYPE_INLINE_STRING) {
+            return $vNodeValue;
         }
 
-        return $cellValue;
+        switch ($cellType) {
+            case self::CELL_TYPE_INLINE_STRING:
+                return $this->formatInlineStringCellValue($node, $escaper);
+            case self::CELL_TYPE_SHARED_STRING:
+                return $this->formatSharedStringCellValue($vNodeValue, $escaper);
+            case self::CELL_TYPE_STR:
+                return $this->formatStrCellValue($vNodeValue, $escaper);
+            case self::CELL_TYPE_BOOLEAN:
+                return $this->formatBooleanCellValue($vNodeValue);
+            case self::CELL_TYPE_NUMERIC:
+                return $this->formatNumericCellValue($vNodeValue);
+            case self::CELL_TYPE_DATE:
+                return $this->formatDateCellValue($vNodeValue);
+            case self::CELL_TYPE_ERROR:
+            default:
+                // error cells and unknown cell types have no usable value
+                return null;
+        }
     }
 
     /**
diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php
index 5856887..f1e3d99 100644
--- a/tests/Spout/Reader/XLSXTest.php
+++ b/tests/Spout/Reader/XLSXTest.php
@@ -129,6 +129,8 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
                 \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'),
                 10, 10.43,
                 null,
+                'weird string', // valid 'str' string
+                null, // invalid date
             ],
             ['', '', '', '', '', '', '', '', ''],
         ];
diff --git a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx
index 17b7303..ec109de 100644
Binary files a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx and b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx differ