diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php
index 5471f85..5865d20 100644
--- a/src/Spout/Reader/XLSX.php
+++ b/src/Spout/Reader/XLSX.php
@@ -19,6 +19,14 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper;
*/
class XLSX extends AbstractReader
{
+ const CELL_TYPE_INLINE_STRING = 'inlineStr'; // text is read from the nested t node of the cell
+ const CELL_TYPE_STR = 'str'; // text is read directly from the v node
+ const CELL_TYPE_SHARED_STRING = 's'; // v node holds an index resolved through the shared strings helper
+ const CELL_TYPE_BOOLEAN = 'b'; // v node content is cast to bool
+ const CELL_TYPE_NUMERIC = 'n'; // also the default type when the t attribute is missing
+ const CELL_TYPE_DATE = 'd'; // v node content is parsed with \DateTime
+ const CELL_TYPE_ERROR = 'e'; // has no dedicated case in getCellValue(); handled by its default branch (null)
+
/** @var string Real path of the file to read */
protected $filePath;
@@ -224,7 +232,115 @@ class XLSX extends AbstractReader
}
/**
- * Returns the (unescaped) cell value associated to the given XML node.
+ * Returns the cell's string value from a node's nested value node
+ *
+ * @param \DOMNode $node
+ * @return string The value associated with the cell
+ */
+ protected function getVNodeValue($node)
+ {
+     // Only the first "v" descendant of the cell is considered.
+     // A cell without any "v" node yields an empty string.
+     $valueNodes = $node->getElementsByTagName('v');
+     $firstValueNode = $valueNodes->item(0);
+
+     return ($firstValueNode === null) ? "" : $firstValueNode->nodeValue;
+ }
+
+ /**
+  * Returns the unescaped text of an inline-string cell.
+  *
+  * Inline strings keep their text in a nested "t" node instead of a "v" node.
+  * Only the first "t" descendant is read. A cell without any "t" node is
+  * treated as holding empty text, the same way getVNodeValue() treats a
+  * missing "v" node, instead of reading a property of null.
+  *
+  * @param \DOMNode $node Cell node whose type is "inlineStr"
+  * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the raw text
+  * @return string The trimmed, unescaped text of the cell
+  */
+ protected function formatInlineStringCellValue($node, $escaper)
+ {
+     $tNode = $node->getElementsByTagName('t')->item(0);
+     // item(0) is null when the cell has no "t" node: fall back to empty text
+     $escapedCellValue = ($tNode !== null) ? trim($tNode->nodeValue) : '';
+
+     return $escaper->unescape($escapedCellValue);
+ }
+
+ /**
+  * Resolves a shared-string cell to its unescaped text.
+  *
+  * The value node of such a cell does not hold the text itself but an index,
+  * which is converted to an integer and looked up through the shared strings
+  * helper before being unescaped.
+  *
+  * @param string $nodeValue Content of the value node, i.e. the shared string index
+  * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the looked-up text
+  * @return string The value associated with the cell
+  */
+ protected function formatSharedStringCellValue($nodeValue, $escaper)
+ {
+     $escapedText = $this->sharedStringsHelper->getStringAtIndex(intval($nodeValue));
+
+     return $escaper->unescape($escapedText);
+ }
+
+ /**
+  * Returns the unescaped text of a cell whose string is stored directly in its value node.
+  *
+  * @param string $nodeValue Content of the value node
+  * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the trimmed text
+  * @return string The value associated with the cell
+  */
+ protected function formatStrCellValue($nodeValue, $escaper)
+ {
+     // Surrounding whitespace is dropped before unescaping.
+     return $escaper->unescape(trim($nodeValue));
+ }
+
+ /**
+  * Converts the textual content of a numeric cell into a PHP number.
+  *
+  * The value read from the XML is always a string, so is_int() can never tell
+  * integers from decimals. Instead, the text is returned as an int when it is
+  * exactly the canonical decimal form of that int (e.g. "10", "-3"). Any other
+  * text ("10.43", "1E+3", values that do not fit in an int...) is converted
+  * with floatval().
+  *
+  * @param string $nodeValue Content of the value node
+  * @return int|float The value associated with the cell
+  */
+ protected function formatNumericCellValue($nodeValue)
+ {
+     $intValue = intval($nodeValue);
+
+     // Converting back to a string only reproduces the text for plain integers,
+     // which also rules out values that were truncated or clamped by intval().
+     if ((string) $intValue === trim($nodeValue)) {
+         return $intValue;
+     }
+
+     return floatval($nodeValue);
+ }
+
+ /**
+  * Converts the content of a boolean cell's value node into a PHP boolean.
+  *
+  * @param string $nodeValue Content of the value node
+  * @return bool The value associated with the cell
+  */
+ protected function formatBooleanCellValue($nodeValue)
+ {
+     // A plain cast follows the same truthiness rules as a double negation:
+     // "" and "0" are false, every other string is true.
+     return (bool) $nodeValue;
+ }
+
+ /**
+  * Builds a PHP date from the content of a date cell's value node.
+  *
+  * The text is handed to the \DateTime constructor as is. When the constructor
+  * throws (see http://php.net/manual/en/datetime.construct.php), the failure is
+  * swallowed and null is returned instead.
+  *
+  * @param string $nodeValue Content of the value node
+  * @return \DateTime|null The value associated with the cell (null when the date cannot be parsed)
+  */
+ protected function formatDateCellValue($nodeValue)
+ {
+     $parsedDate = null;
+
+     try {
+         $parsedDate = new \DateTime($nodeValue);
+     } catch (\Exception $invalidDateException) {
+         // Invalid date-time format: keep the null default
+     }
+
+     return $parsedDate;
+ }
+
+ /**
+ * Returns the (unescaped), correctly marshalled cell value associated with the given XML node.
*
* @param \DOMNode $node
* @param \Box\Spout\Common\Escaper\XLSX $escaper
@@ -232,47 +348,30 @@ class XLSX extends AbstractReader
*/
protected function getCellValue($node, $escaper)
{
- $cellValue = '';
-
// Default cell type is "n"
$cellType = $node->getAttribute('t') ?: 'n';
-
- if ($cellType === 'inlineStr') {
- // inline strings are formatted this way:
- // [INLINE_STRING]
- $tNode = $node->getElementsByTagName('t')->item(0);
- $escapedCellValue = trim($tNode->nodeValue);
- $cellValue = $escaper->unescape($escapedCellValue);
- } else {
- // all other cell types should have a "v" tag containing the value.
- // if not, the returned value should be empty string.
- $vNode = $node->getElementsByTagName('v')->item(0);
-
- if ($vNode !== null) {
- if ($cellType === 's') {
- // shared strings are formatted this way:
- // [SHARED_STRING_INDEX]
- $sharedStringIndex = intval($vNode->nodeValue);
- $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
- $cellValue = $escaper->unescape($escapedCellValue);
- } else if ($cellType === 'b') {
- // !! is similar to boolval()
- $cellValue = !!$vNode->nodeValue;
- } else if ($cellType === 'n') {
- $nodeValue = $vNode->nodeValue;
- $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue);
- } else if ($cellType === 'd') {
- $cellValue = new \DateTime($vNode->nodeValue);
- } else if ($cellType === 'e') {
- $cellValue = null;
- } else if ($cellType === 'str') {
- $escapedCellValue = trim($vNode->nodeValue);
- $cellValue = $escaper->unescape($escapedCellValue);
- }
- }
+ $vNodeValue = $this->getVNodeValue($node); // empty string when the cell has no v child node
+ if ( ($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING) ) { // inline strings keep their text in a t node, so they are still resolved below
+ return $vNodeValue;
}
- return $cellValue;
+ switch ($cellType)
+ {
+ case self::CELL_TYPE_INLINE_STRING:
+ return $this->formatInlineStringCellValue($node, $escaper);
+ case self::CELL_TYPE_SHARED_STRING:
+ return $this->formatSharedStringCellValue($vNodeValue, $escaper);
+ case self::CELL_TYPE_STR:
+ return $this->formatStrCellValue($vNodeValue, $escaper);
+ case self::CELL_TYPE_BOOLEAN:
+ return $this->formatBooleanCellValue($vNodeValue);
+ case self::CELL_TYPE_NUMERIC:
+ return $this->formatNumericCellValue($vNodeValue);
+ case self::CELL_TYPE_DATE:
+ return $this->formatDateCellValue($vNodeValue);
+ default: // reached by CELL_TYPE_ERROR and by any cell type without a dedicated case
+ return null;
+ }
}
/**
diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php
index 5856887..f1e3d99 100644
--- a/tests/Spout/Reader/XLSXTest.php
+++ b/tests/Spout/Reader/XLSXTest.php
@@ -129,6 +129,8 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
\DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'),
10, 10.43,
null,
+ 'weird string', // valid 'str' string
+ null, // invalid date
],
['', '', '', '', '', '', '', '', ''],
];
diff --git a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx
index 17b7303..ec109de 100644
Binary files a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx and b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx differ