From 0c247dfafb0ebc4f24dc2542f04f30de53777130 Mon Sep 17 00:00:00 2001 From: Lewis Cowles Date: Tue, 30 Jun 2015 22:45:34 +0100 Subject: [PATCH 1/4] Re-Factored Cell Type Value --- src/Spout/Reader/XLSX.php | 171 +++++++++++++++++++++++++++++--------- 1 file changed, 134 insertions(+), 37 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 83bee9d..94b15f1 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -19,6 +19,14 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper; */ class XLSX extends AbstractReader { + const INLINE_STRING_CELL_TYPE = 'inlineStr'; + const STR_CELL_TYPE = 'str'; + const SHARED_STRING_CELL_TYPE = 's'; + const BOOLEAN_CELL_TYPE = 'b'; + const NUMERIC_CELL_TYPE = 'n'; + const DATE_CELL_TYPE = 'd'; + const EMPTY_CELL_TYPE = 'e'; + /** @var string Real path of the file to read */ protected $filePath; @@ -222,6 +230,114 @@ class XLSX extends AbstractReader // no data means "end of file" return ($rowData !== []) ? $rowData : null; } + + /** + * Returns the cell String value associated to the given XML node where string is inline. + * + * @param \DOMNode $node + * @return mixed The value associated with the cell + */ + protected function getVNodeValue(&$node) + { + // all other cell types should have a "v" tag containing the value. + // if not, the returned value should be empty string. + $vNode = $node->getElementsByTagName('v')->item(0); + return $vNode->nodeValue; + } + + /** + * Returns the cell String value associated to the given XML node where string is inline. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatInlineStringCellValue(&$node, &$escaper) + { + // inline strings are formatted this way: + // [INLINE_STRING] + $tNode = $node->getElementsByTagName('t')->item(0); + $escapedCellValue = trim($tNode->nodeValue); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value associated to the given XML node where string is shared in shared-strings file. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatSharedStringCellValue(&$node, &$escaper) + { + // shared strings are formatted this way: + // [SHARED_STRING_INDEX] + $sharedStringIndex = intval($this->getVNodeValue(&$node)); + $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value associated to the given XML node where string is stored in value node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatStrCellValue(&$node, &$escaper) + { + $escapedCellValue = trim($this->getVNodeValue(&$node)); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell Numeric value associated to the given XML node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return int|float The value associated with the cell + */ + protected function formatNumericCellValue(&$node) + { + $nodeValue = $this->getVNodeValue(&$node); + $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); + return $cellValue; + } + + /** + * Returns the cell Boolean value associated to the given XML node. + * + * @param \DOMNode $node + * @return bool The value associated with the cell + */ + protected function formatBooleanCellValue(&$node) + { + // !! is similar to boolval() + $cellValue = (!!$this->getVNodeValue(&$node)); + return $cellValue; + } + + /** + * Returns the cell Date value associated to the given XML node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return DateTime The value associated with the cell (null when the cell has an error) + */ + protected function formatDateCellValue(&$node) + { + // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) + try { + $cellValue = new \DateTime($this->getVNodeValue(&$node)); + return $cellValue; + } catch ( \Exception $e ) { + // Maybe do something... Not famiiar enough to see about exceptions at this stage + return null; + } + } /** * Returns the (unescaped) cell value associated to the given XML node. @@ -232,47 +348,28 @@ class XLSX extends AbstractReader */ protected function getCellValue($node, $escaper) { - $cellValue = ''; - // Default cell type is "n" $cellType = $node->getAttribute('t') ?: 'n'; - - if ($cellType === 'inlineStr') { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - } else { - // all other cell types should have a "v" tag containing the value. - // if not, the returned value should be empty string. - $vNode = $node->getElementsByTagName('v')->item(0); - - if ($vNode !== null) { - if ($cellType === 's') { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($vNode->nodeValue); - $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - $cellValue = $escaper->unescape($escapedCellValue); - } else if ($cellType === 'b') { - // !! is similar to boolval() - $cellValue = !!$vNode->nodeValue; - } else if ($cellType === 'n') { - $nodeValue = $vNode->nodeValue; - $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); - } else if ($cellType === 'd') { - $cellValue = new \DateTime($vNode->nodeValue); - } else if ($cellType === 'e') { - $cellValue = null; - } else if ($cellType === 'str') { - $escapedCellValue = trim($vNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); + + switch($cellType) { + case self::INLINE_STRING_CELL_TYPE: + return $this->formatInlineStringCellValue($node, $escaper); + case self::SHARED_STRING_CELL_TYPE: + return $this->formatSharedStringCellValue($node, $escaper); + case self::STR_CELL_TYPE: + return $this->formatStrCellValue($node, $escaper); + case self::BOOLEAN_CELL_TYPE: + return $this->formatBooleanCellValue($node); + case self::NUMERIC_CELL_TYPE: + return $this->formatNumericCellValue($node); + case self::DATE_CELL_TYPE: + return $this->formatDateCellValue($node); + default: + if($cellType !== self::EMPTY_CELL_TYPE) { + \trigger_error('UNKNOWN CELL TYPE', \E_USER_NOTICE); } - } + return null; } - - return $cellValue; } /** From 44e8b43f958f9f1f118b756b89c602bf2baab692 Mon Sep 17 00:00:00 2001 From: Lewis Cowles Date: Tue, 30 Jun 2015 22:54:09 +0100 Subject: [PATCH 2/4] pass by reference bug... --- src/Spout/Reader/XLSX.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 94b15f1..51dc73c 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -273,7 +273,7 @@ class XLSX extends AbstractReader { // shared strings are formatted this way: // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($this->getVNodeValue(&$node)); + $sharedStringIndex = intval($this->getVNodeValue($node)); $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; @@ -288,7 +288,7 @@ class XLSX extends AbstractReader */ protected function formatStrCellValue(&$node, &$escaper) { - $escapedCellValue = trim($this->getVNodeValue(&$node)); + $escapedCellValue = trim($this->getVNodeValue($node)); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; } @@ -302,7 +302,7 @@ class XLSX extends AbstractReader */ protected function formatNumericCellValue(&$node) { - $nodeValue = $this->getVNodeValue(&$node); + $nodeValue = $this->getVNodeValue($node); $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); return $cellValue; } @@ -316,7 +316,7 @@ class XLSX extends AbstractReader protected function formatBooleanCellValue(&$node) { // !! is similar to boolval() - $cellValue = (!!$this->getVNodeValue(&$node)); + $cellValue = (!!$this->getVNodeValue($node)); return $cellValue; } @@ -331,7 +331,7 @@ class XLSX extends AbstractReader { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) try { - $cellValue = new \DateTime($this->getVNodeValue(&$node)); + $cellValue = new \DateTime($this->getVNodeValue($node)); return $cellValue; } catch ( \Exception $e ) { // Maybe do something... Not famiiar enough to see about exceptions at this stage From a601b17d9d7453cf37e5527484ffde28c3a32964 Mon Sep 17 00:00:00 2001 From: Lewis Cowles Date: Tue, 30 Jun 2015 23:01:16 +0100 Subject: [PATCH 3/4] Update XLSX.php --- src/Spout/Reader/XLSX.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 51dc73c..c54fdf1 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -242,7 +242,10 @@ class XLSX extends AbstractReader // all other cell types should have a "v" tag containing the value. // if not, the returned value should be empty string. $vNode = $node->getElementsByTagName('v')->item(0); - return $vNode->nodeValue; + if ($vNode !== null) { + return $vNode->nodeValue; + } + return ""; } /** From f8624b40e50b7ca37573560913a6a7604a1989c1 Mon Sep 17 00:00:00 2001 From: Lewis Cowles Date: Tue, 30 Jun 2015 23:09:44 +0100 Subject: [PATCH 4/4] Update XLSX.php --- src/Spout/Reader/XLSX.php | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index c54fdf1..1e8a8dc 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -276,7 +276,7 @@ class XLSX extends AbstractReader { // shared strings are formatted this way: // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($this->getVNodeValue($node)); + $sharedStringIndex = intval($node); $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; @@ -291,7 +291,7 @@ class XLSX extends AbstractReader */ protected function formatStrCellValue(&$node, &$escaper) { - $escapedCellValue = trim($this->getVNodeValue($node)); + $escapedCellValue = trim($node); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; } @@ -303,9 +303,8 @@ class XLSX extends AbstractReader * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return int|float The value associated with the cell */ - protected function formatNumericCellValue(&$node) + protected function formatNumericCellValue(&$nodeValue) { - $nodeValue = $this->getVNodeValue($node); $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); return $cellValue; } @@ -319,7 +318,7 @@ class XLSX extends AbstractReader protected function formatBooleanCellValue(&$node) { // !! is similar to boolval() - $cellValue = (!!$this->getVNodeValue($node)); + $cellValue = !!$node; return $cellValue; } @@ -334,7 +333,7 @@ class XLSX extends AbstractReader { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) try { - $cellValue = new \DateTime($this->getVNodeValue($node)); + $cellValue = new \DateTime($node); return $cellValue; } catch ( \Exception $e ) { // Maybe do something... Not famiiar enough to see about exceptions at this stage @@ -353,20 +352,23 @@ class XLSX extends AbstractReader { // Default cell type is "n" $cellType = $node->getAttribute('t') ?: 'n'; - + $vNodeValue = $this->getVNodeValue($node); + if( ($vNodeValue === "") && ($cellType !== self::INLINE_STRING_CELL_TYPE) ) { + return $vNodeValue; + } switch($cellType) { case self::INLINE_STRING_CELL_TYPE: return $this->formatInlineStringCellValue($node, $escaper); case self::SHARED_STRING_CELL_TYPE: - return $this->formatSharedStringCellValue($node, $escaper); + return $this->formatSharedStringCellValue($vNodeValue, $escaper); case self::STR_CELL_TYPE: - return $this->formatStrCellValue($node, $escaper); + return $this->formatStrCellValue($vNodeValue, $escaper); case self::BOOLEAN_CELL_TYPE: - return $this->formatBooleanCellValue($node); + return $this->formatBooleanCellValue($vNodeValue); case self::NUMERIC_CELL_TYPE: - return $this->formatNumericCellValue($node); + return $this->formatNumericCellValue($vNodeValue); case self::DATE_CELL_TYPE: - return $this->formatDateCellValue($node); + return $this->formatDateCellValue($vNodeValue); default: if($cellType !== self::EMPTY_CELL_TYPE) { \trigger_error('UNKNOWN CELL TYPE', \E_USER_NOTICE);