From c1b1bd0b762e88fb847b041273d5077b1069906b Mon Sep 17 00:00:00 2001 From: Lewis Cowles Date: Wed, 1 Jul 2015 23:35:59 +0100 Subject: [PATCH 1/4] Separated getCellValue into multiple functions Author: Lewis Cowles Committer: Lewis modified: src/Spout/Reader/XLSX.php --- src/Spout/Reader/XLSX.php | 178 ++++++++++++++++++++++++++++++-------- 1 file changed, 140 insertions(+), 38 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 5471f85..e6fa729 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -19,6 +19,14 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper; */ class XLSX extends AbstractReader { + const INLINE_STRING_CELL_TYPE = 'inlineStr'; + const STR_CELL_TYPE = 'str'; + const SHARED_STRING_CELL_TYPE = 's'; + const BOOLEAN_CELL_TYPE = 'b'; + const NUMERIC_CELL_TYPE = 'n'; + const DATE_CELL_TYPE = 'd'; + const EMPTY_CELL_TYPE = 'e'; + /** @var string Real path of the file to read */ protected $filePath; @@ -223,6 +231,116 @@ class XLSX extends AbstractReader return ($rowData !== []) ? $rowData : null; } + /** + * Returns the cell String value associated to the given XML node where string is inline. + * + * @param \DOMNode $node + * @return mixed The value associated with the cell + */ + protected function getVNodeValue(&$node) + { + // all other cell types should have a "v" tag containing the value. + // if not, the returned value should be empty string. + $vNode = $node->getElementsByTagName('v')->item(0); + if ($vNode !== null) { + return $vNode->nodeValue; + } + return ""; + } + + /** + * Returns the cell String value associated to the given XML node where string is inline. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatInlineStringCellValue(&$node, &$escaper) + { + // inline strings are formatted this way: + // [INLINE_STRING] + $tNode = $node->getElementsByTagName('t')->item(0); + $escapedCellValue = trim($tNode->nodeValue); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value associated to the given XML node where string is shared in shared-strings file. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatSharedStringCellValue(&$node, &$escaper) + { + // shared strings are formatted this way: + // [SHARED_STRING_INDEX] + $sharedStringIndex = intval($node); + $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value associated to the given XML node where string is stored in value node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatStrCellValue(&$node, &$escaper) + { + $escapedCellValue = trim($node); + $cellValue = $escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell Numeric value associated to the given XML node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return int|float The value associated with the cell + */ + protected function formatNumericCellValue(&$nodeValue) + { + $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); + return $cellValue; + } + + /** + * Returns the cell Boolean value associated to the given XML node. + * + * @param \DOMNode $node + * @return bool The value associated with the cell + */ + protected function formatBooleanCellValue(&$node) + { + // !! is similar to boolval() + $cellValue = !!$node; + return $cellValue; + } + + /** + * Returns the cell Date value associated to the given XML node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return DateTime The value associated with the cell (null when the cell has an error) + */ + protected function formatDateCellValue(&$node) + { + // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) + try { + $cellValue = new \DateTime($node); + return $cellValue; + } catch ( \Exception $e ) { + // Maybe do something... Not famiiar enough to see about exceptions at this stage + return null; + } + } + /** * Returns the (unescaped) cell value associated to the given XML node. * @@ -232,47 +350,31 @@ class XLSX extends AbstractReader */ protected function getCellValue($node, $escaper) { - $cellValue = ''; - // Default cell type is "n" $cellType = $node->getAttribute('t') ?: 'n'; - - if ($cellType === 'inlineStr') { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - } else { - // all other cell types should have a "v" tag containing the value. - // if not, the returned value should be empty string. - $vNode = $node->getElementsByTagName('v')->item(0); - - if ($vNode !== null) { - if ($cellType === 's') { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($vNode->nodeValue); - $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - $cellValue = $escaper->unescape($escapedCellValue); - } else if ($cellType === 'b') { - // !! is similar to boolval() - $cellValue = !!$vNode->nodeValue; - } else if ($cellType === 'n') { - $nodeValue = $vNode->nodeValue; - $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); - } else if ($cellType === 'd') { - $cellValue = new \DateTime($vNode->nodeValue); - } else if ($cellType === 'e') { - $cellValue = null; - } else if ($cellType === 'str') { - $escapedCellValue = trim($vNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - } - } + $vNodeValue = $this->getVNodeValue($node); + if( ($vNodeValue === "") && ($cellType !== self::INLINE_STRING_CELL_TYPE) ) { + return $vNodeValue; + } + switch($cellType) { + case self::INLINE_STRING_CELL_TYPE: + return $this->formatInlineStringCellValue($node, $escaper); + case self::SHARED_STRING_CELL_TYPE: + return $this->formatSharedStringCellValue($vNodeValue, $escaper); + case self::STR_CELL_TYPE: + return $this->formatStrCellValue($vNodeValue, $escaper); + case self::BOOLEAN_CELL_TYPE: + return $this->formatBooleanCellValue($vNodeValue); + case self::NUMERIC_CELL_TYPE: + return $this->formatNumericCellValue($vNodeValue); + case self::DATE_CELL_TYPE: + return $this->formatDateCellValue($vNodeValue); + default: + if($cellType !== self::EMPTY_CELL_TYPE) { + \trigger_error('UNKNOWN CELL TYPE', \E_USER_NOTICE); + } + return null; } - - return $cellValue; } /** From 3e1793d8527e8a608e7486256c176acbb776f62a Mon Sep 17 00:00:00 2001 From: Lewis Date: Thu, 2 Jul 2015 17:56:55 +0100 Subject: [PATCH 2/4] Changes made to XLSX.php --- src/Spout/Reader/XLSX.php | 87 +++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index e6fa729..406b10f 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -19,13 +19,13 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper; */ class XLSX extends AbstractReader { - const INLINE_STRING_CELL_TYPE = 'inlineStr'; - const STR_CELL_TYPE = 'str'; - const SHARED_STRING_CELL_TYPE = 's'; - const BOOLEAN_CELL_TYPE = 'b'; - const NUMERIC_CELL_TYPE = 'n'; - const DATE_CELL_TYPE = 'd'; - const EMPTY_CELL_TYPE = 'e'; + const CELL_TYPE_INLINE_STRING = 'inlineStr'; + const CELL_TYPE_STR = 'str'; + const CELL_TYPE_SHARED_STRING = 's'; + const CELL_TYPE_BOOLEAN = 'b'; + const CELL_TYPE_NUMERIC = 'n'; + const CELL_TYPE_DATE = 'd'; + const CELL_TYPE_ERROR = 'e'; /** @var string Real path of the file to read */ protected $filePath; @@ -235,11 +235,11 @@ class XLSX extends AbstractReader * Returns the cell String value associated to the given XML node where string is inline. * * @param \DOMNode $node - * @return mixed The value associated with the cell + * @return string The value associated with the cell */ protected function getVNodeValue(&$node) { - // all other cell types should have a "v" tag containing the value. + // for cell types having a "v" tag containing the value. // if not, the returned value should be empty string. $vNode = $node->getElementsByTagName('v')->item(0); if ($vNode !== null) { @@ -247,7 +247,7 @@ class XLSX extends AbstractReader } return ""; } - + /** * Returns the cell String value associated to the given XML node where string is inline. * @@ -255,7 +255,7 @@ class XLSX extends AbstractReader * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatInlineStringCellValue(&$node, &$escaper) + protected function formatInlineStringCellValue($node, &$escaper) { // inline strings are formatted this way: // [INLINE_STRING] @@ -264,79 +264,77 @@ class XLSX extends AbstractReader $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; } - + /** * Returns the cell String value associated to the given XML node where string is shared in shared-strings file. * - * @param \DOMNode $node + * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatSharedStringCellValue(&$node, &$escaper) + protected function formatSharedStringCellValue($nodeValue, &$escaper) { // shared strings are formatted this way: // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($node); + $sharedStringIndex = intval($nodeValue); $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; } - + /** * Returns the cell String value associated to the given XML node where string is stored in value node. * - * @param \DOMNode $node + * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatStrCellValue(&$node, &$escaper) + protected function formatStrCellValue($nodeValue, &$escaper) { - $escapedCellValue = trim($node); + $escapedCellValue = trim($nodeValue); $cellValue = $escaper->unescape($escapedCellValue); return $cellValue; } - + /** * Returns the cell Numeric value associated to the given XML node. * - * @param \DOMNode $node + * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return int|float The value associated with the cell */ - protected function formatNumericCellValue(&$nodeValue) + protected function formatNumericCellValue($nodeValue) { $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); return $cellValue; } - + /** * Returns the cell Boolean value associated to the given XML node. * - * @param \DOMNode $node + * @param string $nodeValue * @return bool The value associated with the cell */ - protected function formatBooleanCellValue(&$node) + protected function formatBooleanCellValue($nodeValue) { // !! is similar to boolval() - $cellValue = !!$node; + $cellValue = !!$nodeValue; return $cellValue; } /** * Returns the cell Date value associated to the given XML node. * - * @param \DOMNode $node + * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return DateTime The value associated with the cell (null when the cell has an error) + * @return DateTime|null The value associated with the cell (null when the cell has an error) */ - protected function formatDateCellValue(&$node) + protected function formatDateCellValue($nodeValue) { - // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) - try { - $cellValue = new \DateTime($node); + try { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) + $cellValue = new \DateTime($nodeValue); return $cellValue; - } catch ( \Exception $e ) { - // Maybe do something... Not famiiar enough to see about exceptions at this stage + } catch (\Exception $e) { return null; } } @@ -353,26 +351,25 @@ class XLSX extends AbstractReader // Default cell type is "n" $cellType = $node->getAttribute('t') ?: 'n'; $vNodeValue = $this->getVNodeValue($node); - if( ($vNodeValue === "") && ($cellType !== self::INLINE_STRING_CELL_TYPE) ) { + if ( ($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING) ) { return $vNodeValue; } - switch($cellType) { - case self::INLINE_STRING_CELL_TYPE: + + switch ($cellType) + { + case self::CELL_TYPE_INLINE_STRING: return $this->formatInlineStringCellValue($node, $escaper); - case self::SHARED_STRING_CELL_TYPE: + case self::CELL_TYPE_SHARED_STRING: return $this->formatSharedStringCellValue($vNodeValue, $escaper); - case self::STR_CELL_TYPE: + case self::CELL_TYPE_STR: return $this->formatStrCellValue($vNodeValue, $escaper); - case self::BOOLEAN_CELL_TYPE: + case self::CELL_TYPE_BOOLEAN: return $this->formatBooleanCellValue($vNodeValue); - case self::NUMERIC_CELL_TYPE: + case self::CELL_TYPE_NUMERIC: return $this->formatNumericCellValue($vNodeValue); - case self::DATE_CELL_TYPE: + case self::CELL_TYPE_DATE: return $this->formatDateCellValue($vNodeValue); default: - if($cellType !== self::EMPTY_CELL_TYPE) { - \trigger_error('UNKNOWN CELL TYPE', \E_USER_NOTICE); - } return null; } } From 1e2452934c30683c550780fc5a20005a30ad10f5 Mon Sep 17 00:00:00 2001 From: Lewis Date: Thu, 2 Jul 2015 19:35:23 +0100 Subject: [PATCH 3/4] Additional tests for Cell Types --- tests/Spout/Reader/XLSXTest.php | 2 ++ .../xlsx/sheet_with_all_cell_types.xlsx | Bin 3715 -> 3799 bytes 2 files changed, 2 insertions(+) diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php index 5856887..f1e3d99 100644 --- a/tests/Spout/Reader/XLSXTest.php +++ b/tests/Spout/Reader/XLSXTest.php @@ -129,6 +129,8 @@ class XLSXTest extends \PHPUnit_Framework_TestCase \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), 10, 10.43, null, + 'weird string', // valid 'str' string + null, // invalid date ], ['', '', '', '', '', '', '', '', ''], ]; diff --git a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx index 17b730308375b7b8415829ea2acaeb2691428570..ec109dead2a74a7381ee454a72473355fc65212f 100644 GIT binary patch delta 544 zcmZpcy)L_9DQErS8IRn~bK0=xF)}b*VrF2FV31*`$k8v)FUl^?NKGv%)(2CDdKI}j zp&^_M%t5tNL%o5xw1S&~k>v$Y3yUHH14HlRK>tGqBCYp*|L}X?cfYR4GChxl)uqx0bGT{=av+?^zy$ySMYs|9qEQed_r8de?WM32UxssjU$g;15}4ywc3D zGV1JK!H;W%wS;$u2~6hSkTPe^{Eu_Z)|YWVuu;*Fp1ig=)n%pc_5hCCmsK@lBC~@z z+V`h9=Vm&jY+ZRGQ-Z}giesgRbH|x_iK@NRG<`~cg*6oM9&yl@k+}2ad9M?(t&sTitT`%Zh)i)=t-&&Lub3 zasC$0^3K$K_MDcb95=%K0@mH0GJXGUKmM<4yzR4JZ;p~@%Xz@=Z2w|?ggsZJiG*#r z;xpNfbN9@17iXoty>sW$?F$~&;(ztN=1TK#P2Uz{r?cCpe81k#eU|IAZZ?`ovfWL1 zxBJZZc6P;3)uNfMfod``RhCE3$aW@lEC_ld&)2Qjf4oposH*8mruDr!|MnbvBD`6i zn}=CVj#&g4_#6zF(I^2FXJ7!vD#Mb-$$@+_dZ8h#kk~_uxd3lgHjq+2AWQ>BXEZm6 F2LRqZ)nEVs delta 438 zcmcaE+bp|bDW}l+(8F%!yb;NMj0_Bs%nS??43qo$r0RPo2l^i}5NW;d`-k8AzWa4W zm+5&dtVh&^kBHxYRhHNry?cx3m;2Q>ub40{tA72`^8Vi3_dlL2Ue_cmp}p0MQ`$v^ zVd`tARk>dCu6KW!#d3`^w8Kl=o+y)^Tp4ChkU#~MzA(G@OyaW7-=(I!R$$Y5xS>A! z)G?tPy#xK*OVbW@<}VcOy~iHWF6f_Q{;Am}wc^H<2eLb+D0IHx)NwiXSZ3X&`q#gg zu3FNho^GI?dn`6BQo=9UPnL=Al;o+o{*`}szj|}y{I5U7_v5$xvq%b9Y#`NsvEXc* z=b1?tzk16jU2J}_w)Q=H==JRP52x8Tp0nB^^=+qojm?&sKeN8>usT=J Date: Mon, 6 Jul 2015 12:10:41 +0100 Subject: [PATCH 4/4] Update XLSX.php --- src/Spout/Reader/XLSX.php | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 406b10f..5865d20 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -232,12 +232,12 @@ class XLSX extends AbstractReader } /** - * Returns the cell String value associated to the given XML node where string is inline. + * Returns the cell's string value from a node's nested value node * * @param \DOMNode $node * @return string The value associated with the cell */ - protected function getVNodeValue(&$node) + protected function getVNodeValue($node) { // for cell types having a "v" tag containing the value. // if not, the returned value should be empty string. @@ -249,13 +249,13 @@ class XLSX extends AbstractReader } /** - * Returns the cell String value associated to the given XML node where string is inline. + * Returns the cell String value where string is inline. * * @param \DOMNode $node * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatInlineStringCellValue($node, &$escaper) + protected function formatInlineStringCellValue($node, $escaper) { // inline strings are formatted this way: // [INLINE_STRING] @@ -266,13 +266,13 @@ class XLSX extends AbstractReader } /** - * Returns the cell String value associated to the given XML node where string is shared in shared-strings file. + * Returns the cell String value from shared-strings file using nodeValue index. * * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatSharedStringCellValue($nodeValue, &$escaper) + protected function formatSharedStringCellValue($nodeValue, $escaper) { // shared strings are formatted this way: // [SHARED_STRING_INDEX] @@ -283,13 +283,13 @@ class XLSX extends AbstractReader } /** - * Returns the cell String value associated to the given XML node where string is stored in value node. + * Returns the cell String value, where string is stored in value node. * * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper * @return string The value associated with the cell (null when the cell has an error) */ - protected function formatStrCellValue($nodeValue, &$escaper) + protected function formatStrCellValue($nodeValue, $escaper) { $escapedCellValue = trim($nodeValue); $cellValue = $escaper->unescape($escapedCellValue); @@ -297,7 +297,7 @@ class XLSX extends AbstractReader } /** - * Returns the cell Numeric value associated to the given XML node. + * Returns the cell Numeric value from string of nodeValue. * * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper @@ -310,7 +310,7 @@ class XLSX extends AbstractReader } /** - * Returns the cell Boolean value associated to the given XML node. + * Returns the cell Boolean value from a specific node's Value. * * @param string $nodeValue * @return bool The value associated with the cell @@ -323,7 +323,7 @@ class XLSX extends AbstractReader } /** - * Returns the cell Date value associated to the given XML node. + * Returns a cell's PHP Date value, associated to the given stored nodeValue. * * @param string $nodeValue * @param \Box\Spout\Common\Escaper\XLSX $escaper @@ -340,7 +340,7 @@ class XLSX extends AbstractReader } /** - * Returns the (unescaped) cell value associated to the given XML node. + * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. * * @param \DOMNode $node * @param \Box\Spout\Common\Escaper\XLSX $escaper