From 8bac924d48dd9faa3cc4d8b75d841ccf2be0c11a Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Wed, 3 Jun 2015 10:54:53 -0700 Subject: [PATCH] Add support for more cell types Added proper support for booleans, dates, numbers, errors. Added unescaping of the read string. Fixed a bug when cells did not have any values => now returns empty string. --- src/Spout/Reader/XLSX.php | 77 +++++++++++++----- tests/Spout/Reader/XLSXTest.php | 21 +++++ .../xlsx/sheet_with_all_cell_types.xlsx | Bin 0 -> 3715 bytes 3 files changed, 78 insertions(+), 20 deletions(-) create mode 100644 tests/resources/xlsx/sheet_with_all_cell_types.xlsx diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index adb9544..83bee9d 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -177,6 +177,7 @@ class XLSX extends AbstractReader throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()'); } + $escaper = new \Box\Spout\Common\Escaper\XLSX(); $isInsideRowTag = false; $rowData = []; @@ -188,6 +189,7 @@ class XLSX extends AbstractReader $lastCellIndex = $matches[1]; $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; } + } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'row') { // Start of the row description $isInsideRowTag = true; @@ -200,32 +202,15 @@ class XLSX extends AbstractReader $numberOfColumnsForRow = intval($numberOfColumnsForRow); } $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; + } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'c') { // Start of a cell description $currentCellIndex = $this->xmlReader->getAttribute('r'); $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); + $node = $this->xmlReader->expand(); + $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper); - $hasInlineString = ($this->xmlReader->getAttribute('t') === 'inlineStr'); - $hasSharedString = ($this->xmlReader->getAttribute('t') === 's'); - - if ($hasInlineString) { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $rowData[$currentColumnIndex] = trim($tNode->nodeValue); - } else if ($hasSharedString) { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $vNode = $node->getElementsByTagName('v')->item(0); - $sharedStringIndex = intval($vNode->nodeValue); - $rowData[$currentColumnIndex] = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - } else { - // other values are formatted this way: - // [VALUE] - $vNode = $node->getElementsByTagName('v')->item(0); - $rowData[$currentColumnIndex] = intval($vNode->nodeValue); - } } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === 'row') { // End of the row description // If needed, we fill the empty cells @@ -238,6 +223,58 @@ class XLSX extends AbstractReader return ($rowData !== []) ? $rowData : null; } + /** + * Returns the (unescaped) cell value associated to the given XML node. + * + * @param \DOMNode $node + * @param \Box\Spout\Common\Escaper\XLSX $escaper + * @return string|int|float|bool|null The value associated with the cell (null when the cell has an error) + */ + protected function getCellValue($node, $escaper) + { + $cellValue = ''; + + // Default cell type is "n" + $cellType = $node->getAttribute('t') ?: 'n'; + + if ($cellType === 'inlineStr') { + // inline strings are formatted this way: + // [INLINE_STRING] + $tNode = $node->getElementsByTagName('t')->item(0); + $escapedCellValue = trim($tNode->nodeValue); + $cellValue = $escaper->unescape($escapedCellValue); + } else { + // all other cell types should have a "v" tag containing the value. + // if not, the returned value should be empty string. + $vNode = $node->getElementsByTagName('v')->item(0); + + if ($vNode !== null) { + if ($cellType === 's') { + // shared strings are formatted this way: + // [SHARED_STRING_INDEX] + $sharedStringIndex = intval($vNode->nodeValue); + $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); + $cellValue = $escaper->unescape($escapedCellValue); + } else if ($cellType === 'b') { + // !! is similar to boolval() + $cellValue = !!$vNode->nodeValue; + } else if ($cellType === 'n') { + $nodeValue = $vNode->nodeValue; + $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); + } else if ($cellType === 'd') { + $cellValue = new \DateTime($vNode->nodeValue); + } else if ($cellType === 'e') { + $cellValue = null; + } else if ($cellType === 'str') { + $escapedCellValue = trim($vNode->nodeValue); + $cellValue = $escaper->unescape($escapedCellValue); + } + } + } + + return $cellValue; + } + /** * Closes the reader. To be used after reading the file. * diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php index 24aa89c..f611db4 100644 --- a/tests/Spout/Reader/XLSXTest.php +++ b/tests/Spout/Reader/XLSXTest.php @@ -114,6 +114,27 @@ class XLSXTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadShouldSupportAllCellTypes() + { + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); + + $expectedRows = [ + [ + 's1--A1', 's1--A2', + false, true, + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'), + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), + 10, 10.43, + null, + ], + ['', '', '', '', '', '', '', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ diff --git a/tests/resources/xlsx/sheet_with_all_cell_types.xlsx b/tests/resources/xlsx/sheet_with_all_cell_types.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..17b730308375b7b8415829ea2acaeb2691428570 GIT binary patch literal 3715 zcmai12{e>#8y-W3FqUlDC2Pf$DPt-7l3_-cY*B{E7|h7dpeQ7XBq>W-QXgN+AcRUI zU)JnP$x_Ll&z>#+ix|=W|Iaz^IdkT{ujhI0>t3$=HA2$Tae_b~29RN*xi&p+vL8+Z z0!7e)Kzo3Htu=`RPb|Untf`+H*26~D*VUygP8~^sasD$N(v$jC#`PHMz9s^sJRO?k z7*RFC*Cx?dhA1SCBR^?rTC~D~km&@l&qt=lRvh-DVgo11)+K$^KIo2fC(Y z$*R_Iz_H(*=h6`&Z_@~gN;fg_A$le4P}bfbC&zM4ZWb2Bst*-d1(zln2WJeQ>jA!)v{cCIvT)bVj{ zu&l4zyB^<<-q8V5ML`omHf~?fPB0pb64`>qG7guZbyt4Bni@ssx_m6M!!e~f!fR{IsmVmiyruO)*b8OA-7?(9ta;z z;oJe>9R>^nF>Tt&u3A2cHAGgx*uus`Rzzz96NlBcKG?(Ys!(H!K~_oSoRaw;KEtiNnhbF z4EVXEK?UvHk)+fboRCWBq+!pUD={}oGKXq!1^H)Z)9v;@IlAx2vRSpPo}UaZ_rWo+ ztvELWkM6uCI`e5i;*~@Yw+eblRXFOKMtQ8wX-Zso9beg;S=%NbJ2_(n_;4WF8@UtR zw$sLWs;#FCc&!BFFxyr$J2$u0Cz?6L>)bPlMYML*}ZzFH={Xvrc|dUZ5#Gb~xQ zKDXo+O{6sY7|a;K&e`Z%Kdixwq+U*ah0X)2 zc?mo$dBLl1MT2%;_oe?J(b6~8ys9&$BP!6K;b>dw+xsO|e%HHm4>pd8?z5PhkFEb^ z^P+_I_Y46wof_l@+#CZQ_N}hk6Wy_^zPjF2M^i+ALX`*#4hHQaU09ao_Xw5;~R!lfQ|^=e&)N zM0bPh87=Ktf{{W9q0ql8Ki|RUsv$yzP1>C zK5{szAu%o%F30Ep&m>n(yR@aehW?W7krI0_bQW)rbECy??^O8;IO`S728lb)mBjQP@dXxZh)zzbx} zz2XEpY{N*~@k zgeQG>yIl19X9=nX!{!1m@yer%75YNEj`ez=?%>IRWQtKZ1a4qf+FK9wr9*H*K`n=0bAw~yN=v5^(n1~L8 zUEcNy+P7tMLr*#=;_H^LG-(eNR2viszed=TrG zfWZ7PK@qL7XT47&ugkx#Zski1xk#+J<5R!yg0fWOq(M9Vsto)P!$l5wB5J;!(iRUgZh@kf00QbChc$mH`|e zfw3orAEMKk^;WNd7r+u`xKQn$HMMa%(VDejhuP*HzE1sElm4ItG1M zTo^de%d9)X#XFTQWT7e(MzXeA_?s!EE$9A}qQO&PN@@?o4LozkT;>6rp6P|TV&0jB z8P4|QGs~N$xUP7t=i?~<0Nfr0cxGMacx&-QMXRGIYRuYmdR=y3k0)m^T}jLMWU6OV zC~)o_qVDltWpeoQE}=Wd9iO9?U5DmZyaTIlDW~Ev!w+mtdt>}>L_h^g$IjljeeJ}^ zNvXIZ7?AeHf96L+isg;*T3ts_6SUI!TP0~sw^p4;MXVEht|~Pl?sB^&OiPd*nX*LA zN(qEU^y=Wgsxk|OwR{O%jJ|G4U@MpA%zJ8okE=E@5C3XqFnZx5Vs|HIG{?iWF(;7! zkRrk6^zwIbm;P0A1Kvl4;&WjxmDWU15<+!i-kQ3;KmZ@Q@lHD z*C2;i3oGi@DfAl{XK0V%5-YMtw_Q~04uNi0h2)+Jo}OWEyD00De6HypQfCXCCX+NW zq}WsDI{xEpdQONS?|}k+Mt-gRpxYxFS+4pi?UDc9qibU{yQ+UcuS(M5NR_PI58Ffq zYj6>Wu{txuGv}mKfgfYRGuls=-XupID{w1zK$;RI{zkun93E^4Y3GkN4qFYURJ^ScEt4x7R8A~ zjFYc6+f+IQHC-H8e3+FHBIyW|bgi^GZ^?_YM+t(V)qAUx(SFM%!=oMV7nVGF&gTC_ zVA5fH;f-!}VJB;28)uNhn8wS4PXfmwX6?@7eetTn^-4v2v&F&zD*1Qk@iRqA^={YD z7cm?dI{0VUtAoXg$GK_}8szt*mZiT;&pmqg66gaoK+XU60T~Fymi32mnYzKwkgpDZ z)P)6tyP++=AG&=Qq+%jAF>B)TzYbfSnyBk8;MJBH>i6@uPf=9Z>#eX~GZppF^+Mi0 zm{8H#KS#4z@JIoF4@#`5L;ned(bwH z|15UYeRP2M{z>|%9FWMlT0raPCM8ziVz*2jY5pvgB&O<>^5DEyc@am@x=s&)cY$^Z% literal 0 HcmV?d00001