From fde8a495ca31d66ff2c3a93e4af2505dead8d0ad Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Fri, 14 May 2021 16:10:12 +0200 Subject: [PATCH] Inline strings can contain multiple value nodes We were working under the assumption that XLSX's inline strings only had a single value node (``). This is incorrect. To get the actual value of an inline string node, we need to concatenate the value of all its child nodes. --- composer.json | 3 ++- .../Reader/XLSX/Helper/CellValueFormatter.php | 13 +++++++++---- .../XLSX/Helper/CellValueFormatterTest.php | 8 ++++++-- tests/Spout/Reader/XLSX/ReaderTest.php | 13 +++++++++++++ ...h_multiple_value_nodes_in_inline_strings.xlsx | Bin 0 -> 3779 bytes 5 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 tests/resources/xlsx/sheet_with_multiple_value_nodes_in_inline_strings.xlsx diff --git a/composer.json b/composer.json index 4d9642b..912349d 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,8 @@ "require": { "php": ">=7.2.0", "ext-zip": "*", - "ext-xmlreader" : "*" + "ext-xmlreader": "*", + "ext-dom": "*" }, "require-dev": { "phpunit/phpunit": "^8", diff --git a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php index e95f186..ec83681 100644 --- a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php @@ -122,10 +122,15 @@ class CellValueFormatter */ protected function formatInlineStringCellValue($node) { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0); - $cellValue = $this->escaper->unescape($tNode->nodeValue); + // inline strings are formatted this way (they can contain any number of nodes): + // [INLINE_STRING][INLINE_STRING_2] + $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE); + + $cellValue = ''; + for ($i = 0; $i < $tNodes->count(); $i++) { + $tNode = $tNodes->item($i); + $cellValue .= $this->escaper->unescape($tNode->nodeValue); + } return $cellValue; } diff --git a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php index 34508b0..f981a63 100644 --- a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php +++ b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php @@ -186,18 +186,22 @@ class CellValueFormatterTest extends TestCase public function testFormatInlineStringCellValue($value, $expectedFormattedValue) { $nodeListMock = $this->createMock(\DOMNodeList::class); + $nodeListMock + ->expects($this->atLeastOnce()) + ->method('count') + ->willReturn(1); $nodeListMock ->expects($this->atLeastOnce()) ->method('item') ->with(0) - ->will($this->returnValue((object) ['nodeValue' => $value])); + ->willReturn((object) ['nodeValue' => $value]); $nodeMock = $this->createMock(\DOMElement::class); $nodeMock ->expects($this->atLeastOnce()) ->method('getElementsByTagName') ->with(CellValueFormatter::XML_NODE_INLINE_STRING_VALUE) - ->will($this->returnValue($nodeListMock)); + ->willReturn($nodeListMock); $formatter = new CellValueFormatter(null, null, false, false, new Escaper\XLSX()); $formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatInlineStringCellValue', $nodeMock); diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index e9888d8..af7ca37 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -72,6 +72,19 @@ class ReaderTest extends TestCase } } + /** + * @return void + */ + public function testReadShouldSupportInlineStringsWithMultipleValueNodes() + { + $allRows = $this->getAllRowsForFile('sheet_with_multiple_value_nodes_in_inline_strings.xlsx'); + + $expectedRows = [ + ['VALUE 1 VALUE 2 VALUE 3 VALUE 4', 's1 - B1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ diff --git a/tests/resources/xlsx/sheet_with_multiple_value_nodes_in_inline_strings.xlsx b/tests/resources/xlsx/sheet_with_multiple_value_nodes_in_inline_strings.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2ee574c6198e61d2689ef7a6f1e1916a59e6e645 GIT binary patch literal 3779 zcmaJ@2{@E%8y?0og&~Av8Cxi_k8G7~Y-7!mOp$HuW~>=oWkN@iHFcsoqIA&6P%7jw zQr0L-WUK7!mE9oc8`4<%|M$ASneV#3=YHS!xu5MeHv-dff+*{5x{dC}%g>*Ez`qOD z+1wxN8z5``(+sj?Pktcf3ytF$J-~#&Wt3)88dCN%F41R-Apb_ zdS2Q*f#l%u8j_B5)<2pGB7Cm6)&4|AZOXKXMEX)T=vh$e!q}A8YxSoqotOhc_NIr_ zTzmdUdSrON-V8z6JQIv7&MhcFI{q~xQ1+r1&$K_B-K0j*QL31wQsH{}C;ZUKxTx61 zgkBGr(Kp&aSH_Htm)#$8U}jv{^E@h=DQXfH*2~2V|7j~Ldn{-`zDA}IknmnCg@N_!zz^W z*iqx#UA6SDUkYur)FkG_A0BFqi;=N??NCym+O4RKT-EpYe@n+d(R%xwz(2DJLs!=6 ztuBFgvdDR=y{{Z#PYqy^Z99vOzP=O^?wJM&@1ym~GiwMz8_!D^5Sps>{1_x_3*Y&};mj?hv{h9a;xgt=bfgrR z0rGK`c1*%A=*zk`cv=5-=m^#pmuyqpdGAwvgmJS{d)b@&rL`f~dkc@YObUyfnO#n9 z8nSy;`peb5c?AYs#d+__s58jiVO4;KL0S-qWz!%aFvJ@jKp`T?!URhaVlFr9x}@1{ zLxer@K%(Z^J%)om5a9Cr*m>wMZ4(^{@KGD0|IDo7Mb>ZO5ydeALWs$y{RMH?75}Yk z<4wn3!q(pkZW1xff9MCHpB9>#A6q_>(E7)cC(WM^xp{k|M`a7tx5 zZGq1`(88D)G$Cs%nr6Eop5OY;QX|cjz;EGl^RuX!?C@OltIC{nt7t|K3v|t!R2>gT z2W2fIMOpI?o*!An>ebb_m7TzUa$oJbtqP8)?W6*~$OCpB*nf1pH^2}zz&S*%+ zgq=l^^O^3o6YkHd8W#wXGW#GmJeji>A4%Eu&5;*h^30RxIXl+Qt0JK!~^1|1Z1o%q&4U4|Wm(mXj_ulK!SoSevz(NAri;ax zVX4cSiIq$E)537)nzH=Axqj3gbC4@4ByiD`VCuuaY36^+L=~M%>mi~J>u``htwobC z$B1GkJ=40;amyZTl*$fB;9&!-KHWe1k=&C&O+AOj1>Xc1|gv#_SWZn}| zOsS;1t*hTWkx(q2ZK;?OHD&SX==iHQDmO3_e8PTDRrpw^K!Gy~VP|?%GK!iF(NleK z;)e4hN3GA_e>j%T(U$RMdX2<$+q*}mi?qre=57*sHTHdNV#BwJSj6av@_|sZVUv8U zX+}iIk*SlQtw3+K=YQs_YT|(azXBdgZwG=G;O^*;cCiff_rSPq)>ORK{wg^(-9>yb zq^KlT29fQ-jhAQ!N4a`~8?zgY5m;>Zi`i+|{kL8#HW_UkidPX_3-L73@I!*`#l97G z!4l?2Z5Rje{3{LnzJ2$OK`nY`H5HQ%CGD1%~Q${03-R@VEh6C|HJVJ4_I|22%iRf2s{p6Bs3c1{NF^pk-My&bX%@1Ne*Fdnnv_fAuv zSgA(B=#TXWm?}Q$4B?mNmUm1vKI*JB?isX9s^{6SUd{8IBh+Z)pj`fi{P&>D-Ony^Xz1}*O6qMc}m z+RWRjS2}Fv5CR8`?yVAY??pgk`}N!hG?-yAt>0o+60Tce*eZ{378N+(IoOb1WV zu+v682t+ZO1mo52TVG#!mY6o{)8WdeN$jB@n#cO;;#YP#_bfa1OmkcKhEt5)KBx{X6Lq(Ql7!$G=Wbknz#sc`y3a{%JA3ab? zl;*4nYts@NP}DXe8U!cxT&!k)$u1ey?s?GG5|go9)38{UI^p)XFj5Am`K}N5`9ebY z?2C)vFWUJIod3Shm__qYr|d@;Iq1}Qm#3V4#RdK&hlLJ*y!E{_e1Y}3YIqC>a?Y#e z$T)+odG|BToC_mHSHJT`JHSL|gjHVFbWBzFyg)~uOmrD)5q~07(8_DF*8Z77hd7oPbhf`epO8_Dqbb<=A4T@$143 z5$R)Eh%P(Cfdz5m*AU}JKJK%XhdkF7p(10~tmp%bW}0SCM#0lN&Bl^cN_kK!xt`{n z#>Fyo(%n;p(>YIV1z~VyxMPwr>|7l(MjPXJSnRIH-9}AVz(EfBm!l$KM~Q>>5^&Ll z?1XVLV%CiTVqM!Mn)k#fvSdMK1}FFp;<=bHqO4muo{?Ga4m%z_W72mkDlsWcQo14K zVrX6--TqL+2@!=g>pB^O5J~sKzmL({i*Z4?^_O+b9u*9#RES^XI&L-43W38o>AeEu;ZbwykZldJFb@VNJzI0I$DbTQZw^>Lz@*qNKuofHL_7 z|EVvj=)IAqh&1Xt1NgQf*>>J`w`ijRLN)H&`4sWvEB7?{E>1hiM1 z@N7T<_dwZ^`+!EU6T4lh&~GZ0T`Glov)>O->dk5ZEd=boXOvQR*vlhjeopZ1h mP#4F3YV+9X`R?{X#VG&6{B)ImV^1gu3UUSVM4!@ZK>q;~g@oh) literal 0 HcmV?d00001