From 2c80b1f23a12e179e4adbef4de2130dc516e81ee Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 23 May 2016 14:15:48 -0700 Subject: [PATCH] XLSX Reader should add a space between text nodes (#229) When a cell contains multiple text nodes, the cell value is currently obtained by concatenating the value of each text node. Instead, values should still be concatenated but a space should be added in between. --- .../Reader/XLSX/Helper/SharedStringsHelper.php | 11 +++++++++-- .../XLSX/Helper/SharedStringsHelperTest.php | 16 ++++++++++++++++ ...taining_text_and_hyperlink_in_same_cell.xlsx | Bin 0 -> 5127 bytes 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 6aafb52..1ba4e6a 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -100,11 +100,16 @@ class SharedStringsHelper // removes nodes that should not be read, like the pronunciation of the Kanji characters $cleanNode = $this->removeSuperfluousTextNodes($node); - // find all text nodes 't'; there can be multiple if the cell contains formatting + // find all text nodes "t"; there can be multiple if the cell contains formatting $textNodes = $cleanNode->xpath('//ns:t'); $textValue = ''; - foreach ($textNodes as $textNode) { + foreach ($textNodes as $nodeIndex => $textNode) { + if ($nodeIndex !== 0) { + // add a space between each "t" node + $textValue .= ' '; + } + if ($this->shouldPreserveWhitespace($textNode)) { $textValue .= $textNode->__toString(); } else { @@ -200,6 +205,8 @@ class SharedStringsHelper { $tagsToRemove = [ 'rPh', // Pronunciation of the text + 'pPr', // Paragraph Properties / Previous Paragraph Properties + 'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark ]; foreach ($tagsToRemove as $tagToRemove) { diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php index a72d19a..31f9c71 100644 --- a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -82,6 +82,22 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase $sharedStringsHelper->cleanup(); } + /** + * @return void + */ + public function testGetStringAtIndexShouldWorkWithStringsContainingTextAndHyperlinkInSameCell() + { + $resourcePath = $this->getResourcePath('one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('go to https://github.com please', $sharedString); + + $sharedStringsHelper->cleanup(); + } + /** * @return void */ diff --git a/tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx b/tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..08527df059d8834bb505d187725ae18d47f69979 GIT binary patch literal 5127 zcmaJ_1yodRx26Y28U`r=L2@LeBn6~q96-7h0f&$phfe8|7L_gu0qGWyMu9;>Lb^*D zDFxxq==Xncz4v#Ywaz+ct+V%8`+4_%cBn&f@u;u}2??=06w8#cF34r{voR8G4Nu9KQ|@mmMVVBYKe=)k( zqW3_2?qN1VZSbSXM;-yT_lHA}SoGnyXTl$JH^1ihuWCvXIphnI3VK*U70)^=(Ga!$qt0Ji3pS*)@i=#~ zK0IyBZhiAvlCQwoHa>-w=D7R5koD`XyYJft+=(Nf;$o1hC}?6{N8{jy{Wqk@FCt}T zkA!2OiqL>*w(yfR9D~JcUkNdX&>ENz&R)M6*|g&NY!# z<9$zL$FZEEA-_%>w?fwQbV4dVhYTGq0~E&&nZmHZi<|n~%Kg|k=qyB8i@5lR>*F>2 zpWxUm1jc0>9gFG__!YS3e20%b)SlKHGiJq zHL9gqR1WFAd92N#E+S4MSBt&G9955HJ68G8aBIg(T2^25n!niUheu*+n^s7rx@sm? z&x7|AHgf}yDD~-@wf2B5V_|gBVK4j_6)l%#gbJEv@YA3ux>ZBvwhb7p&u&5~#yH}lDa#9QO9lBBz9|xr=$9n*^34?OT%ll+ zn7uBvC{kZDxDLP^7htEl0)alN{l5k_9s0OjZTNmCy^B54+SK0O8gpd72_OcB3Gtw< zpCHA=y!3`Z_U9n+XC-DM#3aVAJ<|9;5?|^;GREBO*N8Hm0rLq0 z_H4f|N-fH;T>Zk@RqUUpA>wjr$k?25%l)$brSU>_@AZ@G-*&4L6bg%{COvU5$f#|9 zcOO9e&GuJhNPi=95t9hbPP=h*7$1W1rfhkfjmAqDYRUrfR)WM}pW=kd^fa?>M^;n) zJdET7!}=NBqd)5JE%l&2&m9;+w+K6cg{IUbxJ$lWPS*x`&8HbZH^;eA*K*ek0!^K< zyX{_WQFGWHBn*2*z1I~n2btbH^h(lIcaRt)<_wD)8)WDdl9(1Da5VvAz0GFAZ*D8D zReviVpLEvT2j@cSt(~a*w~(~?EGVzrve9+Fe5Rr7`ju|i^|R-aaKCB|DxuLc`{$4= zZSP?)d-i(#3nPPcMX=Y8Sg&A)JXT(|Z@@^hPkv9*b^!s3U0H$)HdobfR1aM0g|E(~ zh|DQ~7m!KxJaAMWL(xzOXV{5xjIG2QrPR?WH*f0vzTEi!Fkbsj4i_(Ke4UD>PD}Ak zFBfKKP=^iOCzhpay^LeB?KUDQ0>xAvncz8fF()Km{)oX~^jQvTb@}OD9Q~VPL7Th* zH)C~pE@f8VJ-rweQ=26}oTc|645p;=8%sh@eIIhl^aB*AT6~mgIAvD-m~{f2BN};^ z9~aBAaKyGU6B_D?%_}|ItmjsF{O&B9ol6im#XC7J-i4pVTxLowQ$;Z4j=v=+lE9>< zme!Ld!k%E29cG2-6i>*OQPClBQdJx1ueMxaa`QoYWDKQBgci2++yc}hQJ#>^smS!{ z%Vys%V?rkbrC)oBPAVtdzi}Y(@6d5@bhCjwV3KJgUerK7!-2+m;*N8Tz0=Q zMRG$>+nQTD(V1DK>Yjk!?Xj9hZ>EkX>smuE_e!)NyiIFS!LLY_5q{m;o&A*Bi7MoC z{h?9hpwPPsALTt|qGZy{vWUsKW;OcAMzrR;S2WXMJ*)R+^JH(6iMIs`3|;B{tRf4| zRC}a=FWH&J9qOCYC3T+*qG2oXVuJqo6a~ISDF$pS3x~eG$MUMhCqh-O=qS}sEa~9_ zLR1hRJpCp;46&}@JA#wR{M@uP!Ia~UueYpP0B&3LtY6$ftX+kW`7gn042=>hFq2pQPo{W2&`4YZa+?xqvjh=5Ui#&ERQ&yF1 ze%MH91=;JD6`-P|XkW*6eO|~rkZVC!MyM5uE2|Q=J|I($`vAwX&d1Foef`kaB=p6E zr+}a%+#4d#fG;Tp*65L4kNtROjEJb$F(ckJxjWt|>KaVj0{2ORgDq4`;?S-#g?%5iw5VZ!*8l8NpYnzZ(^Ec# zK$6H=sQ0X4(WV`lC}fmix+yDPHd;5M{DCM!-0p*eUghgWUjltsHhTK<8TvGO{_Hk< zQ=On>ly=C%tyOts*>=2;VjiysrRth{(CTNOgKnz2BHxNNcIL<^<+CptTZNfU>bgqr zPh5S2vW4`XnrN}Z7Te98D|wdOi?k3&B!?OEjY%FE3yWAE3wH{fkMS@-aDo`KYtU}; z{I%hs`9DCMLlB`2b@i^(Sko*FsgXA5|| zwOwg&r3%s_ob7N1%W7rrtsZ#dyG*~tI$FBEEUy0HiO)lunie+@2*>9)B_KT8HKi*V+vFdHnH)fjx^Z5w zYmT8u{g5YM;poBf97^xjd5R&9mXI+4?GwCb{!Lw83U1SKtAJOjTUF_tVj-(_PV*rA z(bTV3^P7ErVnw@xsW=(N9(bo~P=}H{1S3P)1N8hfN?mpE#(NjedI|!hScb>_zzkY zh|LNzSd&)eXseQ~Mqq&on4>|WycMMuHsmdVvG*&|KlsiEd!}Xsx+>C{76+Fn*!5G0 z^mb2X*q41jpdLVgw|lb6qd#$aKep)Y-`>bBPMmz(B+hcn3I2A_+jj14X_zYnYPzuR zy|AZw31-w=1}yZvvr%g>faQM`N^JPCd(K#4EOO(D-C-l^LkBa>p`x-54m zOqU&}GB`d4TY~`BLOd}`PW8F1NK2y9sQjrerOX)#Iq#M|4TELev__GA5(D-&1tec! zamQNIn*jW3YkITKiG41h72zSsMs+M?aifdhvSBf{W=iY}URLu|1>RNiNv;QyM%GV^ zRb<}su-{0#;qMn@8tU{ViWgNLw1V6h+R6P{BY=#lClWr0pkc$Oqot$IEx<>DW?4_; z*o2|SD^Ks{T`3McLeMNT_o|0Dw=ed(*-r7zpEEM;{fe)!?hfHW;xY$+{O;a#;S;{&t&gEpQ9LBfIm2l zczx7;^K?ZMdi!C$@?GMgxMF%~Yz$>NGEa=8 zF@)U^CksW7`+?|gA6itFR0tp+wI%ga;?e7+cy1-?D0=B;Y1P?O=ex(`89Y1<*8@-< z)2Pc?iFWTq3~^)ki>T)_sc6MEBAmJn>QPL)h5{+!`QJsb;6`h2sA1u=s*nL<$1osM z(@sx|SuP9H6E6P&a3!AG{g?kB|JOze@~9_FZgQ+Q(QdqSIRn zxo^i?b8BpUKf$(Or1AAv`l7v`(in>!XZRZ*(3&0XOdk?N$np1*f0wy+thw$^>J`pC z`c(V58U}gVz!0ljXw*mXFu{tALxpv|V!60uz^qvQw_I4b{Hb_x8{kMJMHd9~%F(bFpW}3{~esg#XLV#S!aI z<%`V-rma2~CBQG`^CtUG)r;jD)2*Bf82Imh|JJ+wY49Q+VXFSQ2%%3E4G6}Oe+-`Y z0DoG!n4%c&I+qHP-yZ$RWPe(?7;2bQK9^-QzW+~Rt3&b86pDofL|@UE&hA$<{15GR BaCiU! literal 0 HcmV?d00001