From 6c648cf370a37a6f790d6c55e46adf96cbb3ef43 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 24 Jul 2017 23:41:11 +0200 Subject: [PATCH] Fix prefixed shared strings XML file A prefixed sharedStrings.xml file was not properly read, as we were comparing the un-prefixed name with the possible prefixed name. Also, this commit contains a fix for sheets with rows not starting at column A. --- src/Spout/Reader/Wrapper/XMLReader.php | 8 +++++ .../XLSX/Helper/SharedStringsHelper.php | 4 +-- src/Spout/Reader/XLSX/RowIterator.php | 2 +- tests/Spout/Reader/XLSX/ReaderTest.php | 31 ++++++++++++++++++ ...heet_with_prefixed_shared_strings_xml.xlsx | Bin 0 -> 3683 bytes ...eet_with_row_not_starting_at_column_a.xlsx | Bin 0 -> 3613 bytes 6 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 tests/resources/xlsx/sheet_with_prefixed_shared_strings_xml.xlsx create mode 100644 tests/resources/xlsx/sheet_with_row_not_starting_at_column_a.xlsx diff --git a/src/Spout/Reader/Wrapper/XMLReader.php b/src/Spout/Reader/Wrapper/XMLReader.php index 2e20327..08e99fc 100644 --- a/src/Spout/Reader/Wrapper/XMLReader.php +++ b/src/Spout/Reader/Wrapper/XMLReader.php @@ -164,4 +164,12 @@ class XMLReader extends \XMLReader return ($this->nodeType === $nodeType && $currentNodeName === $nodeName); } + + /** + * @return string The name of the current node, un-prefixed + */ + public function getCurrentNodeName() + { + return $this->localName; + } } diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 415d5cf..fc04c79 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -99,7 +99,7 @@ class SharedStringsHelper $xmlReader->readUntilNodeFound(self::XML_NODE_SI); - while ($xmlReader->name === self::XML_NODE_SI) { + while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SI) { $this->processSharedStringsItem($xmlReader, 
$sharedStringIndex); $sharedStringIndex++; @@ -128,7 +128,7 @@ class SharedStringsHelper $xmlReader->next(self::XML_NODE_SST); // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) - while ($xmlReader->name === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) { + while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) { $xmlReader->read(); } diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index 45069b3..e70c617 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -348,7 +348,7 @@ class RowIterator implements IteratorInterface */ protected function isEmptyRow($rowData) { - return (count($rowData) === 1 && $rowData[0] === ''); + return (count($rowData) === 1 && key($rowData) === ''); } /** diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index b9e032e..2ad6834 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -112,6 +112,22 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadShouldSupportPrefixedSharedStringsXML() + { + // The sharedStrings.xml file of this spreadsheet is prefixed. + // For instance, they use "<x:sst>" instead of "<sst>", etc. 
+ $allRows = $this->getAllRowsForFile('sheet_with_prefixed_shared_strings_xml.xlsx'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2', 's1--D2', 's1--E2'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ @@ -169,6 +185,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadShouldSupportFilesWithRowsNotStartingAtColumnA() + { + // file where the row starts at column C: + // 0... + $allRows = $this->getAllRowsForFile('sheet_with_row_not_starting_at_column_a.xlsx'); + + $expectedRows = [ + ['', '', 's1--C1', 's1--D1', 's1--E1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ diff --git a/tests/resources/xlsx/sheet_with_prefixed_shared_strings_xml.xlsx b/tests/resources/xlsx/sheet_with_prefixed_shared_strings_xml.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..33f92f73c85e691e465474e1520cffad5dea0fc2 GIT binary patch literal 3683 zcmai12{@E(7al`~Xe`+hBZ<(L5hEjgWhaIaN|e3H5@RnrGhe3cj3r;luA;;c2}vVg zw#r`7&|)VaLI%zMQiJgS|8re)&3nyz?&mz`e$F}f8AClrCLRz7!~)V!G1X*tm>xhd zfIwkPAkc2$*YoPGST79L%gV&p4dZDcpSQ?ylX#kFb70D-s>XW@4X!x}(kEY3o{Pgw?E$Cd zdFcMpaFZRLdeg6FPFzEzyhD%1Au9#Uk@-%BJg8EcZ-;xo7M{IRWF`VbC?ag)P_PS) zx&$?>jih**V_J&}%#(Nbj*5}}{(q1LEDs{Y=5k`js2Vf&%q(Xb-isAHaKV+&$xQo* zd}eW)PaRg&EFMeEY;u4d51rQUyLl}(&R_a)b9|6rK>^cFzfqvG8jzsH5{94j^kJg7}W3M^1`F4XV`=EzXh?C@q53^Jlu&_}I!BE5OG^S35%w zSGUcy@tp4Js|H>l19F&iqnVAH+e-4L#ny&{k8o;Cvi(qhX(9hrRYOh;?_=rK<*(z1 zkR5@ZIz*i>dwh3j;=FflcK$f^mX4^{Ugg6R`BHlbh9r8HG5E}@Kv3bJF|2{@k@Wc)otKy9ao`G7DdfDhNkQ0-hjFe|aT#Hpgm`+!1~2)a)2XDhvA+;#=hVsI<@ zT(&9rb@Vw&h70Ac@CTPIym~DuF(NVZa7D-maep_<**rCbm?VRk-#yg%%PkuXzH7k1R?@1==aYwZ&SCFnY^0rlD1r>9T+<2ger{g@ZB?0O9SWEFwjZdnpTVe zi9XK%C+`4*?aEb}^NM8`fY$}eRq*FNKh3VXdR(@3b-lcDA-}4(#fdD-9x@qP{a0(C zJ4t;`6B{%!?}5w 
z`BP4KF;lwXF|Uh~PohRmg`AU=#~15#VLOiYdm6|;c;_p0nE!}PtifK#QZu7bFA~;H zxdV++>hx*^WlD$1lgt&E9XhK9CPLVqs`Y)ElHZn9lQCW*VuYpFh!+2>TxbcduZj~o z93esB4(DjN-fKF{;WO!N{1!<46q;xu(6^o0BN2MHXfLZ#$FzdSbNVEGNPhup?Fz#!lcAgGD#VV6i^3cXP`4U zic}KC@2i`$KdmN5giRH}-q%cYFSX2h)~ zt`u?)oO~=qX<1H0^}^G&RQlPm&*|n5DFL-O00h13SI}O*&Y0C`y^Zu;$@|!=(eHy* zdQC~NJV#ygS#HN6@3un35ms&iCT!<;cMM|0l;klzBOk=^IUulcb$p6^Z!H*)y|*eKi!9HO?J_(b=Q$E|G> z*v=Z^Gr3wJvEJiwGqEJIIf?rnFO5$o859Z`T} zXk)FYswWROY;$VgCElcbj9<}~@dp3(_sQDO-3q)@L>6#Tn5|vaM1Y`Jo{fR#!+8j& z=^dXTwX`2Y!L7K|MnOY_zuOyq<=4cSlHONbO3HfO=0~Bq73G0L{p{LfyaF@XuyZG+ z3I6BJsrT7#cNM13D4`yTkTd&OqMg`t=JO6&^nIhwR|w2fXL-7pFD!53nB!6c`5%B= z1%PLcpB!&2u&5YS1F{Oc=Dbd?&6hvZa@ekA<$N&Fu_za^eu-?kyGNOHy{s2@)9CG| zsAZRt1=^LshIr*n2iwsMOOyUszvwWiaMgrWy5%!RRvvQQHQ|7)=YF%_-`qBfo@~~( z2em=#jRx!eC$yVY`Hzde!&oQB%%yjGiIjgk5E|C6C##lxc9IU(vi4}6s7Q8`323^JQ!U? z1=EXZYm0)e$L#5V!FX+yKX0J$B=2@q?F6`i1HDXOi|s1p$ZGKsTMYndAWZ59Ks>$* zyL2k?4`&&qtB8cWX@69#!uWQi$A3K1?D>Ju(Vi!Q`kS~?(DFsBfx~HuhB7;?9Ufiw ztBn75j_UCU%$G%q5=_-(wGyy4wz9lYJu{i99vR8)9Q)=4vQUZRiF)?)=TQ##+K;)K z9nadnm9R=&0whu1gQdR;5S9evSMF~G&pwc~fEn}fb;Y+At&i?r-yyM2D5(o^6Evnwi#}{%mxc2ak1h0g!fUPU&OFI(#cZE0RlJG}V8B zKOL(AOh4^b!0$nF?N|kDPwHk#ru!+wAveM{_K!^*Z!c!+`)C38yPdSHN4&i%C6Ex z*|S&nkUbwlw)`&|g#Z7a>zZrcYuW5BOh747iOSiG}|u7^F=$wgLj}pvdC^)vqwqkIhF`zSLj<`UwIK3J7?Q@e!RPEI+&od;Zpqa1c7S-eXma&9UAfUsF*+2)Mv{YTcSRONJ@i31d zn1tTzeomwBFleh0K9H5O5n-F5o_ZlOtRU!2scHp zyb<6X1`GnxZrccLSU!o?(tInz5H#(-eyZLlaa>;Mvysx#1>XEG9*9g^++5{ptF=W4 z_xNqyLr$8rjdPa+;EA75<57}jY=)BAR%j-~bD{5o-QRNm%z9?P1%*q%O`{OdYqc6d z3V2gKo@C49205q`^RYuEW87Zn@`sJjz+np+5fjTwa~71;S86`Pa-P@{iWY=s-MI#- ztrn9;C6x0flG5sMP?^A4t-iQ`$S5xX!TP(to;f)bhdtFNxzDd%sTET55WwZ7UH}{O zuu!wAE-9iipAJjD;`L>bL5<0Bg@0A3j5hj{7}MLpUa@$kW1EltoY4S$nA@459qsJ* z(#G^hM_&c-`XZ3S47<%t?d>;`H#xHMHdvHFX`1$@;(H^pZ}Lj#VgByM_8neoo^X*d zi2B*(#lqr;BU4vhD(`1cE+?qC5VT>YIU@xsXXQ|*zT~yPTEmyNW#<$7RTOaZN3xCG z?h3Ey@F@;ZU!eh8=b-96u!#9HYPpSXRU9YigUi6W0{N2aOV3d|1NUgXnolVo!+j7R#5>BX?v>Seg!Sf~Yo0pAtv|OE-T2k0 
zw2b`s3<0(LG0FmjIR$)f!OSH=fT_fW!F6V4jZxy-Rbg))Y7#E+qzxNik&@gG-5lfRyd&8>`F zZT?T*0R{(^s{n(maR-3c0LqnpYo9GMH#X3}ja ztW>*mEhtSOL@eJ>oD$bwK0M_|XH}u)UKjtdu!4wn=7I&SHo+Ub(jPzy2z})YkkL@y zUdG$>HNo9_qxA06E`~32^?G#1&b9Jm*;Nf9Y-xF z6KOh_d5m~Yz%E4aIro~Ob?()$6pKfywL?ikqde~h(o48LNOW^+Onxsv7k6=4E_ECp zt*`IL&VNm{`{f*Jcrflbr192ZHecc2&0oSA8(6#6Ojl5`;Z>Km(`8GQ^)<3bq=2A? zfoneUD==pd8|-F8F4|gl#FKOtsLwa$y7ls*nU)%c^Nf}wE-krmQ5r@z3Vi!yX9Rpq zFW+%?PRy5n(A(#E5a&sysb~F9L&L;g*S51K`rovxi*sw_R)74!f%?$&$=UhwCH{!E z<3Fs(ZarXOqwQr}qCbUc_y5~rK;}9ohW1Zw|FomcRFN)|!UnKdg9YBlZSQqtW0AQW z+U9?E!FYs6z6X|8W?fswQdw$atE*xZElsaW$ZLuTqg$+6kMPt8DejU}l@U57n0y2=cmWxcrzlr6c#=oD||I7cs4m`i>P{#$x7K zqrUIUizRIH%kxZ~YuDEHa7=$AhWHNPRvh4&ev9MXMHC((k0i>`DKDyZn+{z{&Y%rQ z&*;%rGb-Y^{$8@-;c@Bw;KFWboc6~r;cK>IOY5#aHFu@caG3E_W8MBp&pROy&hjaf zd&aLVX_$y_1310Y-+0dd_>f|7XS!b10@MPj*8W=UHKkfF&ng4^guPyq79aChr-6tP z=TXg+)%@%fA4o{Q3T{Z24jR-p7_<@*ri*8&uUpIBh^YJD_fxl8i!C(LB7 zlWlXZ&j~>(Nz3RX`QhR}q^s63hBr~=%V1K$?3W?PPRvd?EY^8q3%MYf(+}8` z0{pP;v)zPplUYZ-mxWVe=JkZM<%W znd+2Jx8g#7j8vlNFi7>WVWf5F8kj{Ss~}oh{2HO|wa=S~7XjiYW%($T)SI*(IN;%> zCQDNZChJwQkC(c{P3{-3A_<+M@SollOCYo+8JLOuzmKi}HJhuDu<>=kAiJ}{{!7>x z%SdZo9t%)SQvKhZ?j6%em_kCxHfB@y|Ch6kd4{ys<^KV+W!H4`yT85j4GC6Ac)c6; zYuedyx+PzGM}{4A4$%CzostHK9r%8NHku`At;_!l{9V{=out$52mBu7(LB?L>UA-tX+dMYXlVw)*dd?v=3}=wH;E(A~%1 z0q-YnLk~!6UEU4g5jgn