From 6f0f7c9690ec57a34c0b83183ba43ab08b8e1fe9 Mon Sep 17 00:00:00 2001 From: madflow Date: Wed, 6 Apr 2016 22:00:47 +0200 Subject: [PATCH] Fix #195 --- .../Reader/XLSX/Helper/CellValueFormatter.php | 3 +- tests/Spout/Reader/CSV/ReaderTest.php | 17 +++++++ tests/Spout/Reader/ODS/ReaderTest.php | 17 +++++++ .../XLSX/Helper/CellValueFormatterTest.php | 42 ++++++++++++++++++ tests/Spout/Reader/XLSX/ReaderTest.php | 18 ++++++++ .../csv/sheet_with_untrimmed_strings.csv | 5 +++ .../ods/sheet_with_untrimmed_strings.ods | Bin 0 -> 2454 bytes .../xlsx/sheet_with_untrimmed_strings.xlsx | Bin 0 -> 3223 bytes 8 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 tests/resources/csv/sheet_with_untrimmed_strings.csv create mode 100644 tests/resources/ods/sheet_with_untrimmed_strings.ods create mode 100644 tests/resources/xlsx/sheet_with_untrimmed_strings.xlsx diff --git a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php index c6c40f4..e63384d 100644 --- a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php @@ -118,8 +118,7 @@ class CellValueFormatter // inline strings are formatted this way: // [INLINE_STRING] $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $this->escaper->unescape($escapedCellValue); + $cellValue = $this->escaper->unescape($tNode->nodeValue); return $cellValue; } diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index 1bf0e88..f806fd2 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -384,6 +384,23 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows, 'There should be 3 rows, with equal length'); } + /** + * https://github.com/box/spout/issues/195 + * @return void + */ + public function testReaderShouldNotTrimCellValues() + { + $allRows = $this->getAllRowsForFile('sheet_with_untrimmed_strings.csv'); + + $expectedRows = [ + ['A'], + [' A '], + ["\n\tA\n\t"], + ]; + + $this->assertEquals($expectedRows, $allRows, 'Cell values should not be trimmed'); + } + /** * @param string $fileName * @param string|void $fieldDelimiter diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index e852630..8683459 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -419,6 +419,23 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows, 'There should be 3 rows, with equal length'); } + /** + * https://github.com/box/spout/issues/195 + * @return void + */ + public function testReaderShouldNotTrimCellValues() + { + $allRows = $this->getAllRowsForFile('sheet_with_untrimmed_strings.ods'); + + $expectedRows = [ + ['A'], + [' A '], + ["\n\tA\n\t"], + ]; + + $this->assertEquals($expectedRows, $allRows, 'Cell values should not be trimmed'); + } + /** * @param string $fileName * @return array All the read rows the given file diff --git a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php index 68ad980..73863ae 100644 --- a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php +++ b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php @@ -123,4 +123,46 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedFormattedValue, $formattedValue); $this->assertEquals($expectedType, gettype($formattedValue)); } + + /** + * @return array + */ + public function dataProviderForTestFormatStringCellValue() + { + return [ + ['A', 'A'], + [' A ', ' A '], + ["\n\tA\n\t", "\n\tA\n\t"], + [' ', ' '], + ]; + } + + /** + * @dataProvider dataProviderForTestFormatStringCellValue + * + * @param string $value + * @param string $expectedFormattedValue + * @return void + */ + public function testFormatInlineStringCellValue($value, $expectedFormattedValue) + { + $nodeListMock = $this->getMockBuilder('DOMNodeList')->disableOriginalConstructor()->getMock(); + $nodeListMock + ->expects($this->atLeastOnce()) + ->method('item') + ->with(0) + ->will($this->returnValue((object)['nodeValue' => $value])); + + $nodeMock = $this->getMockBuilder('DOMElement')->disableOriginalConstructor()->getMock(); + $nodeMock + ->expects($this->atLeastOnce()) + ->method('getElementsByTagName') + ->with(CellValueFormatter::XML_NODE_INLINE_STRING_VALUE) + ->will($this->returnValue($nodeListMock)); + + $formatter = new CellValueFormatter(null, null); + $formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatInlineStringCellValue', $nodeMock); + + $this->assertEquals($expectedFormattedValue, $formattedValue); + } } diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 65a0069..106a790 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -461,6 +461,24 @@ class ReaderTest extends \PHPUnit_Framework_TestCase } + /** + * https://github.com/box/spout/issues/195 + * @return void + */ + public function testReaderShouldNotTrimCellValues() + { + $allRows = $this->getAllRowsForFile('sheet_with_untrimmed_strings.xlsx'); + + $expectedRows = [ + ['A'], + [' A '], + ["\n\tA\n\t"], + ]; + + $this->assertEquals($expectedRows, $allRows, 'Cell values should not be trimmed'); + } + + /** * @param string $fileName * @return array All the read rows the given file diff --git a/tests/resources/csv/sheet_with_untrimmed_strings.csv b/tests/resources/csv/sheet_with_untrimmed_strings.csv new file mode 100644 index 0000000..4be5db5 --- /dev/null +++ b/tests/resources/csv/sheet_with_untrimmed_strings.csv @@ -0,0 +1,5 @@ +A +" A " +" + A + " diff --git a/tests/resources/ods/sheet_with_untrimmed_strings.ods b/tests/resources/ods/sheet_with_untrimmed_strings.ods new file mode 100644 index 0000000000000000000000000000000000000000..eb55bf87a0987dc823124d9f465f671a6a3b0847 GIT binary patch literal 2454 zcmZ{m2T)Vl9>#+d4WM*cLKW#!A{_()5hV1IAR-E=A(YTVGX|F4S&$+ktaRLvC^btE zm4JXWDG~_Mt1MuGjbu^a1>bw)%FNz-XU^O^_jm4{@BIJY`RuJ&*#rRqz%hXGlO&5o zq+Xgb^G@~q69B;9#~+T03WBqfVu1juV{Gl2NV$>Z1V@P99#uE=e$Z7$%)06<3$zQLRkj`CEE^hajqm;}UwdClm0i7Wo>1fSdj zzU*si!=C{Kh7bpmIYIHS_gr!^7qMfvbz=4&MuCLChE8Rxp`H_}E2UTRN*$ahR-vQx zPQQ;AQh7$bCtc!Ycg^kS-rLe1IgG88JcVj^nRqJK zOnupOgEB8jx2}v+ne}mopb_ns?r3m>8IMWq`(b6#h1J9q!wHR!m^Z@nu)XQA)lX^q z+@|^l<}Nw!^wO`n`oCfn2w1Z<^26}PdU>SQb$la6fnrIVl#a;jv66M=z^SW-17j>1 z)NCN-yeR=QFA{XU&V4#UbiYrnK%{sk$G!jQ3tTkdXQeK-J}G*~REd`h0AOB#n}Go+ zcmV1^tQBWc)&nrFp^kg(ortzctfR0jP8w#oJ-W>*ror39ag_kfk zN@pOIt?3ZQ!B?B)g*CM^*b4t=LxN4L-*TZdt&^!GD{r-??8X{dt@f z%cVb0;e8xYVyT?u@{`FFpH9CzMKP^Hto!0*Htt{f%aEnI!+}epP>sXUxzAgi1%2X^E`@jl3AzM0!*#nGyrIsnt zbqj0l-kPEPNW#tCJ2seNpM_`J=+V1Xt(9$#tce8APmC4k+1w=6%k94NQ-tp`|4PTuF4qF2vZ z2m9*Tk&)La$Z|^N(8Q+j&;3x;6_;Gg><0+bHaMC65Q>UI!b87%2dwulvmKtjJEO3Y zp(S&V8*Y$~g=Xo=N=k9X=8s%}bael{qv>5=STE3GkD_l(^ygAuSUU=^5bwJ+xjFokR}M`cY}x*${85X8o%wK6f*v&%Ok>d zUwXA|AiJG&UWw`-1+2l}6kL8ccQs6fHdV>XrOrTXGYPGs=Zrn)z+M9IBN1<3Ib^3? zt}swDP2Hxd$ORY0h?XQnzC|t^9P~&Ttk0ca z*LqJ2A#SQZ6rO|)4Gl$pC^vyPJOB(bKz{R;MD z3$VTxVKSjWN164U(DNK|Hl$snR_RJGC&38IXg?iV?nxgZv zZC1Eiw(5DDSc$r*Z8vY(z249gWW0;q<1lqNMf{fm z00^;;2h{&E#bN63j{1YT!%QUq*k2EWhpp!an8Ef>BRb3-Uf&;_`_X?{ZF?(rj)T|O OnWq%GMT96z)J39&3DM&( zL|<)VMN9NL#Q#b5{H^)#zIWdFo%hc9?z!K0-~H}I+#@4r0f9glK*AM|H5|XC`Dl=U zKzGSOAZFmJxiZ?#%fZdd+Q8r4!P64z=ju|M+zW3LV^ZBa#&nspG&+PaX-!aI`3>@d zLaPMh3|r?w6!kgumv( zq9bYUMEu9$E3$H_IJY7oRIS_<`jx7x)Eagr|E88>II{tc6>ENpYQ#-6B(rJbki7Kw z>l@^CSp0}e(=Bfv))p75;``lB`zg9k9Qle=wLZyj#RU>L%tKd&*Nnfu*ofG&C_ED8 zG-Ndwx}#*)99{WGo2lsRjO_Nw%=jKJCM0837CIc;^jqj>Man4dnSK6pC4qqo*$<1) ziewn}<@#*C8E7CnRFFLjeFxOO@EaKD`b;2bsu(q(z?(Zr6{@u@+Pck*I4Ar;Djj7hx8|K)r82dFvF~fwv#r&KTN&fiz0_VhZ0xxbX7BZpXDJJwY|J|M zT)(rie?MW_k@s~wa>={DRLLn@=jY&j8)fw>lkK$LH?MB7%L|}r;SDaa*M{P@=Wgst z2M-Zi^5)pj^SwUeo)-gf-5~>k=zs9_^zwIc@H}PvP9N{~3Cz^IC&aXu$`_3^9FH&2 zO5LqaE7YqP>tGlZ1B1(cv!20^{hDtfG|cYE)tB{I8nLQQwyJ!PE0P*SsGQTbLCM7_ zch4~F9CP<8C68%V7HfqVg{_q~^0vc@NAvY?7s(zca@%?7Dn;%V|3b$&0(tRtI^g>C zf)FOWKYwEahAw8xaqFjFK6aJny}C?>`a*5j?F&P&&M6QQ6svtK3{+A0fQ9J7dGF?o zH{|(}H@E|FxmPQgf0Dp6e$c)LzD4hjtN(MDGas$wDN_LND3%|Itd_l#&1o8K55_I< z;z~EQKE~~E>dZZs|0EQE2xck$ePzy%LS<#d+0L%UG0w}3fsNndh4AgvnE4u-*_(0u z%`L-{yTaT@lPTFy?ZYU3zWfCy-9jl(-b)9WFkcf~(R1dthDD{iX5^#cfYr~6x2Qf( zI6jpPq`aMb|F>-=D1@**$oWxb&biC(q_O$k>bss@^C405m$J?=9PC+3)2T<+0gmfs zOB3$4Y%`2?hot9AHYZkT3Ijw_Wh+R$s~aK4(zzBy zGgunNwlE;s%8>a~Qv726Xkk;8nptLD{Fg~{CUWyrVlzeDao<4<^ zZMYJmsWb{nvu)&>AIM_XY4Cmmem4yM;6ovuM>%ATt|=WJ@rcxAQME^}oVLanuK$b3 zOoYAEZsg`%IV@4uI+GIBp6YrRE!uK4$oS_J)IFxz z?bP2K3fu+hjnRE`SH84r1hR4^Mai30A%bT*kCF31C)D5kLIOC|<03h7Fl09(2A&0N z>br%?7AvElD@?ubCM?fYP2@@NYqH@cW9$tx2Y?!mO7j&&&hIOEJWmZVl-sc0bIF(w zsi$PH-DfT+>hFCZd{X}Ai1NIg&Ts>UB0$Ck;9>f^oHp+6r)uiN<7wWCF{!K_V-f;& zcEG&O7;p~73RR?6X_&nhS;mVcXAfL{lgDo3? z8Rb67;Peuj6g{82g=M(*CXn+g@Q+{qk^89ydpbEdczFUefJ6Md7k_x~)&`~n&3bOJ zbnT_^r4&L)OEiY99!!#f|B!S#QkgBAoW#D+t5r7o(u$V~8Ux2$Pgm(`k9UBdkB_#B zrOS-Dr>^fF9<7rLCox3$i04M4``isuUbk=O2&&jpTrhpBBI%p`5cYi3pZg(Qfkemr z%@nbx{yEIcHn-A?WDv(1^etPte(Poot|#&n~Vbbr~p2|@k1wT4+j^|a{~){`lT%K zKftO$*Rd9e>Dn9Sq>3CEv=n$@X(O+g-0*Xa)RR@ZvB^*2C9eBH>iJ2$0yMXr zd9hGbuZX~B-E#gWNU!;PJ(Yn_eyx~P^aLsQa#7F~jn-?NuaZT&)UAtOA=6Y&G~;!K z0w>ge=7P*~gVu*w*F_Wh#UTtUs6PLN&EYDx43lG0wPfiTl?t4$g!9m`WP{YOajQyX z>AIdb~zp;n{Ot=2CgF#0z~ptzsmP?XIDB zP2_oEHZy&f#S&NYz)kbp!8dGzqZ~m~-nIuyDWY3qMnflH#vUMoW;9-b)w>$Mr-vhC z5~pJX7(QhX|3X#H;ut@#Uq8q`ET|WGNoMG>#kr}(Y z(87&79rrP9usL*q?Wk--qZ60><4v8ZH1Z^#9g~`D!JTa*V{wBLii+O!^$NHTzT1w3 zmLl0^J@b;X-lKGdRlmbb>=x<|^%mN^lOTy~TFO|Tak`l4cO2f^IupQPCn04a`}Z9Z z(B%777j}C5!$wIQfOr@B0SAHV!u)~OB-)IK!NiNj8Q2*p(5WB&Z_OZv5)ZgD=rv#+ z{=acY3@0}9GdPOkZ}7kDofu8*oM&i7%76Az;t0h4br#|1M}&WLGvX-3-F+5?@51*y k{-xK6;}EySSsWPkxh6r}qX5zafvAC70Vt6Y&FR~J0p6fJ2LJ#7 literal 0 HcmV?d00001