From 6e11a043c1b7ab8985ef78058df6c2a464862a88 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Fri, 27 Mar 2015 16:54:56 -0700 Subject: [PATCH] Add support for multiline strings Escaped line feed characters in shared strings before processing them. This makes every string remain on one single line and therefore allow fast retrieval Replaced usages of "\n" by PHP_EOL Added test for multiline strings --- .../Helper/XLSX/SharedStringsHelper.php | 36 ++++++++++++++++-- src/Spout/Writer/Internal/XLSX/Workbook.php | 1 - src/Spout/Writer/Internal/XLSX/Worksheet.php | 3 +- .../Helper/XLSX/SharedStringsHelperTest.php | 21 +++++++++- tests/Spout/Writer/CSVTest.php | 2 +- ...e_sheet_with_shared_multiline_strings.xlsx | Bin 0 -> 3789 bytes 6 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 tests/resources/xlsx/one_sheet_with_shared_multiline_strings.xlsx diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php index b1a239c..89f0004 100644 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php +++ b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php @@ -31,6 +31,9 @@ class SharedStringsHelper */ const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; + /** Value to use to escape the line feed character ("\n") */ + const ESCAPED_LINE_FEED_CHARACTER = '_x000A_'; + /** @var string Path of the XLSX file being read */ protected $filePath; @@ -80,7 +83,6 @@ class SharedStringsHelper * Please note that SimpleXML does not provide such a functionality but since it is faster * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. * - * @param string $filePath * @return void * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read */ @@ -120,7 +122,12 @@ class SharedStringsHelper } $unescapedTextValue = $escaper->unescape($textValue); - $this->writeSharedStringToTempFile($unescapedTextValue, $sharedStringIndex); + + // The shared string retrieval logic expects each cell data to be on one line only + // Encoding the line feed character allows to preserve this assumption + $lineFeedEncodedTextValue = $this->escapeLineFeed($unescapedTextValue); + + $this->writeSharedStringToTempFile($lineFeedEncodedTextValue, $sharedStringIndex); $sharedStringIndex++; @@ -246,7 +253,8 @@ class SharedStringsHelper $sharedString = null; if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { - $sharedString = $this->inMemoryTempFileContents[$indexInFile]; + $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; + $sharedString = $this->unescapeLineFeed($escapedSharedString); } if (!$sharedString) { @@ -256,6 +264,28 @@ class SharedStringsHelper return rtrim($sharedString, PHP_EOL); } + /** + * Escapes the line feed character (\n) + * + * @param string $unescapedString + * @return string + */ + private function escapeLineFeed($unescapedString) + { + return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString); + } + + /** + * Unescapes the line feed character (\n) + * + * @param string $escapedString + * @return string + */ + private function unescapeLineFeed($escapedString) + { + return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString); + } + /** * Deletes the created temporary folder and all its contents * diff --git a/src/Spout/Writer/Internal/XLSX/Workbook.php b/src/Spout/Writer/Internal/XLSX/Workbook.php index 3f8cca4..e6ca0db 100644 --- a/src/Spout/Writer/Internal/XLSX/Workbook.php +++ b/src/Spout/Writer/Internal/XLSX/Workbook.php @@ -5,7 +5,6 @@ namespace Box\Spout\Writer\Internal\XLSX; use Box\Spout\Writer\Exception\SheetNotFoundException; use Box\Spout\Writer\Helper\XLSX\FileSystemHelper; use Box\Spout\Writer\Helper\XLSX\SharedStringsHelper; -use Box\Spout\Writer\Helper\XLSX\ZipHelper; use Box\Spout\Writer\Sheet; /** diff --git a/src/Spout/Writer/Internal/XLSX/Worksheet.php b/src/Spout/Writer/Internal/XLSX/Worksheet.php index 326c41b..4a13dea 100644 --- a/src/Spout/Writer/Internal/XLSX/Worksheet.php +++ b/src/Spout/Writer/Internal/XLSX/Worksheet.php @@ -42,7 +42,8 @@ EOD; /** * @param \Box\Spout\Writer\Sheet $externalSheet The associated "external" sheet - * @param string $tempFolder Temporary folder where the files to create the XLSX will be stored + * @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored + * @param \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper $sharedStringsHelper Helper for shared strings * @param bool $shouldUseInlineStrings Whether inline or shared strings should be used * @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing */ diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php b/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php index 110c04d..c868863 100644 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php +++ b/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php @@ -46,7 +46,7 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase $this->assertEquals(1, count($filesInTempFolder), 'One temp file should have been created in the temp folder.'); $tempFileContents = file_get_contents($filesInTempFolder[0]); - $tempFileContentsPerLine = explode("\n", $tempFileContents); + $tempFileContentsPerLine = explode(PHP_EOL, $tempFileContents); $this->assertEquals('s1--A1', $tempFileContentsPerLine[0]); $this->assertEquals('s1--E5', $tempFileContentsPerLine[24]); @@ -96,4 +96,23 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase $sharedString = $this->sharedStringsHelper->getStringAtIndex(24); $this->assertEquals('s1--E5', $sharedString); } + + /** + * @return void + */ + public function testGetStringAtIndexShouldWorkWithMultilineStrings() + { + $resourcePath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals("s1\nA1", $sharedString); + + $sharedString = $sharedStringsHelper->getStringAtIndex(24); + $this->assertEquals("s1\nE5", $sharedString); + + $sharedStringsHelper->cleanup(); + } } diff --git a/tests/Spout/Writer/CSVTest.php b/tests/Spout/Writer/CSVTest.php index 0d8478b..bfbc71f 100644 --- a/tests/Spout/Writer/CSVTest.php +++ b/tests/Spout/Writer/CSVTest.php @@ -135,6 +135,6 @@ class CSVTest extends \PHPUnit_Framework_TestCase private function trimWrittenContent($writtenContent) { // remove line feeds and UTF-8 BOM - return trim($writtenContent, "\n" . CSV::UTF8_BOM); + return trim($writtenContent, PHP_EOL . CSV::UTF8_BOM); } } diff --git a/tests/resources/xlsx/one_sheet_with_shared_multiline_strings.xlsx b/tests/resources/xlsx/one_sheet_with_shared_multiline_strings.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bfe9e8baf4e9801c19f9460ef0ece8c9df163820 GIT binary patch literal 3789 zcmai13pmql8y~Xi#YhWDj*&x|!xS&&7-DPLGMVX|Ugp@;(RMZNU3PmL6 za?WX}oDX}Q8D8ICVxsT+p6l9m?b>typ69uL_u+RBZN#z#1_FWDK*k9cN5D?wJqRWc zC}Il;v>o{Mq>dZG8%OXyZRYQe^Rkljb3I!Sqh(Yq5Bo$5?M(S0<9dKgtd0OFx(`is zj3^%xXp-zMP|0V zA##42Wu1O)zU07(*TG}KR_;Gek8MSAYVK))*W1fOM~7OS<*ncQ3Q9|fNY&pO%L|<5 zdZijj+HN@MQY$?K#-iI_YCiPsHwquy6?R5Qd|XuJO=vki${-y|t3hd(xP0 zLMU6V@|N;Dk>#zs-4!wPy#(objmqKO=wBp*KoAm958!oefq`$QJ#lBf;Hx$(fjFll zoYe>H*)oGb>}xi1%a%{0jg3m=xkgKvH}1T3De4AMW`EVq;DF4`Eq?_Z9TrnuG_Q~|@RxDQx_5xt zT7s92Pk&AamHwpJG@L}&}hxT-H z-$)zmSW{;Ku>CiX!`$o5Y~0$L2 z0D4rGKJ)7JqrQ=oK1KI4hUs_od_pkdHko}nda{}*(QjFe?-vPg8#1zsyY#f3rZ_XK z&)tO2n@Z_)vzc#YcF9Cl`QwmNdkrWxC3+rUnewbMPXvOawm5D$M={7v-k>k9YIR{jXB9_SKGvik+C4i>~Rl zdYi}cdxn7Oj`i~bZjJz-ZR=gNbMwS4`|5f_3so!v6slx!IEl#q!ojr87gB9ot^=Ui=YV|%O>-fYY)E$@A=XbTyyHwMo%SFwEUXGch`WlEgp z>wcUQ1$oBn%uhNiB`H^f8k?{GUOCxlcH6^(8QqTNLHP$<*a}gTZIDN{ z&Q1ve9QS3Y>Cgp_7!^d;wI7R5w-1^4qQ*hHD^!oR>`2EDThc4`eM zoNA+Y1t>)(LMhu)8SK=U-!pQ7!@0oNuQLAYtAb*jx3D;A;WMI|nEC*k6VjQ_4egJV zr1D(hDi3eB=;!jI`B=WASag^U$kve#*&ZGgG}QRuX$E;i-Ei5KfKc^>o7iPwe4t*_s!WY zbWln)f!!FRHjwJ(rY^ul zx2|4TP^HG9v74>5J;xk+4qz%jCEjureIy*@?rjiBJ|c(bl)L3o@R#Lr8d#v1Eivui{Tq1y$R=O}_JA%$d!t7@|qyF^aMoGkKIaFck#@MY`grfk}dS0{LB3mdOZ)iEigYWaHqaYNv^6+mVG!8eD z5d#+4p5qfr!Cc=1gIpEQ)2YQ&jP z1TprpZQ>H}zR9q+MGsFc;2?MmuJmJ!4&LU>K`kRCIJ(Zco>QVy?Qgz=wk((V!dv6@ zq1%;rj776C$3@uM<&Ojkh^O0_9C&ep0uPtVNY(N+?`NGJ`*lZ>14a^%#k*e zE^E~}NuPPcKSiH{H7}l8T&v_2!Q^s?DgFd-s|@hWwZifG+Pg;9GAY*NI5K0={fz-;1D7;d?(G5ubL6Ki=<&W&+P8(s7lCje!tXDrnjm5 z)nYqL@p7%D1nt_ds)CQ^%)X{PtM+lp*77_mpU%9RSDU*Rl;N|N6AtISQ5mtsbD3Cx zn;>A~;NN8X6QZzOm$9+kCKu&lVM__K(6lA1kSx_(A5u;3Y6{=8!EvWZxZ>#3#~rGo ziEr9wKaHq{d(MKg<_24x&`rvEefw^I+=qFK$kU^FP5(O`kZXHm0zv`6+ zoqG9VmN4abg}$g(UScD4#-+vzn`!d)_CPFDHa071vb$mQ&Ct{6yxNAB>es$Y(CqKG zDA>5d%jL}*W1L+oKAYgrb7W=29v<#&G^U;SdI|fU|cU0%G=CFqh+uCOkNjPs%NOLZDLL52glXpM)HaP(cRg02+Sv>5Ny7}zDb zGR(c;qjAhrGxpNx_hLpN^zcff+o%}Da8$A$tHPHjB47dD63zNheoCQ4f|KOM#|Zf4 z5HMKbO9`;Ev&5ZMDKf%=_n(o6x3xGBN|!?%fUPPKl2R30FkOOoVd>Mc1)wD`0p((CzZ^i+eqrW^xG8PVq^Qubye(A=sl7Wd>!~B$~|8?2&{KZ(e0k76gW53_G zaUNs93fIGaO=pZtS896Wh{Hf<0?M*>>FU74fNv&fS=<@xAK>rJPpMyh?PkF5PHtoU z3HURq8^xb-kIH)3`gXK|<3GzL;~YI8zJC%?hQ}*}Z4?^D3CAHTC#=cNM(D==$$-9P z`w3nDdn1FtO`(lgf95