From a24e794177462506fb707596235675d83ffddd4a Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 23 May 2016 13:36:07 -0700 Subject: [PATCH 1/3] Update README.md (#228) Fixed the amount of memory used by Spout, as it used to take into account the memory used by PHPUnit. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a34c365..a2054be 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License](https://poser.pugx.org/box/spout/license)](https://packagist.org/packages/box/spout) Spout is a PHP library to read and write spreadsheet files (CSV, XLSX and ODS), in a fast and scalable way. -Contrary to other file readers or writers, it is capable of processing very large files while keeping the memory usage really low (less than 10MB). +Contrary to other file readers or writers, it is capable of processing very large files while keeping the memory usage really low (less than 3MB). Join the community and come discuss about Spout: [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/box/spout?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) @@ -301,7 +301,7 @@ For information, the performance tests take about 30 minutes to run (processing ## Frequently Asked Questions -#### How can Spout handle such large data sets and still use less than 10MB of memory? +#### How can Spout handle such large data sets and still use less than 3MB of memory? When writing data, Spout is streaming the data to files, one or few lines at a time. That means that it only keeps in memory the few rows that it needs to write. Once written, the memory is freed. From 2c80b1f23a12e179e4adbef4de2130dc516e81ee Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 23 May 2016 14:15:48 -0700 Subject: [PATCH 2/3] XLSX Reader should add a space between text nodes (#229) When a cell contains multiple text nodes, the cell value is currently obtained by concatenating the value of each text node. Instead, values should still be concatenated but a space should be added in between. --- .../Reader/XLSX/Helper/SharedStringsHelper.php | 11 +++++++++-- .../XLSX/Helper/SharedStringsHelperTest.php | 16 ++++++++++++++++ ...taining_text_and_hyperlink_in_same_cell.xlsx | Bin 0 -> 5127 bytes 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 6aafb52..1ba4e6a 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -100,11 +100,16 @@ class SharedStringsHelper // removes nodes that should not be read, like the pronunciation of the Kanji characters $cleanNode = $this->removeSuperfluousTextNodes($node); - // find all text nodes 't'; there can be multiple if the cell contains formatting + // find all text nodes "t"; there can be multiple if the cell contains formatting $textNodes = $cleanNode->xpath('//ns:t'); $textValue = ''; - foreach ($textNodes as $textNode) { + foreach ($textNodes as $nodeIndex => $textNode) { + if ($nodeIndex !== 0) { + // add a space between each "t" node + $textValue .= ' '; + } + if ($this->shouldPreserveWhitespace($textNode)) { $textValue .= $textNode->__toString(); } else { @@ -200,6 +205,8 @@ class SharedStringsHelper { $tagsToRemove = [ 'rPh', // Pronunciation of the text + 'pPr', // Paragraph Properties / Previous Paragraph Properties + 'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark ]; foreach ($tagsToRemove as $tagToRemove) { diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php index a72d19a..31f9c71 100644 --- a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -82,6 +82,22 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase $sharedStringsHelper->cleanup(); } + /** + * @return void + */ + public function testGetStringAtIndexShouldWorkWithStringsContainingTextAndHyperlinkInSameCell() + { + $resourcePath = $this->getResourcePath('one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('go to https://github.com please', $sharedString); + + $sharedStringsHelper->cleanup(); + } + /** * @return void */ diff --git a/tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx b/tests/resources/xlsx/one_sheet_with_shared_strings_containing_text_and_hyperlink_in_same_cell.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..08527df059d8834bb505d187725ae18d47f69979 GIT binary patch literal 5127 zcmaJ_1yodRx26Y28U`r=L2@LeBn6~q96-7h0f&$phfe8|7L_gu0qGWyMu9;>Lb^*D zDFxxq==Xncz4v#Ywaz+ct+V%8`+4_%cBn&f@u;u}2??=06w8#cF34r{voR8G4Nu9KQ|@mmMVVBYKe=)k( zqW3_2?qN1VZSbSXM;-yT_lHA}SoGnyXTl$JH^1ihuWCvXIphnI3VK*U70)^=(Ga!$qt0Ji3pS*)@i=#~ zK0IyBZhiAvlCQwoHa>-w=D7R5koD`XyYJft+=(Nf;$o1hC}?6{N8{jy{Wqk@FCt}T zkA!2OiqL>*w(yfR9D~JcUkNdX&>ENz&R)M6*|g&NY!# z<9$zL$FZEEA-_%>w?fwQbV4dVhYTGq0~E&&nZmHZi<|n~%Kg|k=qyB8i@5lR>*F>2 zpWxUm1jc0>9gFG__!YS3e20%b)SlKHGiJq zHL9gqR1WFAd92N#E+S4MSBt&G9955HJ68G8aBIg(T2^25n!niUheu*+n^s7rx@sm? z&x7|AHgf}yDD~-@wf2B5V_|gBVK4j_6)l%#gbJEv@YA3ux>ZBvwhb7p&u&5~#yH}lDa#9QO9lBBz9|xr=$9n*^34?OT%ll+ zn7uBvC{kZDxDLP^7htEl0)alN{l5k_9s0OjZTNmCy^B54+SK0O8gpd72_OcB3Gtw< zpCHA=y!3`Z_U9n+XC-DM#3aVAJ<|9;5?|^;GREBO*N8Hm0rLq0 z_H4f|N-fH;T>Zk@RqUUpA>wjr$k?25%l)$brSU>_@AZ@G-*&4L6bg%{COvU5$f#|9 zcOO9e&GuJhNPi=95t9hbPP=h*7$1W1rfhkfjmAqDYRUrfR)WM}pW=kd^fa?>M^;n) zJdET7!}=NBqd)5JE%l&2&m9;+w+K6cg{IUbxJ$lWPS*x`&8HbZH^;eA*K*ek0!^K< zyX{_WQFGWHBn*2*z1I~n2btbH^h(lIcaRt)<_wD)8)WDdl9(1Da5VvAz0GFAZ*D8D zReviVpLEvT2j@cSt(~a*w~(~?EGVzrve9+Fe5Rr7`ju|i^|R-aaKCB|DxuLc`{$4= zZSP?)d-i(#3nPPcMX=Y8Sg&A)JXT(|Z@@^hPkv9*b^!s3U0H$)HdobfR1aM0g|E(~ zh|DQ~7m!KxJaAMWL(xzOXV{5xjIG2QrPR?WH*f0vzTEi!Fkbsj4i_(Ke4UD>PD}Ak zFBfKKP=^iOCzhpay^LeB?KUDQ0>xAvncz8fF()Km{)oX~^jQvTb@}OD9Q~VPL7Th* zH)C~pE@f8VJ-rweQ=26}oTc|645p;=8%sh@eIIhl^aB*AT6~mgIAvD-m~{f2BN};^ z9~aBAaKyGU6B_D?%_}|ItmjsF{O&B9ol6im#XC7J-i4pVTxLowQ$;Z4j=v=+lE9>< zme!Ld!k%E29cG2-6i>*OQPClBQdJx1ueMxaa`QoYWDKQBgci2++yc}hQJ#>^smS!{ z%Vys%V?rkbrC)oBPAVtdzi}Y(@6d5@bhCjwV3KJgUerK7!-2+m;*N8Tz0=Q zMRG$>+nQTD(V1DK>Yjk!?Xj9hZ>EkX>smuE_e!)NyiIFS!LLY_5q{m;o&A*Bi7MoC z{h?9hpwPPsALTt|qGZy{vWUsKW;OcAMzrR;S2WXMJ*)R+^JH(6iMIs`3|;B{tRf4| zRC}a=FWH&J9qOCYC3T+*qG2oXVuJqo6a~ISDF$pS3x~eG$MUMhCqh-O=qS}sEa~9_ zLR1hRJpCp;46&}@JA#wR{M@uP!Ia~UueYpP0B&3LtY6$ftX+kW`7gn042=>hFq2pQPo{W2&`4YZa+?xqvjh=5Ui#&ERQ&yF1 ze%MH91=;JD6`-P|XkW*6eO|~rkZVC!MyM5uE2|Q=J|I($`vAwX&d1Foef`kaB=p6E zr+}a%+#4d#fG;Tp*65L4kNtROjEJb$F(ckJxjWt|>KaVj0{2ORgDq4`;?S-#g?%5iw5VZ!*8l8NpYnzZ(^Ec# zK$6H=sQ0X4(WV`lC}fmix+yDPHd;5M{DCM!-0p*eUghgWUjltsHhTK<8TvGO{_Hk< zQ=On>ly=C%tyOts*>=2;VjiysrRth{(CTNOgKnz2BHxNNcIL<^<+CptTZNfU>bgqr zPh5S2vW4`XnrN}Z7Te98D|wdOi?k3&B!?OEjY%FE3yWAE3wH{fkMS@-aDo`KYtU}; z{I%hs`9DCMLlB`2b@i^(Sko*FsgXA5|| zwOwg&r3%s_ob7N1%W7rrtsZ#dyG*~tI$FBEEUy0HiO)lunie+@2*>9)B_KT8HKi*V+vFdHnH)fjx^Z5w zYmT8u{g5YM;poBf97^xjd5R&9mXI+4?GwCb{!Lw83U1SKtAJOjTUF_tVj-(_PV*rA z(bTV3^P7ErVnw@xsW=(N9(bo~P=}H{1S3P)1N8hfN?mpE#(NjedI|!hScb>_zzkY zh|LNzSd&)eXseQ~Mqq&on4>|WycMMuHsmdVvG*&|KlsiEd!}Xsx+>C{76+Fn*!5G0 z^mb2X*q41jpdLVgw|lb6qd#$aKep)Y-`>bBPMmz(B+hcn3I2A_+jj14X_zYnYPzuR zy|AZw31-w=1}yZvvr%g>faQM`N^JPCd(K#4EOO(D-C-l^LkBa>p`x-54m zOqU&}GB`d4TY~`BLOd}`PW8F1NK2y9sQjrerOX)#Iq#M|4TELev__GA5(D-&1tec! zamQNIn*jW3YkITKiG41h72zSsMs+M?aifdhvSBf{W=iY}URLu|1>RNiNv;QyM%GV^ zRb<}su-{0#;qMn@8tU{ViWgNLw1V6h+R6P{BY=#lClWr0pkc$Oqot$IEx<>DW?4_; z*o2|SD^Ks{T`3McLeMNT_o|0Dw=ed(*-r7zpEEM;{fe)!?hfHW;xY$+{O;a#;S;{&t&gEpQ9LBfIm2l zczx7;^K?ZMdi!C$@?GMgxMF%~Yz$>NGEa=8 zF@)U^CksW7`+?|gA6itFR0tp+wI%ga;?e7+cy1-?D0=B;Y1P?O=ex(`89Y1<*8@-< z)2Pc?iFWTq3~^)ki>T)_sc6MEBAmJn>QPL)h5{+!`QJsb;6`h2sA1u=s*nL<$1osM z(@sx|SuP9H6E6P&a3!AG{g?kB|JOze@~9_FZgQ+Q(QdqSIRn zxo^i?b8BpUKf$(Or1AAv`l7v`(in>!XZRZ*(3&0XOdk?N$np1*f0wy+thw$^>J`pC z`c(V58U}gVz!0ljXw*mXFu{tALxpv|V!60uz^qvQw_I4b{Hb_x8{kMJMHd9~%F(bFpW}3{~esg#XLV#S!aI z<%`V-rma2~CBQG`^CtUG)r;jD)2*Bf82Imh|JJ+wY49Q+VXFSQ2%%3E4G6}Oe+-`Y z0DoG!n4%c&I+qHP-yZ$RWPe(?7;2bQK9^-QzW+~Rt3&b86pDofL|@UE&hA$<{15GR BaCiU! literal 0 HcmV?d00001 From 2ed30321b43c619ba7a008e778e402097c7e1849 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Wed, 25 May 2016 19:59:18 -0700 Subject: [PATCH 3/3] ODS Writer should accept associative arrays (#232) --- src/Spout/Writer/ODS/Internal/Worksheet.php | 10 ++++++++-- tests/Spout/Writer/CSV/WriterTest.php | 14 ++++++++++++++ tests/Spout/Writer/ODS/WriterTest.php | 21 ++++++++++++++++++++- tests/Spout/Writer/XLSX/WriterTest.php | 21 ++++++++++++++++++++- 4 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/Spout/Writer/ODS/Internal/Worksheet.php b/src/Spout/Writer/ODS/Internal/Worksheet.php index f0544f0..7a0df1a 100644 --- a/src/Spout/Writer/ODS/Internal/Worksheet.php +++ b/src/Spout/Writer/ODS/Internal/Worksheet.php @@ -134,6 +134,10 @@ class Worksheet implements WorksheetInterface */ public function addRow($dataRow, $style) { + // $dataRow can be an associative array. We need to transform + // it into a regular array, as we'll use the numeric indexes. + $dataRowWithNumericIndexes = array_values($dataRow); + $styleIndex = ($style->getId() + 1); // 1-based $cellsCount = count($dataRow); $this->maxNumColumns = max($this->maxNumColumns, $cellsCount); @@ -144,10 +148,12 @@ class Worksheet implements WorksheetInterface $nextCellIndex = 1; for ($i = 0; $i < $cellsCount; $i++) { - $currentCellValue = $dataRow[$currentCellIndex]; + $currentCellValue = $dataRowWithNumericIndexes[$currentCellIndex]; // Using isset here because it is way faster than array_key_exists... - if (!isset($dataRow[$nextCellIndex]) || $currentCellValue !== $dataRow[$nextCellIndex]) { + if (!isset($dataRowWithNumericIndexes[$nextCellIndex]) || + $currentCellValue !== $dataRowWithNumericIndexes[$nextCellIndex]) { + $numTimesValueRepeated = ($nextCellIndex - $currentCellIndex); $data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated); diff --git a/tests/Spout/Writer/CSV/WriterTest.php b/tests/Spout/Writer/CSV/WriterTest.php index fce430f..ee343c2 100644 --- a/tests/Spout/Writer/CSV/WriterTest.php +++ b/tests/Spout/Writer/CSV/WriterTest.php @@ -74,6 +74,20 @@ class WriterTest extends \PHPUnit_Framework_TestCase $this->assertContains(EncodingHelper::BOM_UTF8, $writtenContent, 'The CSV file should contain a UTF-8 BOM'); } + /** + * @return void + */ + public function testWriteShouldSupportAssociativeArrays() + { + $allRows = [ + ['foo' => 'csv--11', 'bar' => 'csv--12'], + ]; + $writtenContent = $this->writeToCsvFileAndReturnWrittenContent($allRows, 'csv_from_associative_arrays.csv'); + $writtenContent = $this->trimWrittenContent($writtenContent); + + $this->assertEquals('csv--11,csv--12', $writtenContent, 'Values from associative arrays should be written'); + } + /** * @return void */ diff --git a/tests/Spout/Writer/ODS/WriterTest.php b/tests/Spout/Writer/ODS/WriterTest.php index f00f485..4c67e4e 100644 --- a/tests/Spout/Writer/ODS/WriterTest.php +++ b/tests/Spout/Writer/ODS/WriterTest.php @@ -23,7 +23,7 @@ class WriterTest extends \PHPUnit_Framework_TestCase public function testAddRowShouldThrowExceptionIfCannotOpenAFileForWriting() { $fileName = 'file_that_wont_be_written.ods'; - $this->createUnwritableFolderIfNeeded($fileName); + $this->createUnwritableFolderIfNeeded(); $filePath = $this->getGeneratedUnwritableResourcePath($fileName); $writer = WriterFactory::create(Type::ODS); @@ -177,6 +177,25 @@ class WriterTest extends \PHPUnit_Framework_TestCase } } + /** + * @return void + */ + public function testAddRowShouldSupportAssociativeArrays() + { + $fileName = 'test_add_row_should_support_associative_arrays.ods'; + $dataRows = [ + ['foo' => 'ods--11', 'bar' => 'ods--12'], + ]; + + $this->writeToODSFile($dataRows, $fileName); + + foreach ($dataRows as $dataRow) { + foreach ($dataRow as $cellValue) { + $this->assertValueWasWritten($fileName, $cellValue); + } + } + } + /** * @return void */ diff --git a/tests/Spout/Writer/XLSX/WriterTest.php b/tests/Spout/Writer/XLSX/WriterTest.php index 7e10498..e74cd9c 100644 --- a/tests/Spout/Writer/XLSX/WriterTest.php +++ b/tests/Spout/Writer/XLSX/WriterTest.php @@ -21,7 +21,7 @@ class WriterTest extends \PHPUnit_Framework_TestCase public function testAddRowShouldThrowExceptionIfCannotOpenAFileForWriting() { $fileName = 'file_that_wont_be_written.xlsx'; - $this->createUnwritableFolderIfNeeded($fileName); + $this->createUnwritableFolderIfNeeded(); $filePath = $this->getGeneratedUnwritableResourcePath($fileName); $writer = WriterFactory::create(Type::XLSX); @@ -232,6 +232,25 @@ class WriterTest extends \PHPUnit_Framework_TestCase } } + /** + * @return void + */ + public function testAddRowShouldSupportAssociativeArrays() + { + $fileName = 'test_add_row_should_support_associative_arrays.xlsx'; + $dataRows = [ + ['foo' => 'xlsx--11', 'bar' => 'xlsx--12'], + ]; + + $this->writeToXLSXFile($dataRows, $fileName, $shouldUseInlineStrings = true); + + foreach ($dataRows as $dataRow) { + foreach ($dataRow as $cellValue) { + $this->assertInlineDataWasWrittenToSheet($fileName, 1, $cellValue); + } + } + } + /** * @return void */