Add support for multiline strings

Escaped line feed characters in shared strings before processing them.
This keeps every string on a single line and therefore allows
fast retrieval.
Replaced usages of "\n" with PHP_EOL
Added test for multiline strings
This commit is contained in:
Adrien Loison 2015-03-27 16:54:56 -07:00
parent c24cdbb9be
commit 6e11a043c1
6 changed files with 56 additions and 7 deletions

View File

@ -31,6 +31,9 @@ class SharedStringsHelper
*/
const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;
/** Value to use to escape the line feed character ("\n") */
const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -80,7 +83,6 @@ class SharedStringsHelper
* Please note that SimpleXML does not provide such a functionality but since it is faster
* and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose.
*
* @param string $filePath
* @return void
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
*/
@ -120,7 +122,12 @@ class SharedStringsHelper
}
$unescapedTextValue = $escaper->unescape($textValue);
$this->writeSharedStringToTempFile($unescapedTextValue, $sharedStringIndex);
// The shared string retrieval logic expects each cell data to be on one line only
// Encoding the line feed character allows to preserve this assumption
$lineFeedEncodedTextValue = $this->escapeLineFeed($unescapedTextValue);
$this->writeSharedStringToTempFile($lineFeedEncodedTextValue, $sharedStringIndex);
$sharedStringIndex++;
@ -246,7 +253,8 @@ class SharedStringsHelper
$sharedString = null;
if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) {
$sharedString = $this->inMemoryTempFileContents[$indexInFile];
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile];
$sharedString = $this->unescapeLineFeed($escapedSharedString);
}
if (!$sharedString) {
@ -256,6 +264,28 @@ class SharedStringsHelper
return rtrim($sharedString, PHP_EOL);
}
/**
 * Replaces every line feed character ("\n") found in the given string
 * with the ESCAPED_LINE_FEED_CHARACTER marker.
 *
 * @param string $unescapedString String that may contain line feed characters
 * @return string The string with each "\n" replaced by the marker
 */
private function escapeLineFeed($unescapedString)
{
    $lineFeed = "\n";
    $marker = self::ESCAPED_LINE_FEED_CHARACTER;

    return str_replace($lineFeed, $marker, $unescapedString);
}
/**
 * Replaces every occurrence of the ESCAPED_LINE_FEED_CHARACTER marker
 * found in the given string with a line feed character ("\n").
 *
 * @param string $escapedString String that may contain the escape marker
 * @return string The string with each marker replaced by "\n"
 */
private function unescapeLineFeed($escapedString)
{
    $marker = self::ESCAPED_LINE_FEED_CHARACTER;
    $lineFeed = "\n";

    return str_replace($marker, $lineFeed, $escapedString);
}
/**
* Deletes the created temporary folder and all its contents
*

View File

@ -5,7 +5,6 @@ namespace Box\Spout\Writer\Internal\XLSX;
use Box\Spout\Writer\Exception\SheetNotFoundException;
use Box\Spout\Writer\Helper\XLSX\FileSystemHelper;
use Box\Spout\Writer\Helper\XLSX\SharedStringsHelper;
use Box\Spout\Writer\Helper\XLSX\ZipHelper;
use Box\Spout\Writer\Sheet;
/**

View File

@ -42,7 +42,8 @@ EOD;
/**
* @param \Box\Spout\Writer\Sheet $externalSheet The associated "external" sheet
* @param string $tempFolder Temporary folder where the files to create the XLSX will be stored
* @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored
* @param \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper $sharedStringsHelper Helper for shared strings
* @param bool $shouldUseInlineStrings Whether inline or shared strings should be used
* @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing
*/

View File

@ -46,7 +46,7 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase
$this->assertEquals(1, count($filesInTempFolder), 'One temp file should have been created in the temp folder.');
$tempFileContents = file_get_contents($filesInTempFolder[0]);
$tempFileContentsPerLine = explode("\n", $tempFileContents);
$tempFileContentsPerLine = explode(PHP_EOL, $tempFileContents);
$this->assertEquals('s1--A1', $tempFileContentsPerLine[0]);
$this->assertEquals('s1--E5', $tempFileContentsPerLine[24]);
@ -96,4 +96,23 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase
$sharedString = $this->sharedStringsHelper->getStringAtIndex(24);
$this->assertEquals('s1--E5', $sharedString);
}
/**
 * Checks that shared strings containing line feeds are returned by
 * getStringAtIndex() with their "\n" characters intact.
 *
 * @return void
 */
public function testGetStringAtIndexShouldWorkWithMultilineStrings()
{
    $xlsxPath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx');
    $helper = new SharedStringsHelper($xlsxPath);
    $helper->extractSharedStrings();

    // Shared string index => expected multiline value
    $expectedStrings = [
        0 => "s1\nA1",
        24 => "s1\nE5",
    ];

    foreach ($expectedStrings as $index => $expectedString) {
        $this->assertEquals($expectedString, $helper->getStringAtIndex($index));
    }

    $helper->cleanup();
}
}

View File

@ -135,6 +135,6 @@ class CSVTest extends \PHPUnit_Framework_TestCase
private function trimWrittenContent($writtenContent)
{
// remove line feeds and UTF-8 BOM
// trim() treats its second argument as a list of characters, so any of the
// listed bytes are stripped from both ends of $writtenContent.
return trim($writtenContent, "\n" . CSV::UTF8_BOM);
// NOTE(review): this statement is unreachable because the return above always
// exits first. It looks like the old ("\n") and new (PHP_EOL) versions of the
// same line ended up side by side — confirm which one is intended.
return trim($writtenContent, PHP_EOL . CSV::UTF8_BOM);
}
}