From 2a5f4f973e8775a4df103d0d5d424f9b31411fd6 Mon Sep 17 00:00:00 2001 From: Bob4ever Date: Thu, 11 Jun 2015 11:29:13 +0200 Subject: [PATCH 1/2] Added option for SharedStringsHelper to turn off the file cache --- .../Helper/XLSX/SharedStringsHelper.php | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php index ab33725..6406a53 100644 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php +++ b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php @@ -34,6 +34,9 @@ class SharedStringsHelper /** Value to use to escape the line feed character ("\n") */ const ESCAPED_LINE_FEED_CHARACTER = '_x000A_'; + /** This will increase your memory usage but can improve your execution time */ + static public $KEEP_ALL_STRINGS_IN_MEMORY = false; + /** @var string Path of the XLSX file being read */ protected $filePath; @@ -56,8 +59,8 @@ class SharedStringsHelper * @var string Contents of the temporary file that was last read * @see MAX_NUM_STRINGS_PER_TEMP_FILE */ - protected $inMemoryTempFileContents; - + protected $inMemoryContents; + /** * @param string $filePath Path of the XLSX file being read * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored @@ -141,11 +144,19 @@ class SharedStringsHelper $unescapedTextValue = $escaper->unescape($textValue); - // The shared string retrieval logic expects each cell data to be on one line only - // Encoding the line feed character allows to preserve this assumption - $lineFeedEncodedTextValue = $this->escapeLineFeed($unescapedTextValue); - - $this->writeSharedStringToTempFile($lineFeedEncodedTextValue, $sharedStringIndex); + if (self::$KEEP_ALL_STRINGS_IN_MEMORY) { + if (!Isset($this->inMemoryContents)) { + $this->inMemoryContents = []; + } + + $this->inMemoryContents[$sharedStringIndex] = $unescapedTextValue; + } else { + // The shared string retrieval logic expects each cell data to be on one line only + // Encoding the line feed character allows to preserve this assumption + $lineFeedEncodedTextValue = $this->escapeLineFeed($unescapedTextValue); + + $this->writeSharedStringToTempFile($lineFeedEncodedTextValue, $sharedStringIndex); + } $sharedStringIndex++; @@ -263,25 +274,32 @@ class SharedStringsHelper */ public function getStringAtIndex($sharedStringIndex) { - $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - $indexInFile = $sharedStringIndex % self::MAX_NUM_STRINGS_PER_TEMP_FILE; - - if (!file_exists($tempFilePath)) { - throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); - } - - if ($this->inMemoryTempFilePath !== $tempFilePath) { - // free memory - unset($this->inMemoryTempFileContents); - - $this->inMemoryTempFileContents = explode(PHP_EOL, file_get_contents($tempFilePath)); - $this->inMemoryTempFilePath = $tempFilePath; - } - $sharedString = null; - if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { - $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; - $sharedString = $this->unescapeLineFeed($escapedSharedString); + + if (self::$KEEP_ALL_STRINGS_IN_MEMORY) { + if (isset($this->inMemoryContents) && array_key_exists($sharedStringIndex, $this->inMemoryContents)) { + $sharedString = $this->inMemoryContents[$sharedStringIndex]; + } + } else { + $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); + $indexInFile = $sharedStringIndex % self::MAX_NUM_STRINGS_PER_TEMP_FILE; + + if (!file_exists($tempFilePath)) { + throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); + } + + if ($this->inMemoryTempFilePath !== $tempFilePath) { + // free memory + unset($this->inMemoryContents); + + $this->inMemoryContents = explode(PHP_EOL, file_get_contents($tempFilePath)); + $this->inMemoryTempFilePath = $tempFilePath; + } + + if (array_key_exists($indexInFile, $this->inMemoryContents)) { + $escapedSharedString = $this->inMemoryContents[$indexInFile]; + $sharedString = $this->unescapeLineFeed($escapedSharedString); + } } if ($sharedString === null) { From d52e87d4bde2d5259425a1ba9103bdd37c006d6d Mon Sep 17 00:00:00 2001 From: Bob4ever Date: Fri, 12 Jun 2015 09:53:38 +0200 Subject: [PATCH 2/2] Added method for XLSX reader to disable the sharedStringsFileCache --- .../Helper/XLSX/SharedStringsHelper.php | 32 ++++++++++--------- src/Spout/Reader/XLSX.php | 18 ++++++++++- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php index 6406a53..7ac0ddd 100644 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php +++ b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php @@ -34,8 +34,8 @@ class SharedStringsHelper /** Value to use to escape the line feed character ("\n") */ const ESCAPED_LINE_FEED_CHARACTER = '_x000A_'; - /** This will increase your memory usage but can improve your execution time */ - static public $KEEP_ALL_STRINGS_IN_MEMORY = false; + /** Disabling this will increase your memory usage but can improve your execution time */ + protected $useSharedStringsFileCache; /** @var string Path of the XLSX file being read */ protected $filePath; @@ -64,10 +64,12 @@ class SharedStringsHelper /** * @param string $filePath Path of the XLSX file being read * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored + * @param bool|void $useSharedStringsFileCache Disabling this will increase your memory usage but can improve your execution time */ - public function __construct($filePath, $tempFolder = null) + public function __construct($filePath, $tempFolder = null, $useSharedStringsFileCache = true) { $this->filePath = $filePath; + $this->useSharedStringsFileCache = $useSharedStringsFileCache; $rootTempFolder = ($tempFolder) ?: sys_get_temp_dir(); $this->fileSystemHelper = new FileSystemHelper($rootTempFolder); @@ -144,18 +146,18 @@ class SharedStringsHelper $unescapedTextValue = $escaper->unescape($textValue); - if (self::$KEEP_ALL_STRINGS_IN_MEMORY) { - if (!Isset($this->inMemoryContents)) { - $this->inMemoryContents = []; - } - - $this->inMemoryContents[$sharedStringIndex] = $unescapedTextValue; - } else { + if ($this->useSharedStringsFileCache) { // The shared string retrieval logic expects each cell data to be on one line only // Encoding the line feed character allows to preserve this assumption $lineFeedEncodedTextValue = $this->escapeLineFeed($unescapedTextValue); $this->writeSharedStringToTempFile($lineFeedEncodedTextValue, $sharedStringIndex); + } else { + if (!isset($this->inMemoryContents)) { + $this->inMemoryContents = []; + } + + $this->inMemoryContents[$sharedStringIndex] = $unescapedTextValue; } $sharedStringIndex++; @@ -276,11 +278,7 @@ class SharedStringsHelper { $sharedString = null; - if (self::$KEEP_ALL_STRINGS_IN_MEMORY) { - if (isset($this->inMemoryContents) && array_key_exists($sharedStringIndex, $this->inMemoryContents)) { - $sharedString = $this->inMemoryContents[$sharedStringIndex]; - } - } else { + if ($this->useSharedStringsFileCache) { $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); $indexInFile = $sharedStringIndex % self::MAX_NUM_STRINGS_PER_TEMP_FILE; @@ -300,6 +298,10 @@ class SharedStringsHelper $escapedSharedString = $this->inMemoryContents[$indexInFile]; $sharedString = $this->unescapeLineFeed($escapedSharedString); } + } else { + if (is_array($this->inMemoryContents) && array_key_exists($sharedStringIndex, $this->inMemoryContents)) { + $sharedString = $this->inMemoryContents[$sharedStringIndex]; + } } if ($sharedString === null) { diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index 83bee9d..8d4edb9 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -24,6 +24,9 @@ class XLSX extends AbstractReader /** @var string Temporary folder where the temporary files will be created */ protected $tempFolder; + + /** @var bool Disabling this will increase your memory usage but can improve your execution time */ + protected $useSharedStringsFileCache = true; /** @var \ZipArchive */ protected $zip; @@ -55,6 +58,19 @@ class XLSX extends AbstractReader $this->tempFolder = $tempFolder; return $this; } + + /** + * Disabling the shared strings file cache will increase your memory usage but can improve your execution time. + * The shared strings file cache is active by default. + * + * @param bool $useSharedStringsFileCache + * @return XLSX + */ + public function setUseSharedStringsFileCache($useSharedStringsFileCache) + { + $this->useSharedStringsFileCache = $useSharedStringsFileCache; + return $this; + } /** * Opens the file at the given file path to make it ready to be read. @@ -72,7 +88,7 @@ class XLSX extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); + $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder, $this->useSharedStringsFileCache); if ($this->sharedStringsHelper->hasSharedStrings()) { // Extracts all the strings from the worksheets for easy access in the future