diff --git a/src/Spout/Common/Escaper/XLSX.php b/src/Spout/Common/Escaper/XLSX.php index b5719a0..8ca317f 100644 --- a/src/Spout/Common/Escaper/XLSX.php +++ b/src/Spout/Common/Escaper/XLSX.php @@ -14,15 +14,23 @@ class XLSX implements EscaperInterface { use Singleton; - /** @var string[] Control characters to be escaped */ + /** @var string Regex pattern to detect control characters that need to be escaped */ + protected $escapableControlCharactersPattern; + + /** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */ protected $controlCharactersEscapingMap; + /** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */ + protected $controlCharactersEscapingReverseMap; + /** * Initializes the singleton instance */ protected function init() { + $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern(); $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap(); + $this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap); } /** @@ -53,6 +61,20 @@ class XLSX implements EscaperInterface return $unescapedString; } + /** + * @return string Regex pattern containing all escapable control characters + */ + protected function getEscapableControlCharactersPattern() + { + // control characters values are from 0 to 1F (hex values) in the ASCII table + // some characters should not be escaped though: "\t", "\r" and "\n". + return '[\x00-\x08' . + // skipping "\t" (0x9) and "\n" (0xA) + '\x0B-\x0C' . + // skipping "\r" (0xD) + '\x0E-\x1F]'; + } + /** * Builds the map containing control characters to be escaped * mapped to their escaped values. @@ -66,14 +88,14 @@ class XLSX implements EscaperInterface protected function getControlCharactersEscapingMap() { $controlCharactersEscapingMap = []; - $whitelistedControlCharacters = ["\t", "\r", "\n"]; // control characters values are from 0 to 1F (hex values) in the ASCII table - for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) { - if (!in_array(chr($charValue), $whitelistedControlCharacters)) { + for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) { + $character = chr($charValue); + if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) { $charHexValue = dechex($charValue); $escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_'; - $controlCharactersEscapingMap[$escapedChar] = chr($charValue); + $controlCharactersEscapingMap[$escapedChar] = $character; } } @@ -96,7 +118,15 @@ class XLSX implements EscaperInterface protected function escapeControlCharacters($string) { $escapedString = $this->escapeEscapeCharacter($string); - return str_replace(array_values($this->controlCharactersEscapingMap), array_keys($this->controlCharactersEscapingMap), $escapedString); + + // if no control characters + if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) { + return $escapedString; + } + + return preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function($matches) { + return $this->controlCharactersEscapingReverseMap[$matches[0]]; + }, $escapedString); } /** @@ -126,6 +156,7 @@ class XLSX implements EscaperInterface protected function unescapeControlCharacters($string) { $unescapedString = $string; + foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) { // only unescape characters that don't contain the escaped escape character for now $unescapedString = preg_replace("/(?