Improve XLSX Escaper performance (#305)
This commit is contained in:
parent
5e7a1745ac
commit
435a9a016e
@ -14,15 +14,23 @@ class XLSX implements EscaperInterface
|
||||
{
|
||||
use Singleton;
|
||||
|
||||
/** @var string[] Control characters to be escaped */
|
||||
/** @var string Regex pattern to detect control characters that need to be escaped */
|
||||
protected $escapableControlCharactersPattern;
|
||||
|
||||
/** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
|
||||
protected $controlCharactersEscapingMap;
|
||||
|
||||
/** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */
|
||||
protected $controlCharactersEscapingReverseMap;
|
||||
|
||||
/**
 * Initializes the singleton instance.
 *
 * Caches the regex pattern describing escapable control characters, then
 * builds the escaping map and its flipped counterpart.
 *
 * @return void
 */
protected function init()
{
    // Must be assigned first: getControlCharactersEscapingMap() reads this pattern.
    $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();

    $escapingMap = $this->getControlCharactersEscapingMap();

    $this->controlCharactersEscapingMap = $escapingMap;
    // Flipped copy of the map, so lookups can go in the opposite direction.
    $this->controlCharactersEscapingReverseMap = array_flip($escapingMap);
}
|
||||
|
||||
/**
|
||||
@ -53,6 +61,20 @@ class XLSX implements EscaperInterface
|
||||
return $unescapedString;
|
||||
}
|
||||
|
||||
/**
 * Builds the regex character class matching every control character that
 * has to be escaped.
 *
 * Control characters span 0x00 to 0x1F in the ASCII table; "\t" (0x09),
 * "\n" (0x0A) and "\r" (0x0D) are deliberately left out of the class.
 *
 * @return string Regex pattern containing all escapable control characters
 */
protected function getEscapableControlCharactersPattern()
{
    $escapableRanges = [
        '\x00-\x08', // everything below "\t" (0x09) and "\n" (0x0A)
        '\x0B-\x0C', // the two characters between "\n" (0x0A) and "\r" (0x0D)
        '\x0E-\x1F', // everything above "\r" (0x0D)
    ];

    return '[' . implode('', $escapableRanges) . ']';
}
|
||||
|
||||
/**
|
||||
* Builds the map containing control characters to be escaped
|
||||
* mapped to their escaped values.
|
||||
@ -66,14 +88,14 @@ class XLSX implements EscaperInterface
|
||||
protected function getControlCharactersEscapingMap()
|
||||
{
|
||||
$controlCharactersEscapingMap = [];
|
||||
$whitelistedControlCharacters = ["\t", "\r", "\n"];
|
||||
|
||||
// control characters values are from 0 to 1F (hex values) in the ASCII table
|
||||
for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) {
|
||||
if (!in_array(chr($charValue), $whitelistedControlCharacters)) {
|
||||
for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
|
||||
$character = chr($charValue);
|
||||
if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
|
||||
$charHexValue = dechex($charValue);
|
||||
$escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_';
|
||||
$controlCharactersEscapingMap[$escapedChar] = chr($charValue);
|
||||
$controlCharactersEscapingMap[$escapedChar] = $character;
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +118,15 @@ class XLSX implements EscaperInterface
|
||||
protected function escapeControlCharacters($string)
{
    // Run the string through escapeEscapeCharacter() before handling control characters.
    $escapedString = $this->escapeEscapeCharacter($string);

    // Delimited pattern built once and shared by the detection and the replacement.
    $pattern = "/{$this->escapableControlCharactersPattern}/";

    // Fast path: when nothing needs escaping, skip the callback-based replacement.
    // (The former unconditional str_replace() return made this path unreachable.)
    if (!preg_match($pattern, $escapedString)) {
        return $escapedString;
    }

    // $matches[0] is the matched control character; the reverse map is keyed by
    // control character and holds its escaped representation.
    return preg_replace_callback($pattern, function ($matches) {
        return $this->controlCharactersEscapingReverseMap[$matches[0]];
    }, $escapedString);
}
|
||||
|
||||
/**
|
||||
@ -126,6 +156,7 @@ class XLSX implements EscaperInterface
|
||||
protected function unescapeControlCharacters($string)
|
||||
{
|
||||
$unescapedString = $string;
|
||||
|
||||
foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
|
||||
// only unescape characters that don't contain the escaped escape character for now
|
||||
$unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
|
||||
|
@ -17,7 +17,11 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
|
||||
return [
|
||||
['test', 'test'],
|
||||
['adam\'s "car"', 'adam&#039;s &quot;car&quot;'],
|
||||
["\n", "\n"],
|
||||
["\r", "\r"],
|
||||
["\t", "\t"],
|
||||
[chr(0), '_x0000_'],
|
||||
[chr(4), '_x0004_'],
|
||||
['_x0000_', '_x005F_x0000_'],
|
||||
[chr(21), '_x0015_'],
|
||||
['control '.chr(21).' character', 'control _x0015_ character'],
|
||||
@ -49,7 +53,11 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
|
||||
return [
|
||||
['test', 'test'],
|
||||
['adam&#039;s &quot;car&quot;', 'adam\'s "car"'],
|
||||
["\n", "\n"],
|
||||
["\r", "\r"],
|
||||
["\t", "\t"],
|
||||
['_x0000_', chr(0)],
|
||||
['_x0004_', chr(4)],
|
||||
['_x005F_x0000_', '_x0000_'],
|
||||
['_x0015_', chr(21)],
|
||||
['control _x0015_ character', 'control '.chr(21).' character'],
|
||||
|
Loading…
x
Reference in New Issue
Block a user