Improve XLSX Escaper performance
This commit is contained in:
parent
5e7a1745ac
commit
a0749c63b9
@ -14,15 +14,23 @@ class XLSX implements EscaperInterface
|
|||||||
{
|
{
|
||||||
use Singleton;
|
use Singleton;
|
||||||
|
|
||||||
/** @var string[] Control characters to be escaped */
|
/** @var string Regex pattern to detect control characters that need to be escaped */
|
||||||
|
protected $escapableControlCharactersPattern;
|
||||||
|
|
||||||
|
/** @var string[] Map containing the escaped values (key) and the control characters they represent (value) */
|
||||||
protected $controlCharactersEscapingMap;
|
protected $controlCharactersEscapingMap;
|
||||||
|
|
||||||
|
/** @var string[] Flipped map containing the control characters to be escaped (key) and their escaped values (value) */
|
||||||
|
protected $controlCharactersEscapingReverseMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the singleton instance
|
* Initializes the singleton instance
|
||||||
*/
|
*/
|
||||||
protected function init()
|
protected function init()
|
||||||
{
|
{
|
||||||
|
$this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
|
||||||
$this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
|
$this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
|
||||||
|
$this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -53,6 +61,20 @@ class XLSX implements EscaperInterface
|
|||||||
return $unescapedString;
|
return $unescapedString;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string Regex pattern containing all escapable control characters
|
||||||
|
*/
|
||||||
|
protected function getEscapableControlCharactersPattern()
|
||||||
|
{
|
||||||
|
// control character values range from 0x00 to 0x1F in the ASCII table
|
||||||
|
// some characters should not be escaped though: "\t", "\r" and "\n".
|
||||||
|
return '[\x00-\x08' .
|
||||||
|
// skipping "\t" (0x9) and "\n" (0xA)
|
||||||
|
'\x0B-\x0C' .
|
||||||
|
// skipping "\r" (0xD)
|
||||||
|
'\x0E-\x1F]';
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds the map containing control characters to be escaped
|
* Builds the map containing control characters to be escaped
|
||||||
* mapped to their escaped values.
|
* mapped to their escaped values.
|
||||||
@ -66,14 +88,14 @@ class XLSX implements EscaperInterface
|
|||||||
protected function getControlCharactersEscapingMap()
|
protected function getControlCharactersEscapingMap()
|
||||||
{
|
{
|
||||||
$controlCharactersEscapingMap = [];
|
$controlCharactersEscapingMap = [];
|
||||||
$whitelistedControlCharacters = ["\t", "\r", "\n"];
|
|
||||||
|
|
||||||
// control character values range from 0x00 to 0x1F in the ASCII table
|
// control character values range from 0x00 to 0x1F in the ASCII table
|
||||||
for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) {
|
for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
|
||||||
if (!in_array(chr($charValue), $whitelistedControlCharacters)) {
|
$character = chr($charValue);
|
||||||
|
if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
|
||||||
$charHexValue = dechex($charValue);
|
$charHexValue = dechex($charValue);
|
||||||
$escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_';
|
$escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_';
|
||||||
$controlCharactersEscapingMap[$escapedChar] = chr($charValue);
|
$controlCharactersEscapingMap[$escapedChar] = $character;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +118,15 @@ class XLSX implements EscaperInterface
|
|||||||
protected function escapeControlCharacters($string)
|
protected function escapeControlCharacters($string)
|
||||||
{
|
{
|
||||||
$escapedString = $this->escapeEscapeCharacter($string);
|
$escapedString = $this->escapeEscapeCharacter($string);
|
||||||
return str_replace(array_values($this->controlCharactersEscapingMap), array_keys($this->controlCharactersEscapingMap), $escapedString);
|
|
||||||
|
// if no control characters
|
||||||
|
if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
|
||||||
|
return $escapedString;
|
||||||
|
}
|
||||||
|
|
||||||
|
return preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function($matches) {
|
||||||
|
return $this->controlCharactersEscapingReverseMap[$matches[0]];
|
||||||
|
}, $escapedString);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -126,6 +156,7 @@ class XLSX implements EscaperInterface
|
|||||||
protected function unescapeControlCharacters($string)
|
protected function unescapeControlCharacters($string)
|
||||||
{
|
{
|
||||||
$unescapedString = $string;
|
$unescapedString = $string;
|
||||||
|
|
||||||
foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
|
foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
|
||||||
// only unescape characters that don't contain the escaped escape character for now
|
// only unescape characters that don't contain the escaped escape character for now
|
||||||
$unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
|
$unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
|
||||||
|
@ -17,7 +17,11 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
|
|||||||
return [
|
return [
|
||||||
['test', 'test'],
|
['test', 'test'],
|
||||||
['adam\'s "car"', 'adam&#039;s &quot;car&quot;'],
|
['adam\'s "car"', 'adam&#039;s &quot;car&quot;'],
|
||||||
|
["\n", "\n"],
|
||||||
|
["\r", "\r"],
|
||||||
|
["\t", "\t"],
|
||||||
[chr(0), '_x0000_'],
|
[chr(0), '_x0000_'],
|
||||||
|
[chr(4), '_x0004_'],
|
||||||
['_x0000_', '_x005F_x0000_'],
|
['_x0000_', '_x005F_x0000_'],
|
||||||
[chr(21), '_x0015_'],
|
[chr(21), '_x0015_'],
|
||||||
['control '.chr(21).' character', 'control _x0015_ character'],
|
['control '.chr(21).' character', 'control _x0015_ character'],
|
||||||
@ -49,7 +53,11 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
|
|||||||
return [
|
return [
|
||||||
['test', 'test'],
|
['test', 'test'],
|
||||||
['adam&#039;s &quot;car&quot;', 'adam\'s "car"'],
|
['adam&#039;s &quot;car&quot;', 'adam\'s "car"'],
|
||||||
|
["\n", "\n"],
|
||||||
|
["\r", "\r"],
|
||||||
|
["\t", "\t"],
|
||||||
['_x0000_', chr(0)],
|
['_x0000_', chr(0)],
|
||||||
|
['_x0004_', chr(4)],
|
||||||
['_x005F_x0000_', '_x0000_'],
|
['_x005F_x0000_', '_x0000_'],
|
||||||
['_x0015_', chr(21)],
|
['_x0015_', chr(21)],
|
||||||
['control _x0015_ character', 'control '.chr(21).' character'],
|
['control _x0015_ character', 'control '.chr(21).' character'],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user