diff --git a/src/Spout/Common/Escaper/ODS.php b/src/Spout/Common/Escaper/ODS.php index 9462ab3..5e6b695 100644 --- a/src/Spout/Common/Escaper/ODS.php +++ b/src/Spout/Common/Escaper/ODS.php @@ -14,6 +14,31 @@ class ODS implements EscaperInterface { use Singleton; + /** @var string Regex pattern to detect control characters that need to be escaped */ + protected $escapableControlCharactersPattern; + + /** + * Initializes the singleton instance + */ + protected function init() + { + $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern(); + } + + /** + * @return string Regex pattern containing all escapable control characters + */ + protected function getEscapableControlCharactersPattern() + { + // control characters values are from 0 to 1F (hex values) in the ASCII table + // some characters should not be escaped though: "\t", "\r" and "\n". + return '[\x00-\x08' . + // skipping "\t" (0x9) and "\n" (0xA) + '\x0B-\x0C' . + // skipping "\r" (0xD) + '\x0E-\x1F]'; + } + /** * Escapes the given string to make it compatible with XLSX * @@ -23,12 +48,14 @@ class ODS implements EscaperInterface public function escape($string) { if (defined('ENT_DISALLOWED')) { + // 'ENT_DISALLOWED' ensures that invalid characters in the given document type are replaced. + // Otherwise characters like a vertical tab "\v" will make the XML document unreadable by the XML processor + // @link https://github.com/box/spout/issues/329 return htmlspecialchars($string, ENT_QUOTES | ENT_DISALLOWED); } else { // We are on hhvm or any other engine that does not support ENT_DISALLOWED - // https://github.com/box/spout/issues/329 $escapedString = htmlspecialchars($string, ENT_QUOTES); - $replacedString = preg_replace('/[\x00-\x08\x0B-\x0C\x0E-\x1F]/', '�', $escapedString); + $replacedString = preg_replace('/'.$this->escapableControlCharactersPattern.'/', '�', $escapedString); return $replacedString; } } diff --git a/tests/Spout/Common/Escaper/ODSTest.php b/tests/Spout/Common/Escaper/ODSTest.php new file mode 100644 index 0000000..fb5c960 --- /dev/null +++ b/tests/Spout/Common/Escaper/ODSTest.php @@ -0,0 +1,42 @@ +escape($stringToEscape); + + $this->assertEquals($expectedEscapedString, $escapedString, 'Incorrect escaped string'); + } +} diff --git a/tests/Spout/Writer/ODS/WriterTest.php b/tests/Spout/Writer/ODS/WriterTest.php index 7ae6f30..7c4b396 100644 --- a/tests/Spout/Writer/ODS/WriterTest.php +++ b/tests/Spout/Writer/ODS/WriterTest.php @@ -4,7 +4,6 @@ namespace Box\Spout\Writer\ODS; use Box\Spout\Common\Exception\SpoutException; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\TestUsingResource; use Box\Spout\Writer\Common\Helper\ZipHelper; @@ -447,47 +446,6 @@ class WriterTest extends \PHPUnit_Framework_TestCase $this->assertEquals('application/vnd.oasis.opendocument.spreadsheet', $finfo->file($resourcePath)); } - /** - * https://github.com/box/spout/issues/329 - * @return void - */ - public function testGeneratedFileWithIllegalCharsCanBeRead() - { - $fileName = 'test_illegal_characters.ods'; - $dataRows = [ - ['I am a text'], - ['I am a vertical tab:' . "\v"], - ['I am a form feed:' . "\f"], - ]; - - $this->writeToODSFile($dataRows, $fileName); - - $resourcePath = $this->getGeneratedResourcePath($fileName); - - $reader = ReaderFactory::create(Type::ODS); - $reader->open($resourcePath); - - $canBeRead = false; - $rowsRead = []; - try { - foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { - foreach ($sheet->getRowIterator() as $rowIndex => $row) { - $rowsRead[] = $row; - } - } - $canBeRead = true; - $reader->close(); - } catch(\Exception $e) {} - - $this->assertTrue($canBeRead, 'The file with illegal chars can be read'); - $dataRowsExpected = [ - ['I am a text'], - ['I am a vertical tab:�'], - ['I am a form feed:�'], - ]; - $this->assertEquals($dataRowsExpected, $rowsRead, 'Correct rows with unicode replacement are read'); - } - /** * @param array $allRows * @param string $fileName