diff --git a/README.md b/README.md index 951d984..500f7ce 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,6 @@ require_once '[PATH/TO]/src/Spout/Autoloader/autoload.php'; // don't forget to c * PHP version 5.4.0 or higher * PHP extension `php_zip` enabled * PHP extension `php_xmlreader` enabled -* PHP extension `php_simplexml` enabled ## Basic usage diff --git a/composer.json b/composer.json index 5c38dfb..76485c9 100644 --- a/composer.json +++ b/composer.json @@ -14,8 +14,7 @@ "require": { "php": ">=5.4.0", "ext-zip": "*", - "ext-xmlreader" : "*", - "ext-simplexml": "*" + "ext-xmlreader" : "*" }, "require-dev": { "phpunit/phpunit": "^4.8.0" diff --git a/src/Spout/Common/Escaper/ODS.php b/src/Spout/Common/Escaper/ODS.php index 5f8a7c9..4ffd766 100644 --- a/src/Spout/Common/Escaper/ODS.php +++ b/src/Spout/Common/Escaper/ODS.php @@ -26,10 +26,13 @@ class ODS implements EscaperInterface // 'ENT_DISALLOWED' ensures that invalid characters in the given document type are replaced. // Otherwise control characters like a vertical tab "\v" will make the XML document unreadable by the XML processor // @link https://github.com/box/spout/issues/329 - $replacedString = htmlspecialchars($string, ENT_QUOTES | ENT_DISALLOWED); + $replacedString = htmlspecialchars($string, ENT_NOQUOTES | ENT_DISALLOWED); } else { - // We are on hhvm or any other engine that does not support ENT_DISALLOWED - $escapedString = htmlspecialchars($string, ENT_QUOTES); + // We are on hhvm or any other engine that does not support ENT_DISALLOWED. + // + // @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded. + // Single and double quotes can be left as is. + $escapedString = htmlspecialchars($string, ENT_NOQUOTES); // control characters values are from 0 to 1F (hex values) in the ASCII table // some characters should not be escaped though: "\t", "\r" and "\n". @@ -52,6 +55,12 @@ class ODS implements EscaperInterface */ public function unescape($string) { - return htmlspecialchars_decode($string, ENT_QUOTES); + // ============== + // = WARNING = + // ============== + // It is assumed that the given string has already had its XML entities decoded. + // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation). + // Therefore there is no need to call "htmlspecialchars_decode()". + return $string; } } diff --git a/src/Spout/Common/Escaper/XLSX.php b/src/Spout/Common/Escaper/XLSX.php index 8ca317f..7c6c61b 100644 --- a/src/Spout/Common/Escaper/XLSX.php +++ b/src/Spout/Common/Escaper/XLSX.php @@ -42,7 +42,9 @@ class XLSX implements EscaperInterface public function escape($string) { $escapedString = $this->escapeControlCharacters($string); - $escapedString = htmlspecialchars($escapedString, ENT_QUOTES); + // @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded. + // Single and double quotes can be left as is. + $escapedString = htmlspecialchars($escapedString, ENT_NOQUOTES); return $escapedString; } @@ -55,8 +57,13 @@ class XLSX implements EscaperInterface */ public function unescape($string) { - $unescapedString = htmlspecialchars_decode($string, ENT_QUOTES); - $unescapedString = $this->unescapeControlCharacters($unescapedString); + // ============== + // = WARNING = + // ============== + // It is assumed that the given string has already had its XML entities decoded. + // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation). + // Therefore there is no need to call "htmlspecialchars_decode()". + $unescapedString = $this->unescapeControlCharacters($string); return $unescapedString; } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index ca38e05..46ff33e 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -202,6 +202,7 @@ class RowIterator implements IteratorInterface { $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); + // NOTE: expand() will automatically decode all XML entities of the child nodes $node = $xmlReader->expand(); $currentCellValue = $this->getCellValue($node); diff --git a/src/Spout/Reader/Wrapper/SimpleXMLElement.php b/src/Spout/Reader/Wrapper/SimpleXMLElement.php deleted file mode 100644 index a6f723c..0000000 --- a/src/Spout/Reader/Wrapper/SimpleXMLElement.php +++ /dev/null @@ -1,159 +0,0 @@ -useXMLInternalErrors(); - - try { - $this->simpleXMLElement = new \SimpleXMLElement($xmlData); - } catch (\Exception $exception) { - // if the data is invalid, the constructor will throw an Exception - $this->resetXMLInternalErrorsSetting(); - throw new XMLProcessingException($this->getLastXMLErrorMessage()); - } - - $this->resetXMLInternalErrorsSetting(); - } - - /** - * Returns the attribute for the given name. - * - * @param string $name Attribute name - * @param string|null|void $namespace An optional namespace for the retrieved attributes - * @return string|null The attribute value or NULL if attribute not found - */ - public function getAttribute($name, $namespace = null) - { - $isPrefix = ($namespace !== null); - $attributes = $this->simpleXMLElement->attributes($namespace, $isPrefix); - $attributeValue = $attributes->{$name}; - - return ($attributeValue !== null) ? (string) $attributeValue : null; - } - - /** - * Creates a prefix/ns context for the next XPath query - * @see \SimpleXMLElement::registerXPathNamespace - * - * @param string $prefix The namespace prefix to use in the XPath query for the namespace given in "namespace". - * @param string $namespace The namespace to use for the XPath query. This must match a namespace in - * use by the XML document or the XPath query using "prefix" will not return any results. - * @return bool TRUE on success or FALSE on failure. - */ - public function registerXPathNamespace($prefix, $namespace) - { - return $this->simpleXMLElement->registerXPathNamespace($prefix, $namespace); - } - - /** - * Runs XPath query on XML data - * @see \SimpleXMLElement::xpath - * - * @param string $path An XPath path - * @return SimpleXMLElement[]|bool an array of SimpleXMLElement objects or FALSE in case of an error. - */ - public function xpath($path) - { - $elements = $this->simpleXMLElement->xpath($path); - - if ($elements !== false) { - $wrappedElements = []; - foreach ($elements as $element) { - $wrappedElement = $this->wrapSimpleXMLElement($element); - - if ($wrappedElement !== null) { - $wrappedElements[] = $this->wrapSimpleXMLElement($element); - } - } - - $elements = $wrappedElements; - } - - return $elements; - } - - /** - * Wraps the given element into an instance of the wrapper - * - * @param \SimpleXMLElement $element Element to be wrapped - * @return SimpleXMLElement|null The wrapped element or NULL if the given element is invalid - */ - protected function wrapSimpleXMLElement(\SimpleXMLElement $element) - { - $wrappedElement = null; - $elementAsXML = $element->asXML(); - - if ($elementAsXML !== false) { - $wrappedElement = new SimpleXMLElement($elementAsXML); - } - - return $wrappedElement; - } - - /** - * Remove all nodes matching the given XPath query. - * It does not map to any \SimpleXMLElement function. - * - * @param string $path An XPath path - * @return void - */ - public function removeNodesMatchingXPath($path) - { - $nodesToRemove = $this->simpleXMLElement->xpath($path); - - foreach ($nodesToRemove as $nodeToRemove) { - unset($nodeToRemove[0]); - } - } - - /** - * Returns the first child matching the given tag name - * - * @param string $tagName - * @return SimpleXMLElement|null The first child matching the tag name or NULL if none found - */ - public function getFirstChildByTagName($tagName) - { - $doesElementExist = isset($this->simpleXMLElement->{$tagName}); - - /** @var \SimpleXMLElement $realElement */ - $realElement = $this->simpleXMLElement->{$tagName}; - - return $doesElementExist ? $this->wrapSimpleXMLElement($realElement) : null; - } - - /** - * @return string - */ - public function __toString() - { - return $this->simpleXMLElement->__toString(); - } -} diff --git a/src/Spout/Reader/Wrapper/XMLReader.php b/src/Spout/Reader/Wrapper/XMLReader.php index ffe6818..2e20327 100644 --- a/src/Spout/Reader/Wrapper/XMLReader.php +++ b/src/Spout/Reader/Wrapper/XMLReader.php @@ -1,6 +1,7 @@ getSharedStringsFilePath(); if ($xmlReader->open($sharedStringsFilePath) === false) { @@ -91,14 +100,14 @@ class SharedStringsHelper $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); - $xmlReader->readUntilNodeFound('si'); + $xmlReader->readUntilNodeFound(self::XML_NODE_SI); - while ($xmlReader->name === 'si') { - $this->processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper); + while ($xmlReader->name === self::XML_NODE_SI) { + $this->processSharedStringsItem($xmlReader, $sharedStringIndex); $sharedStringIndex++; - // jump to the next 'si' tag - $xmlReader->next('si'); + // jump to the next '' tag + $xmlReader->next(self::XML_NODE_SI); } $this->cachingStrategy->closeCache(); @@ -127,19 +136,19 @@ class SharedStringsHelper */ protected function getSharedStringsUniqueCount($xmlReader) { - $xmlReader->next('sst'); + $xmlReader->next(self::XML_NODE_SST); // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) - while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== XMLReader::ELEMENT) { + while ($xmlReader->name === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) { $xmlReader->read(); } - $uniqueCount = $xmlReader->getAttribute('uniqueCount'); + $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_UNIQUE_COUNT); // some software do not add the "uniqueCount" attribute but only use the "count" one // @see https://github.com/box/spout/issues/254 if ($uniqueCount === null) { - $uniqueCount = $xmlReader->getAttribute('count'); + $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT); } return ($uniqueCount !== null) ? intval($uniqueCount) : null; @@ -160,99 +169,54 @@ class SharedStringsHelper /** * Processes the shared strings item XML node which the given XML reader is positioned on. * - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "" node * @param int $sharedStringIndex Index of the processed shared strings item - * @param \Box\Spout\Common\Escaper\XLSX $escaper Helper to escape values * @return void */ - protected function processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper) + protected function processSharedStringsItem($xmlReader, $sharedStringIndex) { - $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); - $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); + $sharedStringValue = ''; - // removes nodes that should not be read, like the pronunciation of the Kanji characters - $cleanNode = $this->removeSuperfluousTextNodes($node); + // NOTE: expand() will automatically decode all XML entities of the child nodes + $siNode = $xmlReader->expand(); + $textNodes = $siNode->getElementsByTagName(self::XML_NODE_T); - // find all text nodes "t"; there can be multiple if the cell contains formatting - $textNodes = $cleanNode->xpath('//ns:t'); + foreach ($textNodes as $textNode) { + if ($this->shouldExtractTextNodeValue($textNode)) { + $textNodeValue = $textNode->nodeValue; + $shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode); - $textValue = $this->extractTextValueForNodes($textNodes); - $unescapedTextValue = $escaper->unescape($textValue); + $sharedStringValue .= ($shouldPreserveWhitespace) ? $textNodeValue : trim($textNodeValue); + } + } - $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); + $this->cachingStrategy->addStringForIndex($sharedStringValue, $sharedStringIndex); } /** - * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. - * This is to simplify the parsing of the subtree. + * Not all text nodes' values must be extracted. + * Some text nodes are part of a node describing the pronunciation for instance. + * We'll only consider the nodes whose parents are "" or "". * - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader - * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement - * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read + * @param \DOMElement $textNode Text node to check + * @return bool Whether the given text node's value must be extracted */ - protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) + protected function shouldExtractTextNodeValue($textNode) { - $node = null; - try { - $node = new SimpleXMLElement($xmlReader->readOuterXml()); - } catch (XMLProcessingException $exception) { - throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}]."); - } - - return $node; - } - - /** - * Removes nodes that should not be read, like the pronunciation of the Kanji characters. - * By keeping them, their text content would be added to the read string. - * - * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove - * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node - */ - protected function removeSuperfluousTextNodes($parentNode) - { - $tagsToRemove = [ - 'rPh', // Pronunciation of the text - 'pPr', // Paragraph Properties / Previous Paragraph Properties - 'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark - ]; - - foreach ($tagsToRemove as $tagToRemove) { - $xpath = '//ns:' . $tagToRemove; - $parentNode->removeNodesMatchingXPath($xpath); - } - - return $parentNode; - } - - /** - * @param array $textNodes Text XML nodes ("") - * @return string The value associated with the given text node(s) - */ - protected function extractTextValueForNodes($textNodes) - { - $textValue = ''; - - foreach ($textNodes as $nodeIndex => $textNode) { - $textNodeAsString = $textNode->__toString(); - $shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode); - - $textValue .= ($shouldPreserveWhitespace) ? $textNodeAsString : trim($textNodeAsString); - } - - return $textValue; + $parentTagName = $textNode->parentNode->localName; + return ($parentTagName === self::XML_NODE_SI || $parentTagName === self::XML_NODE_R); } /** * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. * - * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element () whitespace may be preserved + * @param \DOMElement $textNode The text node element () whose whitespace may be preserved * @return bool Whether whitespace should be preserved */ protected function shouldPreserveWhitespace($textNode) { - $spaceValue = $textNode->getAttribute('space', 'xml'); - return ($spaceValue === 'preserve'); + $spaceValue = $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE); + return ($spaceValue === self::XML_ATTRIBUTE_VALUE_PRESERVE); } /** diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index 7716e2c..2440593 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -260,6 +260,7 @@ class RowIterator implements IteratorInterface { $currentColumnIndex = $this->getColumnIndex($xmlReader); + // NOTE: expand() will automatically decode all XML entities of the child nodes $node = $xmlReader->expand(); $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node); $this->lastColumnIndexProcessed = $currentColumnIndex; diff --git a/tests/Spout/Common/Escaper/ODSTest.php b/tests/Spout/Common/Escaper/ODSTest.php index fb5c960..77b4913 100644 --- a/tests/Spout/Common/Escaper/ODSTest.php +++ b/tests/Spout/Common/Escaper/ODSTest.php @@ -16,7 +16,7 @@ class ODSTest extends \PHPUnit_Framework_TestCase { return [ ['test', 'test'], - ['carl\'s "pokemon"', 'carl's "pokemon"'], + ['carl\'s "pokemon"', 'carl\'s "pokemon"'], ["\n", "\n"], ["\r", "\r"], ["\t", "\t"], diff --git a/tests/Spout/Common/Escaper/XLSXTest.php b/tests/Spout/Common/Escaper/XLSXTest.php index 7cede8b..0f2098f 100644 --- a/tests/Spout/Common/Escaper/XLSXTest.php +++ b/tests/Spout/Common/Escaper/XLSXTest.php @@ -16,7 +16,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase { return [ ['test', 'test'], - ['adam\'s "car"', 'adam's "car"'], + ['adam\'s "car"', 'adam\'s "car"'], ["\n", "\n"], ["\r", "\r"], ["\t", "\t"], @@ -25,7 +25,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase ['_x0000_', '_x005F_x0000_'], [chr(21), '_x0015_'], ['control '.chr(21).' character', 'control _x0015_ character'], - ['control\'s '.chr(21).' "character"', 'control's _x0015_ "character"'], + ['control\'s '.chr(21).' "character"', 'control\'s _x0015_ "character"'], ]; } @@ -52,7 +52,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase { return [ ['test', 'test'], - ['adam's "car"', 'adam\'s "car"'], + ['adam's "car"', 'adam's "car"'], ["\n", "\n"], ["\r", "\r"], ["\t", "\t"], @@ -61,7 +61,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase ['_x005F_x0000_', '_x0000_'], ['_x0015_', chr(21)], ['control _x0015_ character', 'control '.chr(21).' character'], - ['control's _x0015_ "character"', 'control\'s '.chr(21).' "character"'], + ['control's _x0015_ "character"', 'control's '.chr(21).' "character"'], ]; } diff --git a/tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php b/tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php deleted file mode 100644 index bb074d7..0000000 --- a/tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php +++ /dev/null @@ -1,127 +0,0 @@ -'; - new SimpleXMLElement($invalidXML); - } - - /** - * @return array - */ - public function dataProviderForTestGetAttribute() - { - $xmlWithoutNamespace = << - -XML; - - $xmlWithHalfNamespace = << - -XML; - - $xmlWithFullNamespace = << - -XML; - - return [ - [$xmlWithoutNamespace, null, ['foo' => 'bar', 'type' => 'test']], - [$xmlWithHalfNamespace, null, ['foo' => 'bar', 'type' => null]], - [$xmlWithFullNamespace, null, ['foo' => null, 'type' => null]], - [$xmlWithoutNamespace, 'r', ['foo' => null, 'type' => null]], - [$xmlWithHalfNamespace, 'r', ['foo' => null, 'type' => 'test']], - [$xmlWithFullNamespace, 'r', ['foo' => 'bar', 'type' => 'test']], - ]; - } - - /** - * @dataProvider dataProviderForTestGetAttribute - * - * @param string $xml - * @param string|null $namespace - * @param array $expectedAttributes - * @return void - */ - public function testGetAttribute($xml, $namespace, $expectedAttributes) - { - $element = new SimpleXMLElement($xml); - - foreach ($expectedAttributes as $name => $expectedValue) { - $value = $element->getAttribute($name, $namespace); - $this->assertEquals($expectedValue, $value); - } - } - - /** - * @return void - */ - public function testXPath() - { - $xml = << - - - - 0 - 1 - - - -XML; - - $element = new SimpleXMLElement($xml); - $matchedElements = $element->xpath('//c'); - - $this->assertEquals(2, count($matchedElements)); - $this->assertTrue($matchedElements[0] instanceof SimpleXMLElement, 'The SimpleXMLElement should be wrapped'); - $this->assertEquals('A2', $matchedElements[1]->getAttribute('r')); - } - - /** - * @return void - */ - public function testRemoveNodeMatchingXPath() - { - $xml = << - - - - 0 - 1 - - - -XML; - - $element = new SimpleXMLElement($xml); - $this->assertNotNull($element->getFirstChildByTagName('sheetData')); - - $element->removeNodesMatchingXPath('//sheetData'); - $this->assertNull($element->getFirstChildByTagName('sheetData')); - } -} diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php index 31f9c71..17d1b52 100644 --- a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -98,6 +98,22 @@ class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase $sharedStringsHelper->cleanup(); } + /** + * @return void + */ + public function testGetStringAtIndexShouldNotDoubleDecodeHTMLEntities() + { + $resourcePath = $this->getResourcePath('one_sheet_with_pre_encoded_html_entities.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('quote: " - ampersand: &', $sharedString); + + $sharedStringsHelper->cleanup(); + } + /** * @return void */ diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 51b79d4..b9e032e 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -426,42 +426,46 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRow, $allRows[0], 'Pronunciation data should be removed.'); } - /** - * @return array - */ - public function dataProviderForTestReadShouldBeProtectedAgainstAttacks() - { - return [ - ['attack_billion_laughs.xlsx'], - ['attack_quadratic_blowup.xlsx'], - ]; - } - - /** - * @dataProvider dataProviderForTestReadShouldBeProtectedAgainstAttacks * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) * - * @param string $fileName * @return void */ - public function testReadShouldBeProtectedAgainstAttacks($fileName) + public function testReadShouldBeProtectedAgainstBillionLaughsAttack() { $startTime = microtime(true); try { // using @ to prevent warnings/errors from being displayed - @$this->getAllRowsForFile($fileName); + @$this->getAllRowsForFile('attack_billion_laughs.xlsx'); $this->fail('An exception should have been thrown'); } catch (IOException $exception) { $duration = microtime(true) - $startTime; $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); - $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB + $expectedMaxMemoryUsage = 40 * 1024 * 1024; // 40MB $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); } } + /** + * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) + * + * @return void + */ + public function testReadShouldBeProtectedAgainstQuadraticBlowupAttack() + { + $startTime = microtime(true); + + $this->getAllRowsForFile('attack_quadratic_blowup.xlsx'); + + $duration = microtime(true) - $startTime; + $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); + + $expectedMaxMemoryUsage = 40 * 1024 * 1024; // 40MB + $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); + } + /** * @return void */ diff --git a/tests/Spout/Writer/ODS/WriterTest.php b/tests/Spout/Writer/ODS/WriterTest.php index 172f8bd..836e679 100644 --- a/tests/Spout/Writer/ODS/WriterTest.php +++ b/tests/Spout/Writer/ODS/WriterTest.php @@ -421,7 +421,7 @@ class WriterTest extends \PHPUnit_Framework_TestCase $this->writeToODSFile($dataRows, $fileName); - $this->assertValueWasWritten($fileName, 'I'm in "great" mood', 'Quotes should be escaped'); + $this->assertValueWasWritten($fileName, 'I\'m in "great" mood', 'Quotes should not be escaped'); $this->assertValueWasWritten($fileName, 'This <must> be escaped & tested', '<, > and & should be escaped'); } diff --git a/tests/Spout/Writer/XLSX/WriterTest.php b/tests/Spout/Writer/XLSX/WriterTest.php index 86ae1f6..c7f4f96 100644 --- a/tests/Spout/Writer/XLSX/WriterTest.php +++ b/tests/Spout/Writer/XLSX/WriterTest.php @@ -473,7 +473,7 @@ class WriterTest extends \PHPUnit_Framework_TestCase $this->writeToXLSXFile($dataRows, $fileName); - $this->assertInlineDataWasWrittenToSheet($fileName, 1, 'I'm in "great" mood', 'Quotes should be escaped'); + $this->assertInlineDataWasWrittenToSheet($fileName, 1, 'I\'m in "great" mood', 'Quotes should not be escaped'); $this->assertInlineDataWasWrittenToSheet($fileName, 1, 'This <must> be escaped & tested', '<, > and & should be escaped'); } @@ -482,14 +482,14 @@ class WriterTest extends \PHPUnit_Framework_TestCase */ public function testAddRowShouldEscapeControlCharacters() { - $fileName = 'test_add_row_should_escape_html_special_characters.xlsx'; + $fileName = 'test_add_row_should_escape_control_characters.xlsx'; $dataRows = [ - ['control\'s '.chr(21).' "character"'], + ['control '.chr(21).' character'], ]; $this->writeToXLSXFile($dataRows, $fileName); - $this->assertInlineDataWasWrittenToSheet($fileName, 1, 'control's _x0015_ "character"'); + $this->assertInlineDataWasWrittenToSheet($fileName, 1, 'control _x0015_ character'); } /** diff --git a/tests/resources/xlsx/one_sheet_with_pre_encoded_html_entities.xlsx b/tests/resources/xlsx/one_sheet_with_pre_encoded_html_entities.xlsx new file mode 100644 index 0000000..3265403 Binary files /dev/null and b/tests/resources/xlsx/one_sheet_with_pre_encoded_html_entities.xlsx differ