Add wrappers around XMLReader and SimpleXMLElement to improve error handling
This commit is contained in:
parent
be3932af18
commit
1ba10ed2b0
@ -40,7 +40,7 @@ class FileSystemHelper
|
||||
|
||||
$wasCreationSuccessful = mkdir($folderPath, 0777, true);
|
||||
if (!$wasCreationSuccessful) {
|
||||
throw new IOException('Unable to create folder: ' . $folderPath);
|
||||
throw new IOException("Unable to create folder: $folderPath");
|
||||
}
|
||||
|
||||
return $folderPath;
|
||||
@ -64,7 +64,7 @@ class FileSystemHelper
|
||||
|
||||
$wasCreationSuccessful = file_put_contents($filePath, $fileContents);
|
||||
if ($wasCreationSuccessful === false) {
|
||||
throw new IOException('Unable to create file: ' . $filePath);
|
||||
throw new IOException("Unable to create file: $filePath");
|
||||
}
|
||||
|
||||
return $filePath;
|
||||
@ -126,7 +126,7 @@ class FileSystemHelper
|
||||
{
|
||||
$isInBaseFolder = (strpos($operationFolderPath, $this->baseFolderPath) === 0);
|
||||
if (!$isInBaseFolder) {
|
||||
throw new IOException('Cannot perform I/O operation outside of the base folder: ' . $this->baseFolderPath);
|
||||
throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderPath}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -64,9 +64,9 @@ abstract class AbstractReader implements ReaderInterface
|
||||
if (!$this->isPhpStream($filePath)) {
|
||||
// we skip the checks if the provided file path points to a PHP stream
|
||||
if (!$this->globalFunctionsHelper->file_exists($filePath)) {
|
||||
throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.');
|
||||
throw new IOException("Could not open $filePath for reading! File does not exist.");
|
||||
} else if (!$this->globalFunctionsHelper->is_readable($filePath)) {
|
||||
throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.');
|
||||
throw new IOException("Could not open $filePath for reading! File is not readable.");
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,7 +74,7 @@ abstract class AbstractReader implements ReaderInterface
|
||||
$this->openReader($filePath);
|
||||
$this->isStreamOpened = true;
|
||||
} catch (\Exception $exception) {
|
||||
throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')');
|
||||
throw new IOException("Could not open $filePath for reading! ({$exception->getMessage()})");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ class Reader extends AbstractReader
|
||||
{
|
||||
$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
|
||||
if (!$this->filePointer) {
|
||||
throw new IOException('Could not open file ' . $filePath . ' for reading.');
|
||||
throw new IOException("Could not open file $filePath for reading.");
|
||||
}
|
||||
|
||||
$this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper);
|
||||
|
12
src/Spout/Reader/Exception/XMLProcessingException.php
Normal file
12
src/Spout/Reader/Exception/XMLProcessingException.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Exception;
|
||||
|
||||
/**
 * Class XMLProcessingException
 * Exception type thrown by the XML wrappers (XMLReader / SimpleXMLElement)
 * when an error is reported while processing XML data.
 *
 * @package Box\Spout\Reader\Exception
 */
class XMLProcessingException extends ReaderException
{
}
|
161
src/Spout/Reader/Wrapper/SimpleXMLElement.php
Normal file
161
src/Spout/Reader/Wrapper/SimpleXMLElement.php
Normal file
@ -0,0 +1,161 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Wrapper;
|
||||
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
|
||||
|
||||
/**
 * Class SimpleXMLElement
 * Wrapper around the built-in SimpleXMLElement. This class does not extend \SimpleXMLElement
 * because its constructor is final... Instead, it is used as a passthrough.
 * @see \SimpleXMLElement
 *
 * @package Box\Spout\Reader\Wrapper
 */
class SimpleXMLElement
{
    use XMLInternalErrorsHelper;

    /** @var \SimpleXMLElement Instance of the wrapped SimpleXMLElement object */
    protected $simpleXMLElement;

    /**
     * Creates a new SimpleXMLElement object
     * @see \SimpleXMLElement::__construct
     *
     * @param string $xmlData A well-formed XML string
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If the XML string is not well-formed
     */
    public function __construct($xmlData)
    {
        $this->useXMLInternalErrors();

        try {
            $this->simpleXMLElement = new \SimpleXMLElement($xmlData);
        } catch (\Exception $exception) {
            // If the data is invalid, the constructor will throw an Exception.
            // The libxml message is read BEFORE the setting is restored, as restoring
            // the setting may disable internal errors and discard the stored errors.
            $errorMessage = $this->getLastXMLErrorMessage();
            if ($errorMessage === null) {
                // no libxml error was recorded: fall back to the original exception message
                $errorMessage = $exception->getMessage();
            }

            $this->resetXMLInternalErrorsSetting();
            throw new XMLProcessingException($errorMessage);
        }

        $this->resetXMLInternalErrorsSetting();
    }

    /**
     * Returns the attribute for the given name.
     *
     * @param string $name Attribute name
     * @param string|null|void $namespace An optional namespace (prefix) for the retrieved attributes
     * @return string|null The attribute value or NULL if attribute not found
     */
    public function getAttribute($name, $namespace = null)
    {
        // when a namespace is given, it is treated as a prefix (e.g. "xml"), not as a namespace URI
        $isPrefix = ($namespace !== null);
        $attributes = $this->simpleXMLElement->attributes($namespace, $isPrefix);
        $attributeValue = $attributes->{$name};

        return ($attributeValue !== null) ? (string) $attributeValue : null;
    }

    /**
     * Creates a prefix/ns context for the next XPath query
     * @see \SimpleXMLElement::registerXPathNamespace
     *
     * @param string $prefix The namespace prefix to use in the XPath query for the namespace given in "namespace".
     * @param string $namespace The namespace to use for the XPath query. This must match a namespace in
     *                          use by the XML document or the XPath query using "prefix" will not return any results.
     * @return bool TRUE on success or FALSE on failure.
     */
    public function registerXPathNamespace($prefix, $namespace)
    {
        return $this->simpleXMLElement->registerXPathNamespace($prefix, $namespace);
    }

    /**
     * Runs XPath query on XML data
     * @see \SimpleXMLElement::xpath
     *
     * @param string $path An XPath path
     * @return SimpleXMLElement[]|bool an array of wrapped SimpleXMLElement objects or FALSE in case of an error.
     */
    public function xpath($path)
    {
        $elements = $this->simpleXMLElement->xpath($path);

        if ($elements === false) {
            return false;
        }

        $wrappedElements = [];
        foreach ($elements as $element) {
            // wrap each element only once: wrapping serializes and re-parses the element
            $wrappedElement = $this->wrapSimpleXMLElement($element);

            if ($wrappedElement !== null) {
                $wrappedElements[] = $wrappedElement;
            }
        }

        return $wrappedElements;
    }

    /**
     * Wraps the given element into an instance of the wrapper.
     * The element is serialized to an XML string and parsed again by the wrapper,
     * so the returned wrapper holds its own copy of the element.
     *
     * @param \SimpleXMLElement $element Element to be wrapped
     * @return SimpleXMLElement|null The wrapped element or NULL if the given element is invalid
     */
    protected function wrapSimpleXMLElement(\SimpleXMLElement $element)
    {
        $elementAsXML = $element->asXML();

        if ($elementAsXML === false) {
            return null;
        }

        return new SimpleXMLElement($elementAsXML);
    }

    /**
     * Remove all nodes matching the given XPath query.
     * It does not map to any \SimpleXMLElement function.
     *
     * @param string $path An XPath path
     * @return void
     */
    public function removeNodesMatchingXPath($path)
    {
        $nodesToRemove = $this->simpleXMLElement->xpath($path);

        // xpath() does not return an array when the query fails: nothing to remove in that case
        if (!is_array($nodesToRemove)) {
            return;
        }

        foreach ($nodesToRemove as $nodeToRemove) {
            // This is how to remove a node from the XML
            unset($nodeToRemove[0]);
        }
    }

    /**
     * Returns the first child matching the given tag name
     *
     * @param string $tagName
     * @return SimpleXMLElement|null The first child matching the tag name or NULL if none found
     */
    public function getFirstChildByTagName($tagName)
    {
        if (!isset($this->simpleXMLElement->{$tagName})) {
            return null;
        }

        /** @var \SimpleXMLElement $realElement */
        $realElement = $this->simpleXMLElement->{$tagName};

        return $this->wrapSimpleXMLElement($realElement);
    }

    /**
     * @return string The string content of the wrapped element
     */
    public function __toString()
    {
        return $this->simpleXMLElement->__toString();
    }
}
|
82
src/Spout/Reader/Wrapper/XMLInternalErrorsHelper.php
Normal file
82
src/Spout/Reader/Wrapper/XMLInternalErrorsHelper.php
Normal file
@ -0,0 +1,82 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Wrapper;
|
||||
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
|
||||
/**
 * Trait XMLInternalErrorsHelper
 * Helps classes temporarily switch libxml to "internal errors" mode, so that XML errors
 * can be collected and converted into exceptions instead of being displayed.
 *
 * @package Box\Spout\Reader\Wrapper
 */
trait XMLInternalErrorsHelper
{
    /** @var bool Stores whether XML errors were initially stored internally - used to reset */
    protected $initialUseInternalErrorsValue;

    /**
     * To avoid displaying lots of warning/error messages on screen,
     * stores errors internally instead.
     * Previously recorded errors are cleared and the previous value of the setting is remembered.
     *
     * @return void
     */
    protected function useXMLInternalErrors()
    {
        libxml_clear_errors();
        $this->initialUseInternalErrorsValue = libxml_use_internal_errors(true);
    }

    /**
     * Throws an XMLProcessingException if an error occurred.
     * It also always resets the "libxml_use_internal_errors" setting back to its initial value.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException
     */
    protected function resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured()
    {
        // The error state must be captured BEFORE the setting is restored: restoring it
        // may disable internal errors, which is documented to clear existing libxml errors.
        $hasErrorOccurred = $this->hasXMLErrorOccured();
        $errorMessage = $this->getLastXMLErrorMessage();

        $this->resetXMLInternalErrorsSetting();

        if ($hasErrorOccurred) {
            throw new XMLProcessingException($errorMessage);
        }
    }

    /**
     * Returns whether an XML error has occurred since the last time errors were cleared.
     *
     * @return bool TRUE if an error occurred, FALSE otherwise
     */
    private function hasXMLErrorOccured()
    {
        return (libxml_get_last_error() !== false);
    }

    /**
     * Returns the error message for the last XML error that occurred.
     * @see libxml_get_last_error
     *
     * @return string|null Last XML error message (trimmed) or null if no error
     */
    private function getLastXMLErrorMessage()
    {
        $error = libxml_get_last_error();

        if ($error === false) {
            return null;
        }

        return trim($error->message);
    }

    /**
     * Restores the "libxml_use_internal_errors" setting to the value
     * it had when useXMLInternalErrors() was called.
     *
     * @return void
     */
    protected function resetXMLInternalErrorsSetting()
    {
        libxml_use_internal_errors($this->initialUseInternalErrorsValue);
    }
}
|
123
src/Spout/Reader/Wrapper/XMLReader.php
Normal file
123
src/Spout/Reader/Wrapper/XMLReader.php
Normal file
@ -0,0 +1,123 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Wrapper;
|
||||
|
||||
|
||||
/**
 * Class XMLReader
 * Wrapper around the built-in XMLReader.
 * Errors reported by libxml while reading are converted into XMLProcessingException.
 * @see \XMLReader
 *
 * @package Box\Spout\Reader\Wrapper
 */
class XMLReader extends \XMLReader
{
    use XMLInternalErrorsHelper;

    /**
     * Set the URI containing the XML to parse.
     * The LIBXML_NONET option is always applied, whatever options are given.
     * The optional parameters mirror the ones of the parent method, so that the
     * signature stays compatible and existing 3-argument calls keep working.
     * @see \XMLReader::open
     *
     * @param string $URI URI pointing to the document
     * @param string|null|void $encoding The document encoding or NULL
     * @param int|void $options A bitmask of the LIBXML_* constants
     * @return bool TRUE on success or FALSE on failure
     */
    public function open($URI, $encoding = null, $options = 0)
    {
        // HHVM does not check if file exists within zip file
        // @link https://github.com/facebook/hhvm/issues/5779
        $mustCheckFileExistence = ($this->isRunningHHVM() && $this->isZipStream($URI));

        if ($mustCheckFileExistence && !$this->fileExistsWithinZip($URI)) {
            return false;
        }

        return parent::open($URI, $encoding, $options | LIBXML_NONET);
    }

    /**
     * Returns whether the given URI is a zip stream.
     *
     * @param string $URI URI pointing to a document
     * @return bool TRUE if URI is a zip stream, FALSE otherwise
     */
    protected function isZipStream($URI)
    {
        return (strpos($URI, 'zip://') === 0);
    }

    /**
     * Returns whether the current environment is HHVM
     *
     * @return bool TRUE if running on HHVM, FALSE otherwise
     */
    protected function isRunningHHVM()
    {
        return defined('HHVM_VERSION');
    }

    /**
     * Returns whether the file at the given location exists
     *
     * @param string $zipStreamURI URI of a zip stream, e.g. "zip://file.zip#path/inside.xml"
     * @return bool TRUE if the file exists, FALSE otherwise
     */
    protected function fileExistsWithinZip($zipStreamURI)
    {
        // split the URI into the path of the zip file and the path of the file inside the zip
        $pattern = '/zip:\/\/([^#]+)#(.*)/';
        if (!preg_match($pattern, $zipStreamURI, $matches)) {
            return false;
        }

        $zipFilePath = $matches[1];
        $innerFilePath = $matches[2];

        $zip = new \ZipArchive();
        if ($zip->open($zipFilePath) !== true) {
            return false;
        }

        $doesFileExists = ($zip->locateName($innerFilePath) !== false);
        $zip->close();

        return $doesFileExists;
    }

    /**
     * Move to next node in document
     * @see \XMLReader::read
     *
     * @return bool TRUE on success or FALSE on failure
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred
     */
    public function read()
    {
        $this->useXMLInternalErrors();

        $wasReadSuccessful = parent::read();

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $wasReadSuccessful;
    }

    /**
     * Move cursor to next node skipping all subtrees
     * @see \XMLReader::next
     *
     * @param string|void $localName The name of the next node to move to
     * @return bool TRUE on success or FALSE on failure
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred
     */
    public function next($localName = null)
    {
        $this->useXMLInternalErrors();

        $wasNextSuccessful = parent::next($localName);

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $wasNextSuccessful;
    }
}
|
@ -3,6 +3,9 @@
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface;
|
||||
|
||||
@ -74,7 +77,7 @@ class SharedStringsHelper
|
||||
*/
|
||||
public function extractSharedStrings()
|
||||
{
|
||||
$xmlReader = new \XMLReader();
|
||||
$xmlReader = new XMLReader();
|
||||
$sharedStringIndex = 0;
|
||||
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||
|
||||
@ -83,39 +86,44 @@ class SharedStringsHelper
|
||||
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
|
||||
}
|
||||
|
||||
$sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
|
||||
$this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
|
||||
try {
|
||||
$sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
|
||||
$this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
|
||||
|
||||
while ($xmlReader->read() && $xmlReader->name !== 'si') {
|
||||
// do nothing until a 'si' tag is reached
|
||||
}
|
||||
|
||||
while ($xmlReader->name === 'si') {
|
||||
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
|
||||
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
|
||||
|
||||
// removes nodes that should not be read, like the pronunciation of the Kanji characters
|
||||
$cleanNode = $this->removeSuperfluousTextNodes($node);
|
||||
|
||||
// find all text nodes 't'; there can be multiple if the cell contains formatting
|
||||
$textNodes = $cleanNode->xpath('//ns:t');
|
||||
|
||||
$textValue = '';
|
||||
foreach ($textNodes as $textNode) {
|
||||
if ($this->shouldPreserveWhitespace($textNode)) {
|
||||
$textValue .= $textNode->__toString();
|
||||
} else {
|
||||
$textValue .= trim($textNode->__toString());
|
||||
}
|
||||
while ($xmlReader->read() && $xmlReader->name !== 'si') {
|
||||
// do nothing until a 'si' tag is reached
|
||||
}
|
||||
|
||||
$unescapedTextValue = $escaper->unescape($textValue);
|
||||
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
|
||||
while ($xmlReader->name === 'si') {
|
||||
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
|
||||
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
|
||||
|
||||
$sharedStringIndex++;
|
||||
// removes nodes that should not be read, like the pronunciation of the Kanji characters
|
||||
$cleanNode = $this->removeSuperfluousTextNodes($node);
|
||||
|
||||
// jump to the next 'si' tag
|
||||
$xmlReader->next('si');
|
||||
// find all text nodes 't'; there can be multiple if the cell contains formatting
|
||||
$textNodes = $cleanNode->xpath('//ns:t');
|
||||
|
||||
$textValue = '';
|
||||
foreach ($textNodes as $textNode) {
|
||||
if ($this->shouldPreserveWhitespace($textNode)) {
|
||||
$textValue .= $textNode->__toString();
|
||||
} else {
|
||||
$textValue .= trim($textNode->__toString());
|
||||
}
|
||||
}
|
||||
|
||||
$unescapedTextValue = $escaper->unescape($textValue);
|
||||
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
|
||||
|
||||
$sharedStringIndex++;
|
||||
|
||||
// jump to the next 'si' tag
|
||||
$xmlReader->next('si');
|
||||
}
|
||||
|
||||
} catch (XMLProcessingException $exception) {
|
||||
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
|
||||
}
|
||||
|
||||
$this->cachingStrategy->closeCache();
|
||||
@ -134,33 +142,19 @@ class SharedStringsHelper
|
||||
/**
|
||||
* Returns the shared strings unique count, as specified in <sst> tag.
|
||||
*
|
||||
* @param \XMLReader $xmlReader XMLReader instance
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
|
||||
* @return int Number of unique shared strings in the sharedStrings.xml file
|
||||
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
|
||||
*/
|
||||
protected function getSharedStringsUniqueCount($xmlReader)
|
||||
{
|
||||
// Use internal errors to avoid displaying lots of warning messages in case of invalid file
|
||||
// For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
|
||||
libxml_clear_errors();
|
||||
libxml_use_internal_errors(true);
|
||||
|
||||
$xmlReader->next('sst');
|
||||
|
||||
// Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
|
||||
while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) {
|
||||
while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== XMLReader::ELEMENT) {
|
||||
$xmlReader->read();
|
||||
}
|
||||
|
||||
$readError = libxml_get_last_error();
|
||||
if ($readError !== false) {
|
||||
$readErrorMessage = trim($readError->message);
|
||||
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readErrorMessage}]");
|
||||
}
|
||||
|
||||
// reset the setting to display XML warnings/errors
|
||||
libxml_use_internal_errors(false);
|
||||
|
||||
return intval($xmlReader->getAttribute('uniqueCount'));
|
||||
}
|
||||
|
||||
@ -180,29 +174,19 @@ class SharedStringsHelper
|
||||
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
|
||||
* This is to simplify the parsing of the subtree.
|
||||
*
|
||||
* @param \XMLReader $xmlReader
|
||||
* @return \SimpleXMLElement
|
||||
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
|
||||
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement
|
||||
* @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
|
||||
*/
|
||||
protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
|
||||
{
|
||||
// Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node.
|
||||
// For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
|
||||
libxml_clear_errors();
|
||||
libxml_use_internal_errors(true);
|
||||
|
||||
$node = null;
|
||||
try {
|
||||
$node = new \SimpleXMLElement($xmlReader->readOuterXml());
|
||||
} catch (\Exception $exception) {
|
||||
$error = libxml_get_last_error();
|
||||
libxml_use_internal_errors(false);
|
||||
|
||||
throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].');
|
||||
$node = new SimpleXMLElement($xmlReader->readOuterXml());
|
||||
} catch (XMLProcessingException $exception) {
|
||||
throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}].");
|
||||
}
|
||||
|
||||
libxml_use_internal_errors(false);
|
||||
|
||||
return $node;
|
||||
}
|
||||
|
||||
@ -210,8 +194,8 @@ class SharedStringsHelper
|
||||
* Removes nodes that should not be read, like the pronunciation of the Kanji characters.
|
||||
* By keeping them, their text content would be added to the read string.
|
||||
*
|
||||
* @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove
|
||||
* @return \SimpleXMLElement Cleaned parent node
|
||||
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove
|
||||
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node
|
||||
*/
|
||||
protected function removeSuperfluousTextNodes($parentNode)
|
||||
{
|
||||
@ -221,12 +205,7 @@ class SharedStringsHelper
|
||||
|
||||
foreach ($tagsToRemove as $tagToRemove) {
|
||||
$xpath = '//ns:' . $tagToRemove;
|
||||
$nodesToRemove = $parentNode->xpath($xpath);
|
||||
|
||||
foreach ($nodesToRemove as $nodeToRemove) {
|
||||
// This is how to remove a node from the XML
|
||||
unset($nodeToRemove[0]);
|
||||
}
|
||||
$parentNode->removeNodesMatchingXPath($xpath);
|
||||
}
|
||||
|
||||
return $parentNode;
|
||||
@ -235,24 +214,13 @@ class SharedStringsHelper
|
||||
/**
|
||||
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
|
||||
*
|
||||
* @param \SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
|
||||
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
|
||||
* @return bool Whether whitespace should be preserved
|
||||
*/
|
||||
protected function shouldPreserveWhitespace($textNode)
|
||||
{
|
||||
$shouldPreserveWhitespace = false;
|
||||
|
||||
$attributes = $textNode->attributes('xml', true);
|
||||
if ($attributes) {
|
||||
foreach ($attributes as $attributeName => $attributeValue) {
|
||||
if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') {
|
||||
$shouldPreserveWhitespace = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $shouldPreserveWhitespace;
|
||||
$spaceValue = $textNode->getAttribute('space', 'xml');
|
||||
return ($spaceValue === 'preserve');
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
namespace Box\Spout\Reader\XLSX\Helper;
|
||||
|
||||
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
|
||||
use Box\Spout\Reader\XLSX\Sheet;
|
||||
|
||||
/**
|
||||
@ -37,10 +38,10 @@ class SheetHelper
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
|
||||
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file */
|
||||
protected $workbookXMLRelsAsXMLElement;
|
||||
|
||||
/** @var \SimpleXMLElement XML element representing the workbook.xml file */
|
||||
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml file */
|
||||
protected $workbookXMLAsXMLElement;
|
||||
|
||||
/**
|
||||
@ -76,7 +77,7 @@ class SheetHelper
|
||||
|
||||
for ($i = 0; $i < $numSheetNodes; $i++) {
|
||||
$sheetNode = $sheetNodes[$i];
|
||||
$sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;
|
||||
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
|
||||
|
||||
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i);
|
||||
}
|
||||
@ -115,15 +116,15 @@ class SheetHelper
|
||||
|
||||
if (count($relationshipNodes) === 1) {
|
||||
$relationshipNode = $relationshipNodes[0];
|
||||
$sheetId = (string) $relationshipNode->attributes()->Id;
|
||||
$sheetId = $relationshipNode->getAttribute('Id');
|
||||
|
||||
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
|
||||
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]');
|
||||
|
||||
if (count($sheetNodes) === 1) {
|
||||
$sheetNode = $sheetNodes[0];
|
||||
$sheetId = (int) $sheetNode->attributes()->sheetId;
|
||||
$escapedSheetName = (string) $sheetNode->attributes()->name;
|
||||
$sheetId = (int) $sheetNode->getAttribute('sheetId');
|
||||
$escapedSheetName = $sheetNode->getAttribute('name');
|
||||
|
||||
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||
$sheetName = $escaper->unescape($escapedSheetName);
|
||||
@ -149,7 +150,7 @@ class SheetHelper
|
||||
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
|
||||
* The returned value is cached.
|
||||
*
|
||||
* @return \SimpleXMLElement XML element representating the workbook.xml.rels file
|
||||
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file
|
||||
*/
|
||||
protected function getWorkbookXMLRelsAsXMLElement()
|
||||
{
|
||||
@ -167,7 +168,7 @@ class SheetHelper
|
||||
* Returns a representation of the workbook.xml file, ready to be parsed.
|
||||
* The returned value is cached.
|
||||
*
|
||||
* @return \SimpleXMLElement XML element representating the workbook.xml.rels file
|
||||
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml file
|
||||
*/
|
||||
protected function getWorkbookXMLAsXMLElement()
|
||||
{
|
||||
@ -186,13 +187,13 @@ class SheetHelper
|
||||
*
|
||||
* @param string $xmlFilePath The path of the XML file inside the XLSX file
|
||||
* @param string $mainNamespace The main XPath namespace to register
|
||||
* @return \SimpleXMLElement The XML element representing the file
|
||||
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement The XML element representing the file
|
||||
*/
|
||||
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
|
||||
{
|
||||
$xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);
|
||||
|
||||
$xmlElement = new \SimpleXMLElement($xmlContents);
|
||||
$xmlElement = new SimpleXMLElement($xmlContents);
|
||||
$xmlElement->registerXPathNamespace('ns', $mainNamespace);
|
||||
|
||||
return $xmlElement;
|
||||
|
@ -61,7 +61,7 @@ class Reader extends AbstractReader
|
||||
|
||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
|
||||
} else {
|
||||
throw new IOException('Could not open ' . $filePath . ' for reading.');
|
||||
throw new IOException("Could not open $filePath for reading.");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,9 @@
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\CellHelper;
|
||||
|
||||
/**
|
||||
@ -45,7 +47,7 @@ class RowIterator implements IteratorInterface
|
||||
/** @var Helper\SharedStringsHelper Helper to work with shared strings */
|
||||
protected $sharedStringsHelper;
|
||||
|
||||
/** @var \XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
|
||||
protected $xmlReader;
|
||||
|
||||
/** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
|
||||
@ -74,7 +76,7 @@ class RowIterator implements IteratorInterface
|
||||
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
|
||||
$this->sharedStringsHelper = $sharedStringsHelper;
|
||||
|
||||
$this->xmlReader = new \XMLReader();
|
||||
$this->xmlReader = new XMLReader();
|
||||
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||
}
|
||||
|
||||
@ -102,8 +104,8 @@ class RowIterator implements IteratorInterface
|
||||
$this->xmlReader->close();
|
||||
|
||||
$sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
|
||||
if ($this->xmlReader->open($sheetDataFilePath, null, LIBXML_NONET) === false) {
|
||||
throw new IOException('Could not open "' . $this->sheetDataXMLFilePath . '".');
|
||||
if ($this->xmlReader->open($sheetDataFilePath) === false) {
|
||||
throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
|
||||
}
|
||||
|
||||
$this->numReadRows = 0;
|
||||
@ -138,59 +140,52 @@ class RowIterator implements IteratorInterface
|
||||
$isInsideRowTag = false;
|
||||
$rowData = [];
|
||||
|
||||
// Use internal errors to avoid displaying lots of warning messages in case of invalid file
|
||||
// For instance on HHVM, XMLReader->open() won't fail when trying to read a unexisting file within a zip...
|
||||
// But the XMLReader->read() will fail!
|
||||
libxml_clear_errors();
|
||||
libxml_use_internal_errors(true);
|
||||
try {
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
|
||||
// Read dimensions of the sheet
|
||||
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
||||
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
||||
$lastCellIndex = $matches[1];
|
||||
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
||||
}
|
||||
|
||||
while ($this->xmlReader->read()) {
|
||||
if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
|
||||
// Read dimensions of the sheet
|
||||
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
||||
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
||||
$lastCellIndex = $matches[1];
|
||||
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
||||
} else if ($this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||
// Start of the row description
|
||||
$isInsideRowTag = true;
|
||||
|
||||
// Read spans info if present
|
||||
$numberOfColumnsForRow = $this->numColumns;
|
||||
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
||||
if ($spans) {
|
||||
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
||||
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
||||
}
|
||||
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||
|
||||
} else if ($isInsideRowTag && $this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
|
||||
// Start of a cell description
|
||||
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
|
||||
|
||||
$node = $this->xmlReader->expand();
|
||||
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
||||
|
||||
} else if ($this->xmlReader->nodeType == XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||
// End of the row description
|
||||
// If needed, we fill the empty cells
|
||||
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||
$this->numReadRows++;
|
||||
break;
|
||||
|
||||
} else if ($this->xmlReader->nodeType == XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
|
||||
// The closing "</worksheet>" marks the end of the file
|
||||
$this->hasReachedEndOfFile = true;
|
||||
}
|
||||
|
||||
} else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||
// Start of the row description
|
||||
$isInsideRowTag = true;
|
||||
|
||||
// Read spans info if present
|
||||
$numberOfColumnsForRow = $this->numColumns;
|
||||
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
||||
if ($spans) {
|
||||
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
||||
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
||||
}
|
||||
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||
|
||||
} else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
|
||||
// Start of a cell description
|
||||
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
|
||||
|
||||
$node = $this->xmlReader->expand();
|
||||
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
||||
|
||||
} else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||
// End of the row description
|
||||
// If needed, we fill the empty cells
|
||||
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||
$this->numReadRows++;
|
||||
break;
|
||||
|
||||
} else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
|
||||
// The closing "</worksheet>" marks the end of the file
|
||||
$this->hasReachedEndOfFile = true;
|
||||
}
|
||||
}
|
||||
|
||||
$readError = libxml_get_last_error();
|
||||
if ($readError !== false) {
|
||||
$readErrorMessage = trim($readError->message);
|
||||
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$readErrorMessage}]");
|
||||
} catch (XMLProcessingException $exception) {
|
||||
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
|
||||
}
|
||||
|
||||
$this->rowDataBuffer = $rowData;
|
||||
|
@ -156,7 +156,7 @@ EOD;
|
||||
|
||||
$wasWriteSuccessful = fwrite($this->sheetFilePointer, $data);
|
||||
if ($wasWriteSuccessful === false) {
|
||||
throw new IOException('Unable to write data in ' . $this->worksheetFilePath);
|
||||
throw new IOException("Unable to write data in {$this->worksheetFilePath}");
|
||||
}
|
||||
|
||||
// only update the count if the write worked
|
||||
|
127
tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php
Normal file
127
tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php
Normal file
@ -0,0 +1,127 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Wrapper;
|
||||
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class SimpleXMLElementTest
 * Exercises the SimpleXMLElement wrapper: construction from invalid XML,
 * attribute lookup with and without a namespace prefix, XPath queries
 * and removal of nodes matching an XPath expression.
 *
 * @package Box\Spout\Reader\Wrapper
 */
class SimpleXMLElementTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /**
     * Building the wrapper from malformed XML is expected to raise an XMLProcessingException.
     *
     * @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
     *
     * @return void
     */
    public function testConstructShouldThrowExceptionIfInvalidData()
    {
        // "<xml>" is never closed, which makes the document malformed
        $invalidXML = '<invalid><xml></invalid>';
        new SimpleXMLElement($invalidXML);
    }

    /**
     * Provides [XML document, namespace argument, map of attribute name => expected value].
     * The same three documents are queried once without a namespace and once with the "r" prefix.
     *
     * @return array
     */
    public function dataProviderForTestGetAttribute()
    {
        // NOTE: heredoc bodies must start at column 0, the XML declaration cannot be preceded by whitespace
        $xmlWithoutNamespace = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<worksheet foo="bar" type="test" />
XML;

        $xmlWithHalfNamespace = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<worksheet
    xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    foo="bar" r:type="test" />
XML;

        $xmlWithFullNamespace = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<worksheet
    xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    r:foo="bar" r:type="test" />
XML;

        return [
            [$xmlWithoutNamespace, null, ['foo' => 'bar', 'type' => 'test']],
            [$xmlWithHalfNamespace, null, ['foo' => 'bar', 'type' => null]],
            [$xmlWithFullNamespace, null, ['foo' => null, 'type' => null]],
            [$xmlWithoutNamespace, 'r', ['foo' => null, 'type' => null]],
            [$xmlWithHalfNamespace, 'r', ['foo' => null, 'type' => 'test']],
            [$xmlWithFullNamespace, 'r', ['foo' => 'bar', 'type' => 'test']],
        ];
    }

    /**
     * Checks that getAttribute() returns the expected value for every attribute of the data set.
     *
     * @dataProvider dataProviderForTestGetAttribute
     *
     * @param string $xml
     * @param string|null $namespace
     * @param array $expectedAttributes
     * @return void
     */
    public function testGetAttribute($xml, $namespace, $expectedAttributes)
    {
        $element = new SimpleXMLElement($xml);

        foreach ($expectedAttributes as $name => $expectedValue) {
            $value = $element->getAttribute($name, $namespace);
            $this->assertEquals($expectedValue, $value);
        }
    }

    /**
     * Checks that xpath() returns every matching node and that the returned nodes are wrapped.
     *
     * @return void
     */
    public function testXPath()
    {
        $xml = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<worksheet>
    <sheetData>
        <row r="1">
            <c r="A1"><v>0</v></c>
            <c r="A2"><v>1</v></c>
        </row>
    </sheetData>
</worksheet>
XML;

        $element = new SimpleXMLElement($xml);
        $matchedElements = $element->xpath('//c');

        // Dedicated assertions report the actual count/type on failure, unlike assertTrue/assertEquals(count())
        $this->assertCount(2, $matchedElements);
        $this->assertInstanceOf(
            'Box\Spout\Reader\Wrapper\SimpleXMLElement',
            $matchedElements[0],
            'The SimpleXMLElement should be wrapped'
        );
        $this->assertEquals('A2', $matchedElements[1]->getAttribute('r'));
    }

    /**
     * Checks that a node found before calling removeNodesMatchingXPath() is gone afterwards.
     *
     * @return void
     */
    public function testRemoveNodeMatchingXPath()
    {
        $xml = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<worksheet>
    <sheetData>
        <row r="1">
            <c r="A1"><v>0</v></c>
            <c r="A2"><v>1</v></c>
        </row>
    </sheetData>
</worksheet>
XML;

        $element = new SimpleXMLElement($xml);
        $this->assertNotNull($element->getFirstChildByTagName('sheetData'));

        $element->removeNodesMatchingXPath('//sheetData');
        $this->assertNull($element->getFirstChildByTagName('sheetData'));
    }
}
|
166
tests/Spout/Reader/Wrapper/XMLReaderTest.php
Normal file
166
tests/Spout/Reader/Wrapper/XMLReaderTest.php
Normal file
@ -0,0 +1,166 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Wrapper;
|
||||
|
||||
use Box\Spout\TestUsingResource;
|
||||
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||
|
||||
/**
 * Class XMLReaderTest
 * Exercises the XMLReader wrapper: opening entries inside a zip archive,
 * error reporting of read()/next() and the private zip stream helpers.
 *
 * @package Box\Spout\Reader\Wrapper
 */
class XMLReaderTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /**
     * open() is expected to return FALSE when the targeted entry is missing from the archive.
     *
     * @return void
     */
    public function testOpenShouldFailIfFileInsideZipDoesNotExist()
    {
        $resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
        $nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';

        $xmlReader = new XMLReader();

        // using "@" to prevent errors/warnings from being displayed
        $wasOpenSuccessful = @$xmlReader->open($nonExistingXMLFilePath);

        // assertFalse() is a strict (identity) check, equivalent to "=== false"
        $this->assertFalse($wasOpenSuccessful);
    }

    /**
     * Testing a HHVM bug: https://github.com/facebook/hhvm/issues/5779
     * The associated code in XMLReader::open() can be removed when the issue is fixed (and this test starts failing).
     * @see XMLReader::open()
     *
     * @return void
     */
    public function testHHVMStillDoesNotComplainWhenCallingOpenWithFileInsideZipNotExisting()
    {
        // Report the test as skipped on other runtimes, instead of letting it pass without any assertion
        if (!$this->isRunningHHVM()) {
            $this->markTestSkipped('This test only applies to HHVM.');
        }

        $resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
        $nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';

        libxml_clear_errors();
        $initialUseInternalErrorsSetting = libxml_use_internal_errors(true);

        // using the built-in XMLReader
        $xmlReader = new \XMLReader();
        $wasOpenSuccessful = ($xmlReader->open($nonExistingXMLFilePath) !== false);
        $lastError = libxml_get_last_error();

        // Restore the libxml setting BEFORE asserting, so that a failing assertion cannot leak it to other tests
        libxml_use_internal_errors($initialUseInternalErrorsSetting);

        $this->assertTrue($wasOpenSuccessful);
        $this->assertFalse($lastError);
    }

    /**
     * @return bool TRUE if running on HHVM, FALSE otherwise
     */
    private function isRunningHHVM()
    {
        return defined('HHVM_VERSION');
    }

    /**
     * Reading a sheet that contains invalid XML characters is expected to raise an XMLProcessingException.
     *
     * @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
     *
     * @return void
     */
    public function testReadShouldThrowExceptionOnError()
    {
        $resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx');
        $sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/worksheets/sheet1.xml';

        $xmlReader = new XMLReader();
        if ($xmlReader->open($sheetDataXMLFilePath) === false) {
            $this->fail("Could not open $sheetDataXMLFilePath");
        }

        // using "@" to prevent errors/warnings from being displayed
        while (@$xmlReader->read()) {
            // do nothing
        }
    }

    /**
     * Calling next() on a document that triggers read errors is expected to raise an XMLProcessingException.
     *
     * @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
     *
     * @return void
     */
    public function testNextShouldThrowExceptionOnError()
    {
        // The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains
        // a doctype element that causes read errors
        $resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx');
        $sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/sharedStrings.xml';

        $xmlReader = new XMLReader();
        if ($xmlReader->open($sheetDataXMLFilePath) !== false) {
            // using "@" to prevent errors/warnings from being displayed
            @$xmlReader->next('sst');
        }
    }

    /**
     * Provides [URI, whether the URI is expected to be detected as a zip stream].
     *
     * @return array
     */
    public function dataProviderForTestIsZipStream()
    {
        return [
            ['/absolute/path/to/file.xlsx', false],
            ['relative/path/to/file.xlsx', false],
            ['php://temp', false],
            ['zip:///absolute/path/to/file.xlsx', true],
            ['zip://relative/path/to/file.xlsx', true],
        ];
    }

    /**
     * @dataProvider dataProviderForTestIsZipStream
     *
     * @param string $URI
     * @param bool $expectedResult
     * @return void
     */
    public function testIsZipStream($URI, $expectedResult)
    {
        $xmlReader = new XMLReader();
        // isZipStream() is invoked through reflection
        $isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'isZipStream', $URI);

        $this->assertEquals($expectedResult, $isZipStream);
    }

    /**
     * Provides [path of the entry inside the archive, whether the entry is expected to exist].
     *
     * @return array
     */
    public function dataProviderForTestFileExistsWithinZip()
    {
        return [
            ['[Content_Types].xml', true],
            ['xl/sharedStrings.xml', true],
            ['xl/worksheets/sheet1.xml', true],
            ['/invalid/file.xml', false],
            ['another/invalid/file.xml', false],
        ];
    }

    /**
     * @dataProvider dataProviderForTestFileExistsWithinZip
     *
     * @param string $innerFilePath
     * @param bool $expectedResult
     * @return void
     */
    public function testFileExistsWithinZip($innerFilePath, $expectedResult)
    {
        $resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
        $zipStreamURI = 'zip://' . $resourcePath . '#' . $innerFilePath;

        $xmlReader = new XMLReader();
        // fileExistsWithinZip() is invoked through reflection
        $fileExistsWithinZip = \ReflectionHelper::callMethodOnObject($xmlReader, 'fileExistsWithinZip', $zipStreamURI);

        $this->assertEquals($expectedResult, $fileExistsWithinZip);
    }
}
|
@ -6,7 +6,6 @@ use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Reader\ReaderFactory;
|
||||
use Box\Spout\TestUsingResource;
|
||||
use Symfony\Component\Config\Definition\Exception\Exception;
|
||||
|
||||
/**
|
||||
* Class ReaderTest
|
||||
|
BIN
tests/resources/xlsx/one_sheet_with_invalid_xml_characters.xlsx
Normal file
BIN
tests/resources/xlsx/one_sheet_with_invalid_xml_characters.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user