Merge pull request #75 from box/xml_reader_wrappers
Add wrappers around XMLReader and SimpleXMLElement
This commit is contained in:
commit
03d1917080
@ -40,7 +40,7 @@ class FileSystemHelper
|
|||||||
|
|
||||||
$wasCreationSuccessful = mkdir($folderPath, 0777, true);
|
$wasCreationSuccessful = mkdir($folderPath, 0777, true);
|
||||||
if (!$wasCreationSuccessful) {
|
if (!$wasCreationSuccessful) {
|
||||||
throw new IOException('Unable to create folder: ' . $folderPath);
|
throw new IOException("Unable to create folder: $folderPath");
|
||||||
}
|
}
|
||||||
|
|
||||||
return $folderPath;
|
return $folderPath;
|
||||||
@ -64,7 +64,7 @@ class FileSystemHelper
|
|||||||
|
|
||||||
$wasCreationSuccessful = file_put_contents($filePath, $fileContents);
|
$wasCreationSuccessful = file_put_contents($filePath, $fileContents);
|
||||||
if ($wasCreationSuccessful === false) {
|
if ($wasCreationSuccessful === false) {
|
||||||
throw new IOException('Unable to create file: ' . $filePath);
|
throw new IOException("Unable to create file: $filePath");
|
||||||
}
|
}
|
||||||
|
|
||||||
return $filePath;
|
return $filePath;
|
||||||
@ -126,7 +126,7 @@ class FileSystemHelper
|
|||||||
{
|
{
|
||||||
$isInBaseFolder = (strpos($operationFolderPath, $this->baseFolderPath) === 0);
|
$isInBaseFolder = (strpos($operationFolderPath, $this->baseFolderPath) === 0);
|
||||||
if (!$isInBaseFolder) {
|
if (!$isInBaseFolder) {
|
||||||
throw new IOException('Cannot perform I/O operation outside of the base folder: ' . $this->baseFolderPath);
|
throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderPath}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -64,9 +64,9 @@ abstract class AbstractReader implements ReaderInterface
|
|||||||
if (!$this->isPhpStream($filePath)) {
|
if (!$this->isPhpStream($filePath)) {
|
||||||
// we skip the checks if the provided file path points to a PHP stream
|
// we skip the checks if the provided file path points to a PHP stream
|
||||||
if (!$this->globalFunctionsHelper->file_exists($filePath)) {
|
if (!$this->globalFunctionsHelper->file_exists($filePath)) {
|
||||||
throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.');
|
throw new IOException("Could not open $filePath for reading! File does not exist.");
|
||||||
} else if (!$this->globalFunctionsHelper->is_readable($filePath)) {
|
} else if (!$this->globalFunctionsHelper->is_readable($filePath)) {
|
||||||
throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.');
|
throw new IOException("Could not open $filePath for reading! File is not readable.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,7 +74,7 @@ abstract class AbstractReader implements ReaderInterface
|
|||||||
$this->openReader($filePath);
|
$this->openReader($filePath);
|
||||||
$this->isStreamOpened = true;
|
$this->isStreamOpened = true;
|
||||||
} catch (\Exception $exception) {
|
} catch (\Exception $exception) {
|
||||||
throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')');
|
throw new IOException("Could not open $filePath for reading! ({$exception->getMessage()})");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ class Reader extends AbstractReader
|
|||||||
{
|
{
|
||||||
$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
|
$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
|
||||||
if (!$this->filePointer) {
|
if (!$this->filePointer) {
|
||||||
throw new IOException('Could not open file ' . $filePath . ' for reading.');
|
throw new IOException("Could not open file $filePath for reading.");
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper);
|
$this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper);
|
||||||
|
12
src/Spout/Reader/Exception/XMLProcessingException.php
Normal file
12
src/Spout/Reader/Exception/XMLProcessingException.php
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Exception;
|
||||||
|
|
||||||
|
/**
 * Class XMLProcessingException
 * Signals that an error was detected while reading or parsing XML data.
 *
 * @package Box\Spout\Reader\Exception
 */
class XMLProcessingException extends ReaderException
{
    // Intentionally empty: the class only provides a distinct, catchable type.
}
|
161
src/Spout/Reader/Wrapper/SimpleXMLElement.php
Normal file
161
src/Spout/Reader/Wrapper/SimpleXMLElement.php
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Wrapper;
|
||||||
|
|
||||||
|
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class SimpleXMLElement
|
||||||
|
* Wrapper around the built-in SimpleXMLElement. This class does not extend \SimpleXMLElement
|
||||||
|
* because its constructor is final... Instead, it is used as a passthrough.
|
||||||
|
* @see \SimpleXMLElement
|
||||||
|
*
|
||||||
|
* @package Box\Spout\Reader\Wrapper
|
||||||
|
*/
|
||||||
|
class SimpleXMLElement
{
    use XMLInternalErrorsHelper;

    /** @var \SimpleXMLElement Instance of the wrapped SimpleXMLElement object */
    protected $simpleXMLElement;

    /**
     * Creates a new SimpleXMLElement object
     * @see \SimpleXMLElement::__construct
     *
     * @param string $xmlData A well-formed XML string
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If the XML string is not well-formed
     */
    public function __construct($xmlData)
    {
        // Collect libxml errors internally while parsing instead of emitting warnings
        $this->useXMLInternalErrors();

        try {
            $this->simpleXMLElement = new \SimpleXMLElement($xmlData);
        } catch (\Exception $exception) {
            // if the data is invalid, the constructor will throw an Exception
            $this->resetXMLInternalErrorsSetting();

            // Prefer the libxml error; fall back to the caught exception's message
            // so that the thrown exception never carries a NULL message.
            $errorMessage = $this->getLastXMLErrorMessage();
            if ($errorMessage === null) {
                $errorMessage = $exception->getMessage();
            }

            throw new XMLProcessingException($errorMessage);
        }

        $this->resetXMLInternalErrorsSetting();
    }

    /**
     * Returns the attribute for the given name.
     *
     * @param string $name Attribute name
     * @param string|null|void $namespace An optional namespace prefix for the retrieved attributes
     * @return string|null The attribute value or NULL if attribute not found
     */
    public function getAttribute($name, $namespace = null)
    {
        // When a namespace is given, it is interpreted as a prefix (e.g. "xml"), not as a namespace URI
        $isPrefix = ($namespace !== null);
        $attributes = $this->simpleXMLElement->attributes($namespace, $isPrefix);

        if ($attributes === null) {
            // no attribute list available for this element
            return null;
        }

        $attributeValue = $attributes->{$name};

        return ($attributeValue !== null) ? (string) $attributeValue : null;
    }

    /**
     * Creates a prefix/ns context for the next XPath query
     * @see \SimpleXMLElement::registerXPathNamespace
     *
     * @param string $prefix The namespace prefix to use in the XPath query for the namespace given in "namespace".
     * @param string $namespace The namespace to use for the XPath query. This must match a namespace in
     *                          use by the XML document or the XPath query using "prefix" will not return any results.
     * @return bool TRUE on success or FALSE on failure.
     */
    public function registerXPathNamespace($prefix, $namespace)
    {
        return $this->simpleXMLElement->registerXPathNamespace($prefix, $namespace);
    }

    /**
     * Runs XPath query on XML data
     * @see \SimpleXMLElement::xpath
     *
     * @param string $path An XPath path
     * @return SimpleXMLElement[]|bool an array of wrapped SimpleXMLElement objects or FALSE in case of an error.
     */
    public function xpath($path)
    {
        $elements = $this->simpleXMLElement->xpath($path);

        if ($elements === false) {
            return false;
        }

        $wrappedElements = [];

        // The cast turns a non-array result (other than FALSE, handled above) into an empty list
        foreach ((array) $elements as $element) {
            // Wrap each element only once: wrapping serializes and re-parses the node
            $wrappedElement = $this->wrapSimpleXMLElement($element);

            if ($wrappedElement !== null) {
                $wrappedElements[] = $wrappedElement;
            }
        }

        return $wrappedElements;
    }

    /**
     * Wraps the given element into an instance of the wrapper
     *
     * @param \SimpleXMLElement $element Element to be wrapped
     * @return SimpleXMLElement|null The wrapped element or NULL if the given element is invalid
     */
    protected function wrapSimpleXMLElement(\SimpleXMLElement $element)
    {
        $elementAsXML = $element->asXML();

        if ($elementAsXML === false) {
            return null;
        }

        // The wrapper is built from the serialized XML of the element
        return new self($elementAsXML);
    }

    /**
     * Remove all nodes matching the given XPath query.
     * It does not map to any \SimpleXMLElement function.
     *
     * @param string $path An XPath path
     * @return void
     */
    public function removeNodesMatchingXPath($path)
    {
        $nodesToRemove = $this->simpleXMLElement->xpath($path);

        if (!is_array($nodesToRemove)) {
            // the query failed: there is nothing to remove
            return;
        }

        foreach ($nodesToRemove as $nodeToRemove) {
            // This is how to remove a node from the XML
            unset($nodeToRemove[0]);
        }
    }

    /**
     * Returns the first child matching the given tag name
     *
     * @param string $tagName
     * @return SimpleXMLElement|null The first child matching the tag name or NULL if none found
     */
    public function getFirstChildByTagName($tagName)
    {
        if (!isset($this->simpleXMLElement->{$tagName})) {
            return null;
        }

        /** @var \SimpleXMLElement $realElement */
        $realElement = $this->simpleXMLElement->{$tagName};

        return $this->wrapSimpleXMLElement($realElement);
    }

    /**
     * Returns the string content of the wrapped element
     * @see \SimpleXMLElement::__toString
     *
     * @return string
     */
    public function __toString()
    {
        return $this->simpleXMLElement->__toString();
    }
}
|
82
src/Spout/Reader/Wrapper/XMLInternalErrorsHelper.php
Normal file
82
src/Spout/Reader/Wrapper/XMLInternalErrorsHelper.php
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Wrapper;
|
||||||
|
|
||||||
|
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trait XMLInternalErrorsHelper
|
||||||
|
*
|
||||||
|
* @package Box\Spout\Reader\Wrapper
|
||||||
|
*/
|
||||||
|
trait XMLInternalErrorsHelper
{
    /** @var bool Value of the "libxml_use_internal_errors" setting before it was overridden - used to restore it */
    protected $initialUseInternalErrorsValue;

    /**
     * Clears any pending libxml errors and switches libxml to internal error
     * storage, so that warnings/errors are collected instead of displayed.
     * The previous value of the setting is remembered so it can be restored.
     *
     * @return void
     */
    protected function useXMLInternalErrors()
    {
        libxml_clear_errors();

        $previousSetting = libxml_use_internal_errors(true);
        $this->initialUseInternalErrorsValue = $previousSetting;
    }

    /**
     * Restores the "libxml_use_internal_errors" setting to its initial value,
     * then throws an XMLProcessingException if an XML error occurred.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException
     */
    protected function resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured()
    {
        $errorDetected = $this->hasXMLErrorOccured();

        // The setting is restored on both the success and the failure path
        $this->resetXMLInternalErrorsSetting();

        if ($errorDetected) {
            throw new XMLProcessingException($this->getLastXMLErrorMessage());
        }
    }

    /**
     * Returns whether an XML error has occurred since the last time errors were cleared.
     *
     * @return bool TRUE if an error occurred, FALSE otherwise
     */
    private function hasXMLErrorOccured()
    {
        $lastError = libxml_get_last_error();

        return ($lastError !== false);
    }

    /**
     * Returns the trimmed error message of the last XML error that occurred.
     * @see libxml_get_last_error
     *
     * @return string|null Last XML error message or null if no error
     */
    private function getLastXMLErrorMessage()
    {
        $lastError = libxml_get_last_error();

        if ($lastError === false) {
            return null;
        }

        return trim($lastError->message);
    }

    /**
     * Restores the "libxml_use_internal_errors" setting to the value it had
     * when useXMLInternalErrors() was called.
     *
     * @return void
     */
    protected function resetXMLInternalErrorsSetting()
    {
        libxml_use_internal_errors($this->initialUseInternalErrorsValue);
    }
}
|
123
src/Spout/Reader/Wrapper/XMLReader.php
Normal file
123
src/Spout/Reader/Wrapper/XMLReader.php
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Wrapper;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class XMLReader
|
||||||
|
* Wrapper around the built-in XMLReader
|
||||||
|
* @see \XMLReader
|
||||||
|
*
|
||||||
|
* @package Box\Spout\Reader\Wrapper
|
||||||
|
*/
|
||||||
|
class XMLReader extends \XMLReader
{
    use XMLInternalErrorsHelper;

    /**
     * Set the URI containing the XML to parse.
     * The document is always opened with the LIBXML_NONET option.
     * @see \XMLReader::open
     *
     * @param string $URI URI pointing to the document
     * @return bool TRUE on success or FALSE on failure
     */
    public function open($URI)
    {
        // HHVM does not check if file exists within zip file
        // @link https://github.com/facebook/hhvm/issues/5779
        if ($this->isRunningHHVM() && $this->isZipStream($URI) && !$this->fileExistsWithinZip($URI)) {
            return false;
        }

        return parent::open($URI, null, LIBXML_NONET);
    }

    /**
     * Returns whether the given URI is a zip stream.
     *
     * @param string $URI URI pointing to a document
     * @return bool TRUE if URI is a zip stream, FALSE otherwise
     */
    protected function isZipStream($URI)
    {
        return (strpos($URI, 'zip://') === 0);
    }

    /**
     * Returns whether the current environment is HHVM
     *
     * @return bool TRUE if running on HHVM, FALSE otherwise
     */
    protected function isRunningHHVM()
    {
        return defined('HHVM_VERSION');
    }

    /**
     * Returns whether the file at the given location exists
     *
     * @param string $zipStreamURI URI of a zip stream, e.g. "zip://file.zip#path/inside.xml"
     * @return bool TRUE if the file exists, FALSE otherwise
     */
    protected function fileExistsWithinZip($zipStreamURI)
    {
        $doesFileExists = false;

        // Splits the URI into the path of the archive (before "#") and the path inside it (after "#")
        $pattern = '/zip:\/\/([^#]+)#(.*)/';
        if (preg_match($pattern, $zipStreamURI, $matches)) {
            $zipFilePath = $matches[1];
            $innerFilePath = $matches[2];

            $zip = new \ZipArchive();
            if ($zip->open($zipFilePath) === true) {
                $doesFileExists = ($zip->locateName($innerFilePath) !== false);
                $zip->close();
            }
        }

        return $doesFileExists;
    }

    /**
     * Move to next node in document
     * @see \XMLReader::read
     *
     * @return bool TRUE on success or FALSE on failure
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred
     */
    public function read()
    {
        $this->useXMLInternalErrors();

        $wasReadSuccessful = parent::read();

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $wasReadSuccessful;
    }

    /**
     * Move cursor to next node skipping all subtrees
     * @see \XMLReader::next
     *
     * @param string|void $localName The name of the next node to move to
     * @return bool TRUE on success or FALSE on failure
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred
     */
    public function next($localName = null)
    {
        $this->useXMLInternalErrors();

        // Only forward the name when one was given. Where the built-in parameter is a
        // non-nullable string, NULL would be coerced to "" and treated as a node name
        // to look for, instead of meaning "just move to the next node".
        if ($localName === null) {
            $wasNextSuccessful = parent::next();
        } else {
            $wasNextSuccessful = parent::next($localName);
        }

        $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured();

        return $wasNextSuccessful;
    }
}
|
@ -3,6 +3,9 @@
|
|||||||
namespace Box\Spout\Reader\XLSX\Helper;
|
namespace Box\Spout\Reader\XLSX\Helper;
|
||||||
|
|
||||||
use Box\Spout\Common\Exception\IOException;
|
use Box\Spout\Common\Exception\IOException;
|
||||||
|
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||||
|
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
|
||||||
|
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory;
|
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory;
|
||||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface;
|
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface;
|
||||||
|
|
||||||
@ -74,7 +77,7 @@ class SharedStringsHelper
|
|||||||
*/
|
*/
|
||||||
public function extractSharedStrings()
|
public function extractSharedStrings()
|
||||||
{
|
{
|
||||||
$xmlReader = new \XMLReader();
|
$xmlReader = new XMLReader();
|
||||||
$sharedStringIndex = 0;
|
$sharedStringIndex = 0;
|
||||||
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||||
|
|
||||||
@ -83,39 +86,44 @@ class SharedStringsHelper
|
|||||||
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
|
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
|
||||||
}
|
}
|
||||||
|
|
||||||
$sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
|
try {
|
||||||
$this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
|
$sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
|
||||||
|
$this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
|
||||||
|
|
||||||
while ($xmlReader->read() && $xmlReader->name !== 'si') {
|
while ($xmlReader->read() && $xmlReader->name !== 'si') {
|
||||||
// do nothing until a 'si' tag is reached
|
// do nothing until a 'si' tag is reached
|
||||||
}
|
|
||||||
|
|
||||||
while ($xmlReader->name === 'si') {
|
|
||||||
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
|
|
||||||
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
|
|
||||||
|
|
||||||
// removes nodes that should not be read, like the pronunciation of the Kanji characters
|
|
||||||
$cleanNode = $this->removeSuperfluousTextNodes($node);
|
|
||||||
|
|
||||||
// find all text nodes 't'; there can be multiple if the cell contains formatting
|
|
||||||
$textNodes = $cleanNode->xpath('//ns:t');
|
|
||||||
|
|
||||||
$textValue = '';
|
|
||||||
foreach ($textNodes as $textNode) {
|
|
||||||
if ($this->shouldPreserveWhitespace($textNode)) {
|
|
||||||
$textValue .= $textNode->__toString();
|
|
||||||
} else {
|
|
||||||
$textValue .= trim($textNode->__toString());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$unescapedTextValue = $escaper->unescape($textValue);
|
while ($xmlReader->name === 'si') {
|
||||||
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
|
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
|
||||||
|
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
|
||||||
|
|
||||||
$sharedStringIndex++;
|
// removes nodes that should not be read, like the pronunciation of the Kanji characters
|
||||||
|
$cleanNode = $this->removeSuperfluousTextNodes($node);
|
||||||
|
|
||||||
// jump to the next 'si' tag
|
// find all text nodes 't'; there can be multiple if the cell contains formatting
|
||||||
$xmlReader->next('si');
|
$textNodes = $cleanNode->xpath('//ns:t');
|
||||||
|
|
||||||
|
$textValue = '';
|
||||||
|
foreach ($textNodes as $textNode) {
|
||||||
|
if ($this->shouldPreserveWhitespace($textNode)) {
|
||||||
|
$textValue .= $textNode->__toString();
|
||||||
|
} else {
|
||||||
|
$textValue .= trim($textNode->__toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$unescapedTextValue = $escaper->unescape($textValue);
|
||||||
|
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
|
||||||
|
|
||||||
|
$sharedStringIndex++;
|
||||||
|
|
||||||
|
// jump to the next 'si' tag
|
||||||
|
$xmlReader->next('si');
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (XMLProcessingException $exception) {
|
||||||
|
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->cachingStrategy->closeCache();
|
$this->cachingStrategy->closeCache();
|
||||||
@ -134,33 +142,19 @@ class SharedStringsHelper
|
|||||||
/**
|
/**
|
||||||
* Returns the shared strings unique count, as specified in <sst> tag.
|
* Returns the shared strings unique count, as specified in <sst> tag.
|
||||||
*
|
*
|
||||||
* @param \XMLReader $xmlReader XMLReader instance
|
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
|
||||||
* @return int Number of unique shared strings in the sharedStrings.xml file
|
* @return int Number of unique shared strings in the sharedStrings.xml file
|
||||||
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
|
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
|
||||||
*/
|
*/
|
||||||
protected function getSharedStringsUniqueCount($xmlReader)
|
protected function getSharedStringsUniqueCount($xmlReader)
|
||||||
{
|
{
|
||||||
// Use internal errors to avoid displaying lots of warning messages in case of invalid file
|
|
||||||
// For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
|
|
||||||
libxml_clear_errors();
|
|
||||||
libxml_use_internal_errors(true);
|
|
||||||
|
|
||||||
$xmlReader->next('sst');
|
$xmlReader->next('sst');
|
||||||
|
|
||||||
// Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
|
// Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
|
||||||
while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) {
|
while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== XMLReader::ELEMENT) {
|
||||||
$xmlReader->read();
|
$xmlReader->read();
|
||||||
}
|
}
|
||||||
|
|
||||||
$readError = libxml_get_last_error();
|
|
||||||
if ($readError !== false) {
|
|
||||||
$readErrorMessage = trim($readError->message);
|
|
||||||
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readErrorMessage}]");
|
|
||||||
}
|
|
||||||
|
|
||||||
// reset the setting to display XML warnings/errors
|
|
||||||
libxml_use_internal_errors(false);
|
|
||||||
|
|
||||||
return intval($xmlReader->getAttribute('uniqueCount'));
|
return intval($xmlReader->getAttribute('uniqueCount'));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,29 +174,19 @@ class SharedStringsHelper
|
|||||||
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
|
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
|
||||||
* This is to simplify the parsing of the subtree.
|
* This is to simplify the parsing of the subtree.
|
||||||
*
|
*
|
||||||
* @param \XMLReader $xmlReader
|
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
|
||||||
* @return \SimpleXMLElement
|
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement
|
||||||
* @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
|
* @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
|
||||||
*/
|
*/
|
||||||
protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
|
protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
|
||||||
{
|
{
|
||||||
// Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node.
|
|
||||||
// For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
|
|
||||||
libxml_clear_errors();
|
|
||||||
libxml_use_internal_errors(true);
|
|
||||||
|
|
||||||
$node = null;
|
$node = null;
|
||||||
try {
|
try {
|
||||||
$node = new \SimpleXMLElement($xmlReader->readOuterXml());
|
$node = new SimpleXMLElement($xmlReader->readOuterXml());
|
||||||
} catch (\Exception $exception) {
|
} catch (XMLProcessingException $exception) {
|
||||||
$error = libxml_get_last_error();
|
throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}].");
|
||||||
libxml_use_internal_errors(false);
|
|
||||||
|
|
||||||
throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
libxml_use_internal_errors(false);
|
|
||||||
|
|
||||||
return $node;
|
return $node;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,8 +194,8 @@ class SharedStringsHelper
|
|||||||
* Removes nodes that should not be read, like the pronunciation of the Kanji characters.
|
* Removes nodes that should not be read, like the pronunciation of the Kanji characters.
|
||||||
* By keeping them, their text content would be added to the read string.
|
* By keeping them, their text content would be added to the read string.
|
||||||
*
|
*
|
||||||
* @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove
|
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove
|
||||||
* @return \SimpleXMLElement Cleaned parent node
|
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node
|
||||||
*/
|
*/
|
||||||
protected function removeSuperfluousTextNodes($parentNode)
|
protected function removeSuperfluousTextNodes($parentNode)
|
||||||
{
|
{
|
||||||
@ -221,12 +205,7 @@ class SharedStringsHelper
|
|||||||
|
|
||||||
foreach ($tagsToRemove as $tagToRemove) {
|
foreach ($tagsToRemove as $tagToRemove) {
|
||||||
$xpath = '//ns:' . $tagToRemove;
|
$xpath = '//ns:' . $tagToRemove;
|
||||||
$nodesToRemove = $parentNode->xpath($xpath);
|
$parentNode->removeNodesMatchingXPath($xpath);
|
||||||
|
|
||||||
foreach ($nodesToRemove as $nodeToRemove) {
|
|
||||||
// This is how to remove a node from the XML
|
|
||||||
unset($nodeToRemove[0]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $parentNode;
|
return $parentNode;
|
||||||
@ -235,24 +214,13 @@ class SharedStringsHelper
|
|||||||
/**
|
/**
|
||||||
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
|
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
|
||||||
*
|
*
|
||||||
* @param \SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
|
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
|
||||||
* @return bool Whether whitespace should be preserved
|
* @return bool Whether whitespace should be preserved
|
||||||
*/
|
*/
|
||||||
protected function shouldPreserveWhitespace($textNode)
|
protected function shouldPreserveWhitespace($textNode)
|
||||||
{
|
{
|
||||||
$shouldPreserveWhitespace = false;
|
$spaceValue = $textNode->getAttribute('space', 'xml');
|
||||||
|
return ($spaceValue === 'preserve');
|
||||||
$attributes = $textNode->attributes('xml', true);
|
|
||||||
if ($attributes) {
|
|
||||||
foreach ($attributes as $attributeName => $attributeValue) {
|
|
||||||
if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') {
|
|
||||||
$shouldPreserveWhitespace = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $shouldPreserveWhitespace;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
namespace Box\Spout\Reader\XLSX\Helper;
|
namespace Box\Spout\Reader\XLSX\Helper;
|
||||||
|
|
||||||
|
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
|
||||||
use Box\Spout\Reader\XLSX\Sheet;
|
use Box\Spout\Reader\XLSX\Sheet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -37,10 +38,10 @@ class SheetHelper
|
|||||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||||
protected $globalFunctionsHelper;
|
protected $globalFunctionsHelper;
|
||||||
|
|
||||||
/** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
|
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file */
|
||||||
protected $workbookXMLRelsAsXMLElement;
|
protected $workbookXMLRelsAsXMLElement;
|
||||||
|
|
||||||
/** @var \SimpleXMLElement XML element representing the workbook.xml file */
|
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml file */
|
||||||
protected $workbookXMLAsXMLElement;
|
protected $workbookXMLAsXMLElement;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -76,7 +77,7 @@ class SheetHelper
|
|||||||
|
|
||||||
for ($i = 0; $i < $numSheetNodes; $i++) {
|
for ($i = 0; $i < $numSheetNodes; $i++) {
|
||||||
$sheetNode = $sheetNodes[$i];
|
$sheetNode = $sheetNodes[$i];
|
||||||
$sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;
|
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
|
||||||
|
|
||||||
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i);
|
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i);
|
||||||
}
|
}
|
||||||
@ -115,15 +116,15 @@ class SheetHelper
|
|||||||
|
|
||||||
if (count($relationshipNodes) === 1) {
|
if (count($relationshipNodes) === 1) {
|
||||||
$relationshipNode = $relationshipNodes[0];
|
$relationshipNode = $relationshipNodes[0];
|
||||||
$sheetId = (string) $relationshipNode->attributes()->Id;
|
$sheetId = $relationshipNode->getAttribute('Id');
|
||||||
|
|
||||||
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
|
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
|
||||||
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]');
|
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]');
|
||||||
|
|
||||||
if (count($sheetNodes) === 1) {
|
if (count($sheetNodes) === 1) {
|
||||||
$sheetNode = $sheetNodes[0];
|
$sheetNode = $sheetNodes[0];
|
||||||
$sheetId = (int) $sheetNode->attributes()->sheetId;
|
$sheetId = (int) $sheetNode->getAttribute('sheetId');
|
||||||
$escapedSheetName = (string) $sheetNode->attributes()->name;
|
$escapedSheetName = $sheetNode->getAttribute('name');
|
||||||
|
|
||||||
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||||
$sheetName = $escaper->unescape($escapedSheetName);
|
$sheetName = $escaper->unescape($escapedSheetName);
|
||||||
@ -149,7 +150,7 @@ class SheetHelper
|
|||||||
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
|
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
|
||||||
* The returned value is cached.
|
* The returned value is cached.
|
||||||
*
|
*
|
||||||
* @return \SimpleXMLElement XML element representating the workbook.xml.rels file
|
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representating the workbook.xml.rels file
|
||||||
*/
|
*/
|
||||||
protected function getWorkbookXMLRelsAsXMLElement()
|
protected function getWorkbookXMLRelsAsXMLElement()
|
||||||
{
|
{
|
||||||
@ -167,7 +168,7 @@ class SheetHelper
|
|||||||
* Returns a representation of the workbook.xml file, ready to be parsed.
|
* Returns a representation of the workbook.xml file, ready to be parsed.
|
||||||
* The returned value is cached.
|
* The returned value is cached.
|
||||||
*
|
*
|
||||||
* @return \SimpleXMLElement XML element representating the workbook.xml.rels file
|
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representating the workbook.xml.rels file
|
||||||
*/
|
*/
|
||||||
protected function getWorkbookXMLAsXMLElement()
|
protected function getWorkbookXMLAsXMLElement()
|
||||||
{
|
{
|
||||||
@ -186,13 +187,13 @@ class SheetHelper
|
|||||||
*
|
*
|
||||||
* @param string $xmlFilePath The path of the XML file inside the XLSX file
|
* @param string $xmlFilePath The path of the XML file inside the XLSX file
|
||||||
* @param string $mainNamespace The main XPath namespace to register
|
* @param string $mainNamespace The main XPath namespace to register
|
||||||
* @return \SimpleXMLElement The XML element representing the file
|
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement The XML element representing the file
|
||||||
*/
|
*/
|
||||||
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
|
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
|
||||||
{
|
{
|
||||||
$xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);
|
$xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);
|
||||||
|
|
||||||
$xmlElement = new \SimpleXMLElement($xmlContents);
|
$xmlElement = new SimpleXMLElement($xmlContents);
|
||||||
$xmlElement->registerXPathNamespace('ns', $mainNamespace);
|
$xmlElement->registerXPathNamespace('ns', $mainNamespace);
|
||||||
|
|
||||||
return $xmlElement;
|
return $xmlElement;
|
||||||
|
@ -61,7 +61,7 @@ class Reader extends AbstractReader
|
|||||||
|
|
||||||
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
|
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException('Could not open ' . $filePath . ' for reading.');
|
throw new IOException("Could not open $filePath for reading.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,9 @@
|
|||||||
namespace Box\Spout\Reader\XLSX;
|
namespace Box\Spout\Reader\XLSX;
|
||||||
|
|
||||||
use Box\Spout\Common\Exception\IOException;
|
use Box\Spout\Common\Exception\IOException;
|
||||||
|
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||||
use Box\Spout\Reader\IteratorInterface;
|
use Box\Spout\Reader\IteratorInterface;
|
||||||
|
use Box\Spout\Reader\Wrapper\XMLReader;
|
||||||
use Box\Spout\Reader\XLSX\Helper\CellHelper;
|
use Box\Spout\Reader\XLSX\Helper\CellHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -45,7 +47,7 @@ class RowIterator implements IteratorInterface
|
|||||||
/** @var Helper\SharedStringsHelper Helper to work with shared strings */
|
/** @var Helper\SharedStringsHelper Helper to work with shared strings */
|
||||||
protected $sharedStringsHelper;
|
protected $sharedStringsHelper;
|
||||||
|
|
||||||
/** @var \XMLReader The XMLReader object that will help read sheet's XML data */
|
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
|
||||||
protected $xmlReader;
|
protected $xmlReader;
|
||||||
|
|
||||||
/** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
|
/** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
|
||||||
@ -74,7 +76,7 @@ class RowIterator implements IteratorInterface
|
|||||||
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
|
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
|
||||||
$this->sharedStringsHelper = $sharedStringsHelper;
|
$this->sharedStringsHelper = $sharedStringsHelper;
|
||||||
|
|
||||||
$this->xmlReader = new \XMLReader();
|
$this->xmlReader = new XMLReader();
|
||||||
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
|
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,8 +104,8 @@ class RowIterator implements IteratorInterface
|
|||||||
$this->xmlReader->close();
|
$this->xmlReader->close();
|
||||||
|
|
||||||
$sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
|
$sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
|
||||||
if ($this->xmlReader->open($sheetDataFilePath, null, LIBXML_NONET) === false) {
|
if ($this->xmlReader->open($sheetDataFilePath) === false) {
|
||||||
throw new IOException('Could not open "' . $this->sheetDataXMLFilePath . '".');
|
throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->numReadRows = 0;
|
$this->numReadRows = 0;
|
||||||
@ -138,59 +140,52 @@ class RowIterator implements IteratorInterface
|
|||||||
$isInsideRowTag = false;
|
$isInsideRowTag = false;
|
||||||
$rowData = [];
|
$rowData = [];
|
||||||
|
|
||||||
// Use internal errors to avoid displaying lots of warning messages in case of invalid file
|
try {
|
||||||
// For instance on HHVM, XMLReader->open() won't fail when trying to read a unexisting file within a zip...
|
while ($this->xmlReader->read()) {
|
||||||
// But the XMLReader->read() will fail!
|
if ($this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
|
||||||
libxml_clear_errors();
|
// Read dimensions of the sheet
|
||||||
libxml_use_internal_errors(true);
|
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
||||||
|
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
||||||
|
$lastCellIndex = $matches[1];
|
||||||
|
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
while ($this->xmlReader->read()) {
|
} else if ($this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||||
if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
|
// Start of the row description
|
||||||
// Read dimensions of the sheet
|
$isInsideRowTag = true;
|
||||||
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
|
|
||||||
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
|
// Read spans info if present
|
||||||
$lastCellIndex = $matches[1];
|
$numberOfColumnsForRow = $this->numColumns;
|
||||||
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
|
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
||||||
|
if ($spans) {
|
||||||
|
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
||||||
|
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
||||||
|
}
|
||||||
|
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
||||||
|
|
||||||
|
} else if ($isInsideRowTag && $this->xmlReader->nodeType == XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
|
||||||
|
// Start of a cell description
|
||||||
|
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
||||||
|
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
|
||||||
|
|
||||||
|
$node = $this->xmlReader->expand();
|
||||||
|
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
||||||
|
|
||||||
|
} else if ($this->xmlReader->nodeType == XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
||||||
|
// End of the row description
|
||||||
|
// If needed, we fill the empty cells
|
||||||
|
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
||||||
|
$this->numReadRows++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
} else if ($this->xmlReader->nodeType == XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
|
||||||
|
// The closing "</worksheet>" marks the end of the file
|
||||||
|
$this->hasReachedEndOfFile = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
|
||||||
// Start of the row description
|
|
||||||
$isInsideRowTag = true;
|
|
||||||
|
|
||||||
// Read spans info if present
|
|
||||||
$numberOfColumnsForRow = $this->numColumns;
|
|
||||||
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
|
|
||||||
if ($spans) {
|
|
||||||
list(, $numberOfColumnsForRow) = explode(':', $spans);
|
|
||||||
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
|
|
||||||
}
|
|
||||||
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
|
|
||||||
|
|
||||||
} else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
|
|
||||||
// Start of a cell description
|
|
||||||
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
|
|
||||||
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
|
|
||||||
|
|
||||||
$node = $this->xmlReader->expand();
|
|
||||||
$rowData[$currentColumnIndex] = $this->getCellValue($node);
|
|
||||||
|
|
||||||
} else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
|
|
||||||
// End of the row description
|
|
||||||
// If needed, we fill the empty cells
|
|
||||||
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
|
|
||||||
$this->numReadRows++;
|
|
||||||
break;
|
|
||||||
|
|
||||||
} else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
|
|
||||||
// The closing "</worksheet>" marks the end of the file
|
|
||||||
$this->hasReachedEndOfFile = true;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
$readError = libxml_get_last_error();
|
} catch (XMLProcessingException $exception) {
|
||||||
if ($readError !== false) {
|
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
|
||||||
$readErrorMessage = trim($readError->message);
|
|
||||||
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$readErrorMessage}]");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->rowDataBuffer = $rowData;
|
$this->rowDataBuffer = $rowData;
|
||||||
|
@ -156,7 +156,7 @@ EOD;
|
|||||||
|
|
||||||
$wasWriteSuccessful = fwrite($this->sheetFilePointer, $data);
|
$wasWriteSuccessful = fwrite($this->sheetFilePointer, $data);
|
||||||
if ($wasWriteSuccessful === false) {
|
if ($wasWriteSuccessful === false) {
|
||||||
throw new IOException('Unable to write data in ' . $this->worksheetFilePath);
|
throw new IOException("Unable to write data in {$this->worksheetFilePath}");
|
||||||
}
|
}
|
||||||
|
|
||||||
// only update the count if the write worked
|
// only update the count if the write worked
|
||||||
|
127
tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php
Normal file
127
tests/Spout/Reader/Wrapper/SimpleXMLElementTest.php
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Wrapper;
|
||||||
|
|
||||||
|
use Box\Spout\TestUsingResource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class SimpleXMLElementTest
|
||||||
|
*
|
||||||
|
* @package Box\Spout\Reader\Wrapper
|
||||||
|
*/
|
||||||
|
class SimpleXMLElementTest extends \PHPUnit_Framework_TestCase
|
||||||
|
{
|
||||||
|
use TestUsingResource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
|
||||||
|
*
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testConstructShouldThrowExceptionIfInvalidData()
|
||||||
|
{
|
||||||
|
$invalidXML = '<invalid><xml></invalid>';
|
||||||
|
new SimpleXMLElement($invalidXML);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function dataProviderForTestGetAttribute()
|
||||||
|
{
|
||||||
|
$xmlWithoutNamespace = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<worksheet foo="bar" type="test" />
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$xmlWithHalfNamespace = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<worksheet
|
||||||
|
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||||
|
foo="bar" r:type="test" />
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$xmlWithFullNamespace = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<worksheet
|
||||||
|
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||||
|
r:foo="bar" r:type="test" />
|
||||||
|
XML;
|
||||||
|
|
||||||
|
return [
|
||||||
|
[$xmlWithoutNamespace, null, ['foo' => 'bar', 'type' => 'test']],
|
||||||
|
[$xmlWithHalfNamespace, null, ['foo' => 'bar', 'type' => null]],
|
||||||
|
[$xmlWithFullNamespace, null, ['foo' => null, 'type' => null]],
|
||||||
|
[$xmlWithoutNamespace, 'r', ['foo' => null, 'type' => null]],
|
||||||
|
[$xmlWithHalfNamespace, 'r', ['foo' => null, 'type' => 'test']],
|
||||||
|
[$xmlWithFullNamespace, 'r', ['foo' => 'bar', 'type' => 'test']],
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @dataProvider dataProviderForTestGetAttribute
|
||||||
|
*
|
||||||
|
* @param string $xml
|
||||||
|
* @param string|null $namespace
|
||||||
|
* @param array $expectedAttributes
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testGetAttribute($xml, $namespace, $expectedAttributes)
|
||||||
|
{
|
||||||
|
$element = new SimpleXMLElement($xml);
|
||||||
|
|
||||||
|
foreach ($expectedAttributes as $name => $expectedValue) {
|
||||||
|
$value = $element->getAttribute($name, $namespace);
|
||||||
|
$this->assertEquals($expectedValue, $value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testXPath()
|
||||||
|
{
|
||||||
|
$xml = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<worksheet>
|
||||||
|
<sheetData>
|
||||||
|
<row r="1">
|
||||||
|
<c r="A1"><v>0</v></c>
|
||||||
|
<c r="A2"><v>1</v></c>
|
||||||
|
</row>
|
||||||
|
</sheetData>
|
||||||
|
</worksheet>
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$element = new SimpleXMLElement($xml);
|
||||||
|
$matchedElements = $element->xpath('//c');
|
||||||
|
|
||||||
|
$this->assertEquals(2, count($matchedElements));
|
||||||
|
$this->assertTrue($matchedElements[0] instanceof SimpleXMLElement, 'The SimpleXMLElement should be wrapped');
|
||||||
|
$this->assertEquals('A2', $matchedElements[1]->getAttribute('r'));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testRemoveNodeMatchingXPath()
|
||||||
|
{
|
||||||
|
$xml = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<worksheet>
|
||||||
|
<sheetData>
|
||||||
|
<row r="1">
|
||||||
|
<c r="A1"><v>0</v></c>
|
||||||
|
<c r="A2"><v>1</v></c>
|
||||||
|
</row>
|
||||||
|
</sheetData>
|
||||||
|
</worksheet>
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$element = new SimpleXMLElement($xml);
|
||||||
|
$this->assertNotNull($element->getFirstChildByTagName('sheetData'));
|
||||||
|
|
||||||
|
$element->removeNodesMatchingXPath('//sheetData');
|
||||||
|
$this->assertNull($element->getFirstChildByTagName('sheetData'));
|
||||||
|
}
|
||||||
|
}
|
166
tests/Spout/Reader/Wrapper/XMLReaderTest.php
Normal file
166
tests/Spout/Reader/Wrapper/XMLReaderTest.php
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Box\Spout\Reader\Wrapper;
|
||||||
|
|
||||||
|
use Box\Spout\TestUsingResource;
|
||||||
|
use Box\Spout\Reader\Exception\XMLProcessingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class XMLReaderTest
|
||||||
|
*
|
||||||
|
* @package Box\Spout\Reader\Wrapper
|
||||||
|
*/
|
||||||
|
class XMLReaderTest extends \PHPUnit_Framework_TestCase
|
||||||
|
{
|
||||||
|
use TestUsingResource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testOpenShouldFailIfFileInsideZipDoesNotExist()
|
||||||
|
{
|
||||||
|
$resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
|
||||||
|
$nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';
|
||||||
|
|
||||||
|
$xmlReader = new XMLReader();
|
||||||
|
|
||||||
|
// using "@" to prevent errors/warning to be displayed
|
||||||
|
$wasOpenSuccessful = @$xmlReader->open($nonExistingXMLFilePath);
|
||||||
|
|
||||||
|
$this->assertTrue($wasOpenSuccessful === false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testing a HHVM bug: https://github.com/facebook/hhvm/issues/5779
|
||||||
|
* The associated code in XMLReader::open() can be removed when the issue is fixed (and this test starts failing).
|
||||||
|
* @see XMLReader::open()
|
||||||
|
*
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testHHVMStillDoesNotComplainWhenCallingOpenWithFileInsideZipNotExisting()
|
||||||
|
{
|
||||||
|
// Test should only be run on HHVM
|
||||||
|
if ($this->isRunningHHVM()) {
|
||||||
|
$resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
|
||||||
|
$nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';
|
||||||
|
|
||||||
|
libxml_clear_errors();
|
||||||
|
$initialUseInternalErrorsSetting = libxml_use_internal_errors(true);
|
||||||
|
|
||||||
|
// using the built-in XMLReader
|
||||||
|
$xmlReader = new \XMLReader();
|
||||||
|
$this->assertTrue($xmlReader->open($nonExistingXMLFilePath) !== false);
|
||||||
|
$this->assertTrue(libxml_get_last_error() === false);
|
||||||
|
|
||||||
|
libxml_use_internal_errors($initialUseInternalErrorsSetting);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return bool TRUE if running on HHVM, FALSE otherwise
|
||||||
|
*/
|
||||||
|
private function isRunningHHVM()
|
||||||
|
{
|
||||||
|
return defined('HHVM_VERSION');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
|
||||||
|
*
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testReadShouldThrowExceptionOnError()
|
||||||
|
{
|
||||||
|
$resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx');
|
||||||
|
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/worksheets/sheet1.xml';
|
||||||
|
|
||||||
|
$xmlReader = new XMLReader();
|
||||||
|
if ($xmlReader->open($sheetDataXMLFilePath) === false) {
|
||||||
|
$this->fail();
|
||||||
|
}
|
||||||
|
|
||||||
|
// using "@" to prevent errors/warning to be displayed
|
||||||
|
while (@$xmlReader->read()) {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
|
||||||
|
*
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testNextShouldThrowExceptionOnError()
|
||||||
|
{
|
||||||
|
// The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains
|
||||||
|
// a doctype element that causes read errors
|
||||||
|
$resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx');
|
||||||
|
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/sharedStrings.xml';
|
||||||
|
|
||||||
|
$xmlReader = new XMLReader();
|
||||||
|
if ($xmlReader->open($sheetDataXMLFilePath) !== false) {
|
||||||
|
@$xmlReader->next('sst');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function dataProviderForTestIsZipStream()
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
['/absolute/path/to/file.xlsx', false],
|
||||||
|
['relative/path/to/file.xlsx', false],
|
||||||
|
['php://temp', false],
|
||||||
|
['zip:///absolute/path/to/file.xlsx', true],
|
||||||
|
['zip://relative/path/to/file.xlsx', true],
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @dataProvider dataProviderForTestIsZipStream
|
||||||
|
*
|
||||||
|
* @param string $URI
|
||||||
|
* @param bool $expectedResult
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testIsZipStream($URI, $expectedResult)
|
||||||
|
{
|
||||||
|
$xmlReader = new XMLReader();
|
||||||
|
$isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'isZipStream', $URI);
|
||||||
|
|
||||||
|
$this->assertEquals($expectedResult, $isZipStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
public function dataProviderForTestFileExistsWithinZip()
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
['[Content_Types].xml', true],
|
||||||
|
['xl/sharedStrings.xml', true],
|
||||||
|
['xl/worksheets/sheet1.xml', true],
|
||||||
|
['/invalid/file.xml', false],
|
||||||
|
['another/invalid/file.xml', false],
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @dataProvider dataProviderForTestFileExistsWithinZip
|
||||||
|
*
|
||||||
|
* @param string $innerFilePath
|
||||||
|
* @param bool $expectedResult
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testFileExistsWithinZip($innerFilePath, $expectedResult)
|
||||||
|
{
|
||||||
|
$resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
|
||||||
|
$zipStreamURI = 'zip://' . $resourcePath . '#' . $innerFilePath;
|
||||||
|
|
||||||
|
$xmlReader = new XMLReader();
|
||||||
|
$isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'fileExistsWithinZip', $zipStreamURI);
|
||||||
|
|
||||||
|
$this->assertEquals($expectedResult, $isZipStream);
|
||||||
|
}
|
||||||
|
}
|
@ -6,7 +6,6 @@ use Box\Spout\Common\Exception\IOException;
|
|||||||
use Box\Spout\Common\Type;
|
use Box\Spout\Common\Type;
|
||||||
use Box\Spout\Reader\ReaderFactory;
|
use Box\Spout\Reader\ReaderFactory;
|
||||||
use Box\Spout\TestUsingResource;
|
use Box\Spout\TestUsingResource;
|
||||||
use Symfony\Component\Config\Definition\Exception\Exception;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class ReaderTest
|
* Class ReaderTest
|
||||||
|
BIN
tests/resources/xlsx/one_sheet_with_invalid_xml_characters.xlsx
Normal file
BIN
tests/resources/xlsx/one_sheet_with_invalid_xml_characters.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user