Refactor SharedStringsHelper::extractSharedStrings
This commit is contained in:
parent
2fa01cd838
commit
441c33100f
@ -94,44 +94,19 @@ class SharedStringsHelper
|
|||||||
$xmlReader->readUntilNodeFound('si');
|
$xmlReader->readUntilNodeFound('si');
|
||||||
|
|
||||||
while ($xmlReader->name === 'si') {
|
while ($xmlReader->name === 'si') {
|
||||||
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
|
$this->processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper);
|
||||||
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
|
|
||||||
|
|
||||||
// removes nodes that should not be read, like the pronunciation of the Kanji characters
|
|
||||||
$cleanNode = $this->removeSuperfluousTextNodes($node);
|
|
||||||
|
|
||||||
// find all text nodes "t"; there can be multiple if the cell contains formatting
|
|
||||||
$textNodes = $cleanNode->xpath('//ns:t');
|
|
||||||
|
|
||||||
$textValue = '';
|
|
||||||
foreach ($textNodes as $nodeIndex => $textNode) {
|
|
||||||
if ($nodeIndex !== 0) {
|
|
||||||
// add a space between each "t" node
|
|
||||||
$textValue .= ' ';
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($this->shouldPreserveWhitespace($textNode)) {
|
|
||||||
$textValue .= $textNode->__toString();
|
|
||||||
} else {
|
|
||||||
$textValue .= trim($textNode->__toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$unescapedTextValue = $escaper->unescape($textValue);
|
|
||||||
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
|
|
||||||
|
|
||||||
$sharedStringIndex++;
|
$sharedStringIndex++;
|
||||||
|
|
||||||
// jump to the next 'si' tag
|
// jump to the next 'si' tag
|
||||||
$xmlReader->next('si');
|
$xmlReader->next('si');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$this->cachingStrategy->closeCache();
|
||||||
|
|
||||||
} catch (XMLProcessingException $exception) {
|
} catch (XMLProcessingException $exception) {
|
||||||
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
|
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->cachingStrategy->closeCache();
|
|
||||||
|
|
||||||
$xmlReader->close();
|
$xmlReader->close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,6 +157,31 @@ class SharedStringsHelper
|
|||||||
->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder);
|
->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the shared strings item that the given XML reader is currently positioned on
 * and hands its unescaped text value to the caching strategy, under the given index.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
 * @param int $sharedStringIndex Index of the processed shared strings item
 * @param \Box\Spout\Common\Escaper\XLSX $escaper Helper to escape values
 * @return void
 */
protected function processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper)
{
    $itemNode = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
    $itemNode->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);

    // Drop the nodes that should not be read (like the pronunciation of the Kanji characters),
    // then collect every remaining "t" node: there can be several if the cell contains formatting.
    $textNodes = $this->removeSuperfluousTextNodes($itemNode)->xpath('//ns:t');

    $unescapedTextValue = $escaper->unescape($this->extractTextValueForNodes($textNodes));

    $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
|
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
|
||||||
* This is to simplify the parsing of the subtree.
|
* This is to simplify the parsing of the subtree.
|
||||||
@ -225,6 +225,29 @@ class SharedStringsHelper
|
|||||||
return $parentNode;
|
return $parentNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the text value of a shared strings item from its text nodes.
 *
 * The content of each node is trimmed, unless the node requests that its
 * whitespace be preserved. The resulting values are then joined with a single space.
 *
 * @param array $textNodes Text XML nodes ("<t>")
 * @return string The value associated with the given text node(s); empty string if there are none
 */
protected function extractTextValueForNodes($textNodes)
{
    // SimpleXMLElement::xpath() may return false/null instead of an array when it fails:
    // treat that as "no text nodes" rather than letting foreach emit a warning.
    if (!is_array($textNodes) && !($textNodes instanceof \Traversable)) {
        return '';
    }

    $textValues = [];

    foreach ($textNodes as $textNode) {
        $textNodeAsString = $textNode->__toString();
        $shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode);

        $textValues[] = ($shouldPreserveWhitespace) ? $textNodeAsString : trim($textNodeAsString);
    }

    // add a space between each "t" node; joining here keeps the separator
    // independent of the keys of the given array
    return implode(' ', $textValues);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
|
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
|
||||||
*
|
*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user