diff --git a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php
index 50209ec..0fb0ac6 100644
--- a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php
+++ b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php
@@ -22,9 +22,11 @@ class CellValueFormatter
/** Definition of XML nodes names used to parse data */
const XML_NODE_P = 'p';
- const XML_NODE_S = 'text:s';
- const XML_NODE_A = 'text:a';
- const XML_NODE_SPAN = 'text:span';
+ const XML_NODE_TEXT_A = 'text:a';
+ const XML_NODE_TEXT_SPAN = 'text:span';
+ const XML_NODE_TEXT_S = 'text:s';
+ const XML_NODE_TEXT_TAB = 'text:tab';
+ const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';
/** Definition of XML attributes used to parse data */
const XML_ATTRIBUTE_TYPE = 'office:value-type';
@@ -41,6 +43,13 @@ class CellValueFormatter
/** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */
protected $escaper;
+ /** @var array Map of whitespace XML node names to the literal whitespace character each one represents */
+ private static $WHITESPACE_XML_NODES = [
+ self::XML_NODE_TEXT_S => ' ',
+ self::XML_NODE_TEXT_TAB => "\t",
+ self::XML_NODE_TEXT_LINE_BREAK => "\n",
+ ];
+
/**
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
@@ -96,21 +105,7 @@ class CellValueFormatter
$pNodes = $node->getElementsByTagName(self::XML_NODE_P);
foreach ($pNodes as $pNode) {
- $currentPValue = '';
-
- foreach ($pNode->childNodes as $childNode) {
- if ($childNode instanceof \DOMText) {
- $currentPValue .= $childNode->nodeValue;
- } elseif ($childNode->nodeName === self::XML_NODE_S) {
- $spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
- $numSpaces = (!empty($spaceAttribute)) ? (int) $spaceAttribute : 1;
- $currentPValue .= str_repeat(' ', $numSpaces);
- } elseif ($childNode->nodeName === self::XML_NODE_A || $childNode->nodeName === self::XML_NODE_SPAN) {
- $currentPValue .= $childNode->nodeValue;
- }
- }
-
- $pNodeValues[] = $currentPValue;
+ $pNodeValues[] = $this->extractTextValueFromNode($pNode);
}
$escapedCellValue = implode("\n", $pNodeValues);
@@ -119,6 +114,62 @@ class CellValueFormatter
return $cellValue;
}
+    /**
+     * Builds the text content of the given node by walking its direct children
+     * in document order:
+     *  - text nodes contribute their value as is
+     *  - whitespace nodes (see isWhitespaceNode()) are expanded to literal whitespace
+     *  - "<text:a>" and "<text:span>" nodes are traversed recursively
+     * Any other child node is ignored.
+     *
+     * @param \DOMNode $pNode Node whose children should be converted to text
+     * @return string The concatenated text value
+     */
+    private function extractTextValueFromNode($pNode)
+    {
+        $fragments = [];
+
+        foreach ($pNode->childNodes as $child) {
+            if ($child instanceof \DOMText) {
+                $fragments[] = $child->nodeValue;
+                continue;
+            }
+
+            $childName = $child->nodeName;
+
+            if ($this->isWhitespaceNode($childName)) {
+                $fragments[] = $this->transformWhitespaceNode($child);
+                continue;
+            }
+
+            // Links and spans only wrap further inline content, so descend into them.
+            if ($childName === self::XML_NODE_TEXT_A || $childName === self::XML_NODE_TEXT_SPAN) {
+                $fragments[] = $this->extractTextValueFromNode($child);
+            }
+        }
+
+        return implode('', $fragments);
+    }
+
+    /**
+     * Tells whether the given node name designates a whitespace node, i.e. one
+     * of the keys of self::$WHITESPACE_XML_NODES:
+     *  - <text:s />
+     *  - <text:tab />
+     *  - <text:line-break />
+     *
+     * @param string $nodeName Qualified name of the XML node to check
+     * @return bool TRUE if the name is a known whitespace node, FALSE otherwise
+     */
+    private function isWhitespaceNode($nodeName)
+    {
+        $knownWhitespaceNodes = self::$WHITESPACE_XML_NODES;
+
+        return isset($knownWhitespaceNodes[$nodeName]);
+    }
+
+    /**
+     * Converts a whitespace XML node into the literal whitespace it stands for.
+     *
+     * The "<text:p>" node can contain the string value directly or contain
+     * child elements. In the latter case, whitespace is encoded as XML elements
+     * that must be replaced by their literal equivalent:
+     *  - <text:s />          => space
+     *  - <text:tab />        => tab
+     *  - <text:line-break /> => line break
+     *
+     * The optional count attribute gives the number of repetitions. When it is
+     * absent, a single whitespace character is returned. An explicit "0" yields
+     * an empty string, and negative values are clamped to 0.
+     *
+     * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
+     *
+     * @param \DOMElement $node The XML node representing a whitespace; its name must be a key of self::$WHITESPACE_XML_NODES
+     * @return string The corresponding whitespace value
+     */
+    private function transformWhitespaceNode($node)
+    {
+        // getAttribute() returns an empty string when the attribute is missing.
+        // The count attribute is only defined for "<text:s>".
+        $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C);
+
+        if ($countAttribute === '') {
+            $numWhitespaces = 1;
+        } else {
+            // Do not use empty() here: "0" is a legitimate count and must not
+            // fall back to 1. Clamp negatives so str_repeat() never receives a
+            // negative multiplier.
+            $numWhitespaces = max(0, (int) $countAttribute);
+        }
+
+        return str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
+    }
+
/**
* Returns the cell Numeric value from the given node.
*
diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php
index 674e4b0..ce56e4b 100644
--- a/tests/Spout/Reader/ODS/ReaderTest.php
+++ b/tests/Spout/Reader/ODS/ReaderTest.php
@@ -277,6 +277,17 @@ class ReaderTest extends TestCase
$this->assertEquals([$expectedRow], $allRows);
}
+    /**
+     * Reads the "sheet_with_whitespaces_as_xml.ods" fixture and checks that the
+     * single row comes back with literal whitespace characters in its text.
+     *
+     * @return void
+     */
+    public function testReadShouldSupportWhitespaceAsXML()
+    {
+        $expectedRows = [["Lorem ipsum\tdolor sit amet"]];
+
+        $actualRows = $this->getAllRowsForFile('sheet_with_whitespaces_as_xml.ods');
+
+        $this->assertEquals($expectedRows, $actualRows);
+    }
+
/**
* @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
*
diff --git a/tests/resources/ods/sheet_with_whitespaces_as_xml.ods b/tests/resources/ods/sheet_with_whitespaces_as_xml.ods
new file mode 100644
index 0000000..d345951
Binary files /dev/null and b/tests/resources/ods/sheet_with_whitespaces_as_xml.ods differ