Support for strict OOXML

There are 2 types of OOXML format: transitional and strict. Transitional is what's mostly used but some softwares still allow XLSX to be generated using the strict OOXML format.
In this format, namespaces of the XML files are different: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings` is replaced by `http://purl.oclc.org/ooxml/officeDocument/relationships/sharedStrings` for instance. To support both formats, Spout needs to be able to look for both.
This commit is contained in:
Adrien Loison 2021-05-12 22:45:44 +02:00
parent 0837d49c2b
commit 2ff515c306
6 changed files with 24 additions and 18 deletions

View File

@ -65,7 +65,7 @@ class Cell
protected $style;
/**
* @param $value mixed
* @param mixed|null $value
* @param Style|null $style
*/
public function __construct($value, Style $style = null)

View File

@ -31,14 +31,6 @@ class CellValueFormatter
/** Constants used for date formatting */
const NUM_SECONDS_IN_ONE_DAY = 86400;
const NUM_SECONDS_IN_ONE_HOUR = 3600;
const NUM_SECONDS_IN_ONE_MINUTE = 60;
/**
* February 29th, 1900 is NOT a leap year but Excel thinks it is...
* @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
*/
const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
/** @var SharedStringsManager Manages shared strings */
protected $sharedStringsManager;

View File

@ -16,9 +16,6 @@ use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
*/
class SharedStringsManager
{
/** Main namespace for the sharedStrings.xml file */
const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Definition of XML nodes names used to parse data */
const XML_NODE_SST = 'sst';
const XML_NODE_SI = 'si';

View File

@ -17,10 +17,11 @@ class WorkbookRelationshipsManager
/** Path of workbook relationships XML file inside the XLSX file */
const WORKBOOK_RELS_XML_FILE_PATH = 'xl/_rels/workbook.xml.rels';
/** Relationships types */
/** Relationships types - For Transitional and Strict OOXML */
const RELATIONSHIP_TYPE_SHARED_STRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
const RELATIONSHIP_TYPE_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
const RELATIONSHIP_TYPE_WORKSHEET = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
const RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/sharedStrings';
const RELATIONSHIP_TYPE_STYLES_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/styles';
/** Nodes and attributes used to find relevant information in the workbook relationships XML file */
const XML_NODE_RELATIONSHIP = 'Relationship';
@ -52,7 +53,8 @@ class WorkbookRelationshipsManager
public function getSharedStringsXMLFilePath()
{
$workbookRelationships = $this->getWorkbookRelationships();
$sharedStringsXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS];
$sharedStringsXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS]
?? $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT];
// the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
$doesContainBasePath = (\strpos($sharedStringsXMLFilePath, self::BASE_PATH) !== false);
@ -71,7 +73,8 @@ class WorkbookRelationshipsManager
{
$workbookRelationships = $this->getWorkbookRelationships();
return isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS]);
return isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS])
|| isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT]);
}
/**
@ -81,7 +84,8 @@ class WorkbookRelationshipsManager
{
$workbookRelationships = $this->getWorkbookRelationships();
return isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES]);
return isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES])
|| isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES_STRICT]);
}
/**
@ -90,7 +94,8 @@ class WorkbookRelationshipsManager
public function getStylesXMLFilePath()
{
$workbookRelationships = $this->getWorkbookRelationships();
$stylesXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES];
$stylesXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES]
?? $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES_STRICT];
// the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
$doesContainBasePath = (\strpos($stylesXMLFilePath, self::BASE_PATH) !== false);

View File

@ -703,6 +703,18 @@ class ReaderTest extends TestCase
$this->assertEquals($expectedRows, $allRows, 'Cell values should not be trimmed');
}
/**
* https://github.com/box/spout/issues/726
* @return void
*/
public function testReaderShouldSupportStrictOOXML()
{
$allRows = $this->getAllRowsForFile('sheet_with_strict_ooxml.xlsx');
$this->assertEquals('UNIQUE_ACCOUNT_IDENTIFIER', $allRows[0][0]);
$this->assertEquals('A2Z34NJA7N2ESJ', $allRows[1][0]);
}
/**
* @param string $fileName
* @param bool $shouldFormatDates

Binary file not shown.