diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index 3400509..23a2b08 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -14,18 +14,13 @@ use Box\Spout\Reader\XLSX\Sheet; class SheetHelper { /** Paths of XML files relative to the XLSX file root */ - const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml'; const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml'; /** Namespaces for the XML files */ - const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types'; const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships'; const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; - /** Value of the Override attribute used in [Content_Types].xml to define sheets */ - const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'; - /** @var string Path of the XLSX file being read */ protected $filePath; @@ -63,65 +58,51 @@ class SheetHelper { $sheets = []; - $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::CONTENT_TYPES_XML_FILE_PATH, - self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML - ); + // Starting from "workbook.xml" as this file is the source of truth for the sheets order + $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); + $sheetNodes = $workbookXMLElement->xpath('//ns:sheet'); - // find all nodes defining a sheet - $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . 
'"]'); - $numSheetNodes = count($sheetNodes); - - for ($i = 0; $i < $numSheetNodes; $i++) { - $sheetNode = $sheetNodes[$i]; - $sheetDataXMLFilePath = $sheetNode->getAttribute('PartName'); - - $sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath); + foreach ($sheetNodes as $sheetIndex => $sheetNode) { + $sheets[] = $this->getSheetFromSheetXMLNode($sheetNode, $sheetIndex); } - // make sure the sheets are sorted by index - // (as the sheets are not necessarily in this order in the XML file) - usort($sheets, function ($sheet1, $sheet2) { - return ($sheet1->getIndex() - $sheet2->getIndex()); - }); - return $sheets; } /** - * Returns an instance of a sheet, given the path of its data XML file. - * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. - * Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID. - * The entry contains the ID and name of the sheet. + * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml". + * We can find the XML file path describing the sheet inside "workbook.xml.rels", by mapping with the sheet ID + * ("r:id" in "workbook.xml", "Id" in "workbook.xml.rels"). 
* - * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml + * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $sheetNode XML Node describing the sheet, as defined in "workbook.xml" + * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based) * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance */ - protected function getSheetFromXML($sheetDataXMLFilePath) + protected function getSheetFromSheetXMLNode($sheetNode, $sheetIndexZeroBased) { - // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" - // In workbook.xml.rels, it is only "worksheets/sheet1.xml" - $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/'); - - // find the node associated to the given file path - $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); - $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); - $relationshipNode = $relationshipNodes[0]; - - $relationshipSheetId = $relationshipNode->getAttribute('Id'); - - $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); - $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]'); - $sheetNode = $sheetNodes[0]; + // To retrieve namespaced attributes, some versions of LibXML will accept prefixing the attribute + // with the namespace directly (tested on LibXML 2.9.3). For older versions (tested on LibXML 2.7.8), + // attributes need to be retrieved without the namespace hint. 
+ $sheetId = $sheetNode->getAttribute('r:id'); + if ($sheetId === null) { + $sheetId = $sheetNode->getAttribute('id'); + } $escapedSheetName = $sheetNode->getAttribute('name'); - $sheetIdOneBased = $sheetNode->getAttribute('sheetId'); - $sheetIndexZeroBased = $sheetIdOneBased - 1; /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $escaper = new \Box\Spout\Common\Escaper\XLSX(); $sheetName = $escaper->unescape($escapedSheetName); + // find the file path of the sheet, by looking at the "workbook.xml.rels" file + $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); + $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Id="' . $sheetId . '"]'); + $relationshipNode = $relationshipNodes[0]; + + // In workbook.xml.rels, it is only "worksheets/sheet1.xml" + // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" + $sheetDataXMLFilePath = '/xl/' . $relationshipNode->getAttribute('Target'); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName); } diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index b1e6fdd..176f6d0 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -23,7 +23,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase { return [ ['/path/to/fake/file.xlsx'], - ['file_with_no_sheets_in_content_types.xlsx'], + ['file_with_no_sheets_in_workbook_xml.xlsx'], ['file_with_sheet_xml_not_matching_content_types.xlsx'], ['file_corrupted.xlsx'], ]; diff --git a/tests/resources/xlsx/file_with_no_sheets_in_content_types.xlsx b/tests/resources/xlsx/file_with_no_sheets_in_content_types.xlsx deleted file mode 100644 index 597b230..0000000 Binary files a/tests/resources/xlsx/file_with_no_sheets_in_content_types.xlsx and /dev/null differ diff --git a/tests/resources/xlsx/file_with_no_sheets_in_workbook_xml.xlsx 
b/tests/resources/xlsx/file_with_no_sheets_in_workbook_xml.xlsx new file mode 100644 index 0000000..74de527 Binary files /dev/null and b/tests/resources/xlsx/file_with_no_sheets_in_workbook_xml.xlsx differ