diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index 5f22ed6..550a6b1 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -119,7 +119,7 @@ class GlobalFunctionsHelper * Wrapper around global function file_exists() * @see file_exists() * - * @param string $filename + * @param string $fileName * @return bool */ public function file_exists($fileName) @@ -127,11 +127,23 @@ class GlobalFunctionsHelper return file_exists($fileName); } + /** + * Wrapper around global function file_get_contents() + * @see file_get_contents() + * + * @param string $filePath + * @return string|false The contents read, or false on failure + */ + public function file_get_contents($filePath) + { + return file_get_contents($filePath); + } + /** * Wrapper around global function is_readable() * @see is_readable() * - * @param string $filename + * @param string $fileName * @return bool */ public function is_readable($fileName) @@ -144,11 +156,12 @@ class GlobalFunctionsHelper * @see basename() * * @param string $path + * @param string|null $suffix Suffix to strip from the returned name, if any * @return string */ - public function basename($path) + public function basename($path, $suffix = null) { - return basename($path); + return basename($path, (string) $suffix); } /** diff --git a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php b/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php index a105e01..f531b37 100644 --- a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php +++ b/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php @@ -3,6 +3,7 @@ namespace Box\Spout\Reader\Helper\XLSX; use Box\Spout\Reader\Internal\XLSX\Worksheet; +use Box\Spout\Reader\Sheet; /** * Class WorksheetHelper @@ -12,11 +13,18 @@ use Box\Spout\Reader\Internal\XLSX\Worksheet; */ class WorksheetHelper { - /** Path of Content_Types XML file inside the XLSX file */ - const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml'; + /** Extension for XML files */ + const XML_EXTENSION = '.xml'; - /** Main
namespace for the [Content_Types].xml file */ + /** Paths of XML files relative to the XLSX file root */ + const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml'; + const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; + const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml'; + + /** Namespaces for the XML files */ const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types'; + const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships'; + const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; /** Value of the Override attribute used in [Content_Types].xml to define worksheets */ const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'; @@ -24,12 +32,23 @@ class WorksheetHelper /** @var string Path of the XLSX file being read */ protected $filePath; + /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ + protected $globalFunctionsHelper; + + /** @var \SimpleXMLElement|null XML element representing the workbook.xml.rels file (loaded on first use, then cached) */ + protected $workbookXMLRelsAsXMLElement; + + /** @var \SimpleXMLElement|null XML element representing the workbook.xml file (loaded on first use, then cached) */ + protected $workbookXMLAsXMLElement; + /** * @param string $filePath Path of the XLSX file being read + * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper Helper to work with global functions */ - public function __construct($filePath) + public function __construct($filePath, $globalFunctionsHelper) { $this->filePath = $filePath; + $this->globalFunctionsHelper = $globalFunctionsHelper; } /** @@ -42,23 +61,139 @@ class WorksheetHelper { $worksheets = []; - $xmlContents = file_get_contents('zip://' . $this->filePath . '#' .
self::CONTENT_TYPES_XML_FILE_PATH); - - $contentTypes = new \SimpleXMLElement($xmlContents); - $contentTypes->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML); + $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::CONTENT_TYPES_XML_FILE_PATH, + self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML + ); // find all nodes defining a worksheet - $sheetNodes = $contentTypes->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); + $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); for ($i = 0; $i < count($sheetNodes); $i++) { $sheetNode = $sheetNodes[$i]; $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; - $worksheets[] = new Worksheet($i, $sheetDataXMLFilePath); + + $sheet = $this->getSheet($sheetDataXMLFilePath, $i); + $worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath); } return $worksheets; } + /** + * Returns an instance of a sheet, given the path of its data XML file. + * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. + * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID. + * The entry contains the ID and name of the sheet. + * + * If this piece of data can't be found by parsing the different XML files, the ID will default + * to the one-based sheet index (zero-based order in [Content_Types].xml, plus one). Similarly, the sheet's name will + * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
+ * + * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml + * @param int $sheetNumberZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) + * @return \Box\Spout\Reader\Sheet Sheet instance + */ + protected function getSheet($sheetDataXMLFilePath, $sheetNumberZeroBased) + { + $sheetId = $sheetNumberZeroBased + 1; + $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); + + /* + * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" + * In workbook.xml.rels, it is only "worksheets/sheet1.xml": remove the leading "xl/" prefix only (ltrim() takes a character mask, not a prefix) + */ + $sheetDataXMLFilePathInWorkbookXMLRels = preg_replace('#^/?xl/#', '', $sheetDataXMLFilePath); + + // find the node associated to the given file path + $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); + $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); + + if (count($relationshipNodes) === 1) { + $relationshipNode = $relationshipNodes[0]; + $relationshipId = (string) $relationshipNode->attributes()->Id; + + $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); + $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]'); + + if (count($sheetNodes) === 1) { + $sheetNode = $sheetNodes[0]; + $sheetId = (int) $sheetNode->attributes()->sheetId; + $escapedSheetName = (string) $sheetNode->attributes()->name; + + $escaper = new \Box\Spout\Common\Escaper\XLSX(); + $sheetName = $escaper->unescape($escapedSheetName); + } + } + + return new Sheet($sheetId, $sheetNumberZeroBased, $sheetName); + } + + /** + * Returns the default name of the sheet whose data is located + * at the given path.
+ * + * @param string $sheetDataXMLFilePath Path of the sheet data XML file + * @return string The default sheet name + */ + protected function getDefaultSheetName($sheetDataXMLFilePath) + { + return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION); + } + + /** + * Returns a representation of the workbook.xml.rels file, ready to be parsed. + * The returned value is cached. + * + * @return \SimpleXMLElement XML element representing the workbook.xml.rels file + */ + protected function getWorkbookXMLRelsAsXMLElement() + { + if (!$this->workbookXMLRelsAsXMLElement) { + $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::WORKBOOK_XML_RELS_FILE_PATH, + self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS + ); + } + + return $this->workbookXMLRelsAsXMLElement; + } + + /** + * Returns a representation of the workbook.xml file, ready to be parsed. + * The returned value is cached. + * + * @return \SimpleXMLElement XML element representing the workbook.xml file + */ + protected function getWorkbookXMLAsXMLElement() + { + if (!$this->workbookXMLAsXMLElement) { + $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::WORKBOOK_XML_FILE_PATH, + self::MAIN_NAMESPACE_FOR_WORKBOOK_XML + ); + } + + return $this->workbookXMLAsXMLElement; + } + + /** + * Loads the contents of the given file in an XML parser and registers the given XPath namespace under the "ns" prefix. + * + * @param string $xmlFilePath The path of the XML file inside the XLSX file + * @param string $mainNamespace The main XPath namespace to register + * @return \SimpleXMLElement The XML element representing the file + */ + protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace) + { + $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' .
$xmlFilePath); + + $xmlElement = new \SimpleXMLElement($xmlContents); + $xmlElement->registerXPathNamespace('ns', $mainNamespace); + + return $xmlElement; + } + /** * Returns whether another worksheet exists after the current worksheet. * The order is determined by the order of appearance in the [Content_Types].xml file. diff --git a/src/Spout/Reader/Internal/XLSX/Worksheet.php b/src/Spout/Reader/Internal/XLSX/Worksheet.php index 552c53e..4fd6ca2 100644 --- a/src/Spout/Reader/Internal/XLSX/Worksheet.php +++ b/src/Spout/Reader/Internal/XLSX/Worksheet.php @@ -10,18 +10,23 @@ namespace Box\Spout\Reader\Internal\XLSX; */ class Worksheet { + /** @var \Box\Spout\Reader\Sheet The "external" sheet */ + protected $externalSheet; + /** @var int Worksheet number, based on the order of appareance in [Content_Types].xml (zero-based) */ protected $worksheetNumber; /** @var string Path of the XML file containing the worksheet data */ protected $dataXmlFilePath; - /** + /** + * @param \Box\Spout\Reader\Sheet $externalSheet The associated "external" sheet * @param int $worksheetNumber Worksheet number, based on the order of appareance in [Content_Types].xml (zero-based) * @param string $dataXmlFilePath Path of the XML file containing the worksheet data */ - public function __construct($worksheetNumber, $dataXmlFilePath) + public function __construct($externalSheet, $worksheetNumber, $dataXmlFilePath) { + $this->externalSheet = $externalSheet; $this->worksheetNumber = $worksheetNumber; $this->dataXmlFilePath = $dataXmlFilePath; } @@ -34,6 +39,14 @@ class Worksheet return ltrim($this->dataXmlFilePath, '/'); } + /** + * @return \Box\Spout\Reader\Sheet The "external" sheet + */ + public function getExternalSheet() + { + return $this->externalSheet; + } + /** * @return int */ diff --git a/src/Spout/Reader/Sheet.php b/src/Spout/Reader/Sheet.php new file mode 100644 index 0000000..5b6b0ab --- /dev/null +++ b/src/Spout/Reader/Sheet.php @@ -0,0 +1,57 @@ +id = $sheetId; + $this->number =
$sheetNumber; + $this->name = $sheetName; + } + + /** + * @return int ID of the sheet + */ + public function getId() + { + return $this->id; + } + + /** + * @return int Number of the sheet (zero-based); WorksheetHelper passes the sheet's index in [Content_Types].xml + */ + public function getNumber() + { + return $this->number; + } + + /** + * @return string Name of the sheet + */ + public function getName() + { + return $this->name; + } +} diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php index db46707..2fa85f0 100644 --- a/src/Spout/Reader/XLSX.php +++ b/src/Spout/Reader/XLSX.php @@ -76,7 +76,7 @@ class XLSX extends AbstractReader $this->extractSharedStrings($filePath); // Fetch all available worksheets - $this->worksheetHelper = new WorksheetHelper($filePath); + $this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper); $this->worksheets = $this->worksheetHelper->getWorksheets($filePath); if (count($this->worksheets) === 0) { @@ -119,29 +119,31 @@ class XLSX extends AbstractReader * Moves the pointer to the current worksheet. * Moving to another worksheet will stop the reading in the current worksheet.
* - * @return void + * @return \Box\Spout\Reader\Sheet The sheet the reader has just moved to (the new current sheet) * @throws Exception\ReaderNotOpenedException If the stream was not opened first * @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read */ public function nextSheet() { - if ($this->hasNextSheet()) { - if ($this->currentWorksheet === null) { - $nextWorksheet = $this->worksheets[0]; - } else { - $currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber(); - $nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1]; - } - - $this->initXmlReaderForWorksheetData($nextWorksheet); - $this->currentWorksheet = $nextWorksheet; - - // make sure that we are ready to read more rows - $this->hasReachedEndOfFile = false; - $this->emptyRowDataBuffer(); - } else { + if (!$this->hasNextSheet()) { throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.'); } + + if ($this->currentWorksheet === null) { + $nextWorksheet = $this->worksheets[0]; + } else { + $currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber(); + $nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1]; + } + + $this->initXmlReaderForWorksheetData($nextWorksheet); + $this->currentWorksheet = $nextWorksheet; + + // reset the end-of-file flag and the row buffer so that rows of the new sheet can be read + $this->hasReachedEndOfFile = false; + $this->emptyRowDataBuffer(); + + return $this->currentWorksheet->getExternalSheet(); } /** diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php index 23d26bc..3a1a34f 100644 --- a/tests/Spout/Reader/XLSXTest.php +++ b/tests/Spout/Reader/XLSXTest.php @@ -200,6 +200,32 @@ class XLSXTest extends \PHPUnit_Framework_TestCase $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); } + /** + * @return void + */ + public function testNextSheetShouldReturnCorrectSheetInfos() + { + $resourcePath = $this->getResourcePath('two_sheets_with_custom_names.xlsx'); + $reader =
ReaderFactory::create(Type::XLSX); + $reader->open($resourcePath); + + /** @var \Box\Spout\Reader\Sheet[] $sheets */ + $sheets = []; + while ($reader->hasNextSheet()) { + $sheets[] = $reader->nextSheet(); + } + + $reader->close(); + + $this->assertEquals('CustomName1', $sheets[0]->getName()); + $this->assertEquals(0, $sheets[0]->getNumber()); + $this->assertEquals(1, $sheets[0]->getId()); + + $this->assertEquals('CustomName2', $sheets[1]->getName()); + $this->assertEquals(1, $sheets[1]->getNumber()); + $this->assertEquals(2, $sheets[1]->getId()); + } + /** * @param string $fileName * @return array All the read rows the given file diff --git a/tests/resources/xlsx/two_sheets_with_custom_names.xlsx b/tests/resources/xlsx/two_sheets_with_custom_names.xlsx new file mode 100644 index 0000000..722e212 Binary files /dev/null and b/tests/resources/xlsx/two_sheets_with_custom_names.xlsx differ