Expose a Sheet object on Reader::XLSX::nextSheet()

Added Sheet class for the XLSX reader that exposes basic sheet info, such as name or ID.
When retrieving the sheet data XML, added extra XML parsing to retrieve sheet data.
Added test
This commit is contained in:
Adrien Loison 2015-04-29 00:16:03 -07:00
parent e58284d27b
commit e9ec4e745c
7 changed files with 279 additions and 33 deletions

View File

@ -119,7 +119,7 @@ class GlobalFunctionsHelper
* Wrapper around global function file_exists()
* @see file_exists()
*
* @param string $filename
* @param string $fileName
* @return bool
*/
public function file_exists($fileName)
@ -127,11 +127,23 @@ class GlobalFunctionsHelper
return file_exists($fileName);
}
/**
* Wrapper around global function file_get_contents()
* Exists so that callers go through this helper object instead of calling the global function directly.
* @see file_get_contents()
*
* @param string $filePath Path of the file to read, passed as-is to file_get_contents()
* @return string|bool The value returned by file_get_contents(): the read data, or false on failure
*/
public function file_get_contents($filePath)
{
return file_get_contents($filePath);
}
/**
* Wrapper around global function is_readable()
* @see is_readable()
*
* @param string $filename
* @param string $fileName
* @return bool
*/
public function is_readable($fileName)
@ -144,11 +156,12 @@ class GlobalFunctionsHelper
* @see basename()
*
* @param string $path
* @param string|null $suffix
* @return string
*/
public function basename($path)
public function basename($path, $suffix = null)
{
return basename($path);
return basename($path, $suffix);
}
/**

View File

@ -3,6 +3,7 @@
namespace Box\Spout\Reader\Helper\XLSX;
use Box\Spout\Reader\Internal\XLSX\Worksheet;
use Box\Spout\Reader\Sheet;
/**
* Class WorksheetHelper
@ -12,11 +13,18 @@ use Box\Spout\Reader\Internal\XLSX\Worksheet;
*/
class WorksheetHelper
{
/** Path of Content_Types XML file inside the XLSX file */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
/** Extension for XML files */
const XML_EXTENSION = '.xml';
/** Main namespace for the [Content_Types].xml file */
/** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/** Namespaces for the XML files */
const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Value of the Override attribute used in [Content_Types].xml to define worksheets */
const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';
@ -24,12 +32,23 @@ class WorksheetHelper
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
protected $workbookXMLRelsAsXMLElement;
/** @var \SimpleXMLElement XML element representing the workbook.xml file */
protected $workbookXMLAsXMLElement;
/**
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePath)
public function __construct($filePath, $globalFunctionsHelper)
{
$this->filePath = $filePath;
$this->globalFunctionsHelper = $globalFunctionsHelper;
}
/**
@ -42,23 +61,139 @@ class WorksheetHelper
{
$worksheets = [];
$xmlContents = file_get_contents('zip://' . $this->filePath . '#' . self::CONTENT_TYPES_XML_FILE_PATH);
$contentTypes = new \SimpleXMLElement($xmlContents);
$contentTypes->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML);
$contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::CONTENT_TYPES_XML_FILE_PATH,
self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
);
// find all nodes defining a worksheet
$sheetNodes = $contentTypes->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
$sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
for ($i = 0; $i < count($sheetNodes); $i++) {
$sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;
$worksheets[] = new Worksheet($i, $sheetDataXMLFilePath);
$sheet = $this->getSheet($sheetDataXMLFilePath, $i);
$worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath);
}
return $worksheets;
}
/**
 * Returns an instance of a sheet, given the path of its data XML file.
 * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
 * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID.
 * The entry contains the ID and name of the sheet.
 *
 * If this piece of data can't be found by parsing the different XML files, the ID will default
 * to the sheet index + 1, based on order in [Content_Types].xml. Similarly, the sheet's name will
 * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
 *
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 * @param int $sheetNumberZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
 * @return \Box\Spout\Reader\Sheet Sheet instance
 */
protected function getSheet($sheetDataXMLFilePath, $sheetNumberZeroBased)
{
    // Fallback values, kept when the lookups below do not return exactly one match
    $sheetId = $sheetNumberZeroBased + 1;
    $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);

    /*
     * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
     * In workbook.xml.rels, it is only "worksheets/sheet1.xml"
     *
     * Only the literal "xl/" prefix (with its optional leading slash) must be removed.
     * ltrim($path, '/xl/') is not suitable: its second argument is a list of characters,
     * so it would also strip any leading "x", "l" or "/" of the remaining path
     * (e.g. "/xl/lists/sheet1.xml" would become "ists/sheet1.xml").
     */
    $sheetDataXMLFilePathInWorkbookXMLRels = preg_replace('#^/?xl/#', '', $sheetDataXMLFilePath);

    // find the relationship node associated to the given file path
    $workbookXMLRelsElement = $this->getWorkbookXMLRelsAsXMLElement();
    $relationshipNodes = $workbookXMLRelsElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');

    if (count($relationshipNodes) === 1) {
        $relationshipNode = $relationshipNodes[0];
        $relationshipId = (string) $relationshipNode->attributes()->Id;

        // find the sheet entry referencing that relationship ID
        $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
        $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]');

        if (count($sheetNodes) === 1) {
            $sheetNode = $sheetNodes[0];
            $sheetId = (int) $sheetNode->attributes()->sheetId;

            // the name is stored escaped in the XML attribute
            $escapedSheetName = (string) $sheetNode->attributes()->name;
            $escaper = new \Box\Spout\Common\Escaper\XLSX();
            $sheetName = $escaper->unescape($escapedSheetName);
        }
    }

    return new Sheet($sheetId, $sheetNumberZeroBased, $sheetName);
}
/**
* Returns the default name of the sheet whose data is located
* at the given path.
* The default name is the base name of the file, with the ".xml" extension removed
* ("xl/worksheets/sheet2.xml" => "sheet2").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file
* @return string The default sheet name
*/
protected function getDefaultSheetName($sheetDataXMLFilePath)
{
return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
}
/**
 * Gives access to the parsed "xl/_rels/workbook.xml.rels" file.
 * The file is loaded on demand and the resulting element is stored on the instance,
 * so that later calls can return the stored element instead of parsing the file again.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml.rels file
 */
protected function getWorkbookXMLRelsAsXMLElement()
{
    if ($this->workbookXMLRelsAsXMLElement) {
        // a usable element was stored by a previous call
        return $this->workbookXMLRelsAsXMLElement;
    }

    $relsElement = $this->getFileAsXMLElementWithNamespace(
        self::WORKBOOK_XML_RELS_FILE_PATH,
        self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
    );
    $this->workbookXMLRelsAsXMLElement = $relsElement;

    return $relsElement;
}
/**
 * Gives access to the parsed "xl/workbook.xml" file.
 * The file is loaded on demand and the resulting element is stored on the instance,
 * so that later calls can return the stored element instead of parsing the file again.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml file
 */
protected function getWorkbookXMLAsXMLElement()
{
    if ($this->workbookXMLAsXMLElement) {
        // a usable element was stored by a previous call
        return $this->workbookXMLAsXMLElement;
    }

    $workbookElement = $this->getFileAsXMLElementWithNamespace(
        self::WORKBOOK_XML_FILE_PATH,
        self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
    );
    $this->workbookXMLAsXMLElement = $workbookElement;

    return $workbookElement;
}
/**
 * Reads an XML file stored inside the XLSX file, parses it and registers
 * the given namespace under the "ns" prefix for subsequent XPath queries.
 *
 * @param string $xmlFilePath The path of the XML file inside the XLSX file
 * @param string $mainNamespace The main XPath namespace to register
 * @return \SimpleXMLElement The XML element representing the file
 */
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
{
    // "zip://<archive path>#<entry path>" addresses a single entry of the XLSX archive
    $zipEntryUri = 'zip://' . $this->filePath . '#' . $xmlFilePath;

    $parsedElement = new \SimpleXMLElement($this->globalFunctionsHelper->file_get_contents($zipEntryUri));
    $parsedElement->registerXPathNamespace('ns', $mainNamespace);

    return $parsedElement;
}
/**
* Returns whether another worksheet exists after the current worksheet.
* The order is determined by the order of appearance in the [Content_Types].xml file.

View File

@ -10,18 +10,23 @@ namespace Box\Spout\Reader\Internal\XLSX;
*/
class Worksheet
{
/** @var \Box\Spout\Reader\Sheet The "external" sheet */
protected $externalSheet;
/** @var int Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based) */
protected $worksheetNumber;
/** @var string Path of the XML file containing the worksheet data */
protected $dataXmlFilePath;
/**
/**
* @param \Box\Spout\Reader\Sheet $externalSheet The associated "external" sheet
* @param int $worksheetNumber Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based)
* @param string $dataXmlFilePath Path of the XML file containing the worksheet data
*/
public function __construct($worksheetNumber, $dataXmlFilePath)
public function __construct($externalSheet, $worksheetNumber, $dataXmlFilePath)
{
$this->externalSheet = $externalSheet;
$this->worksheetNumber = $worksheetNumber;
$this->dataXmlFilePath = $dataXmlFilePath;
}
@ -34,6 +39,14 @@ class Worksheet
return ltrim($this->dataXmlFilePath, '/');
}
/**
* Returns the sheet object stored in the $externalSheet property of this worksheet.
*
* @return \Box\Spout\Reader\Sheet The "external" sheet
*/
public function getExternalSheet()
{
return $this->externalSheet;
}
/**
* @return int
*/

View File

@ -0,0 +1,57 @@
<?php
namespace Box\Spout\Reader;
/**
 * Class Sheet
 * Represents a worksheet within a XLSX file.
 * Holds the ID, number and name given at construction time and exposes them
 * through getters only (no setters are provided).
 *
 * @package Box\Spout\Reader
 */
class Sheet
{
    /** @var int ID of the sheet */
    protected $id;

    /** @var int Number of the sheet, based on order of creation (zero-based) */
    protected $number;

    /** @var string Name of the sheet */
    protected $name;

    /**
     * @param int $sheetId ID of the sheet
     * @param int $sheetNumber Number of the sheet, based on order of creation (zero-based)
     * @param string $sheetName Name of the sheet
     */
    public function __construct($sheetId, $sheetNumber, $sheetName)
    {
        $this->id = $sheetId;
        $this->number = $sheetNumber;
        $this->name = $sheetName;
    }

    /**
     * @return int ID of the sheet
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return int Number of the sheet, based on order of creation (zero-based)
     */
    public function getNumber()
    {
        return $this->number;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }
}

View File

@ -76,7 +76,7 @@ class XLSX extends AbstractReader
$this->extractSharedStrings($filePath);
// Fetch all available worksheets
$this->worksheetHelper = new WorksheetHelper($filePath);
$this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper);
$this->worksheets = $this->worksheetHelper->getWorksheets($filePath);
if (count($this->worksheets) === 0) {
@ -119,29 +119,31 @@ class XLSX extends AbstractReader
* Moves the pointer to the current worksheet.
* Moving to another worksheet will stop the reading in the current worksheet.
*
* @return void
* @return \Box\Spout\Reader\Sheet The next sheet
* @throws Exception\ReaderNotOpenedException If the stream was not opened first
* @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read
*/
public function nextSheet()
{
if ($this->hasNextSheet()) {
if ($this->currentWorksheet === null) {
$nextWorksheet = $this->worksheets[0];
} else {
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
}
$this->initXmlReaderForWorksheetData($nextWorksheet);
$this->currentWorksheet = $nextWorksheet;
// make sure that we are ready to read more rows
$this->hasReachedEndOfFile = false;
$this->emptyRowDataBuffer();
} else {
if (!$this->hasNextSheet()) {
throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.');
}
if ($this->currentWorksheet === null) {
$nextWorksheet = $this->worksheets[0];
} else {
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
}
$this->initXmlReaderForWorksheetData($nextWorksheet);
$this->currentWorksheet = $nextWorksheet;
// make sure that we are ready to read more rows
$this->hasReachedEndOfFile = false;
$this->emptyRowDataBuffer();
return $this->currentWorksheet->getExternalSheet();
}
/**

View File

@ -200,6 +200,32 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
$this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.');
}
/**
 * Iterates over all the sheets of "two_sheets_with_custom_names.xlsx" and checks
 * the name, number and ID exposed by each object returned by nextSheet().
 *
 * @return void
 */
public function testNextSheetShouldReturnCorrectSheetInfos()
{
    $resourcePath = $this->getResourcePath('two_sheets_with_custom_names.xlsx');

    $reader = ReaderFactory::create(Type::XLSX);
    $reader->open($resourcePath);

    /** @var \Box\Spout\Reader\Sheet[] $readSheets */
    $readSheets = [];
    while ($reader->hasNextSheet()) {
        $readSheets[] = $reader->nextSheet();
    }

    $reader->close();

    // first sheet
    $firstSheet = $readSheets[0];
    $this->assertEquals('CustomName1', $firstSheet->getName());
    $this->assertEquals(0, $firstSheet->getNumber());
    $this->assertEquals(1, $firstSheet->getId());

    // second sheet
    $secondSheet = $readSheets[1];
    $this->assertEquals('CustomName2', $secondSheet->getName());
    $this->assertEquals(1, $secondSheet->getNumber());
    $this->assertEquals(2, $secondSheet->getId());
}
/**
* @param string $fileName
* @return array All the rows read from the given file

Binary file not shown.