Merge pull request #29 from box/expose_sheet_xlsx_reader

Expose a Sheet object on Reader::XLSX::nextSheet()
This commit is contained in:
Adrien Loison 2015-04-29 00:32:12 -07:00
commit 71d9c18a81
7 changed files with 279 additions and 33 deletions

View File

@ -119,7 +119,7 @@ class GlobalFunctionsHelper
* Wrapper around global function file_exists()
* @see file_exists()
*
* @param string $filename
* @param string $fileName
* @return bool
*/
public function file_exists($fileName)
@ -127,11 +127,23 @@ class GlobalFunctionsHelper
return file_exists($fileName);
}
/**
* Wrapper around global function file_get_contents()
* @see file_get_contents()
*
* @param string $filePath Path (or stream wrapper URI, e.g. "zip://...") of the file to read; passed through unchanged
* @return string|false The value returned by file_get_contents(): the contents that were read, or false on failure
*/
public function file_get_contents($filePath)
{
return file_get_contents($filePath);
}
/**
* Wrapper around global function is_readable()
* @see is_readable()
*
* @param string $filename
* @param string $fileName
* @return bool
*/
public function is_readable($fileName)
@ -144,11 +156,12 @@ class GlobalFunctionsHelper
* @see basename()
*
* @param string $path
* @param string|null $suffix
* @return string
*/
public function basename($path)
public function basename($path, $suffix = null)
{
return basename($path);
return basename($path, $suffix);
}
/**

View File

@ -3,6 +3,7 @@
namespace Box\Spout\Reader\Helper\XLSX;
use Box\Spout\Reader\Internal\XLSX\Worksheet;
use Box\Spout\Reader\Sheet;
/**
* Class WorksheetHelper
@ -12,11 +13,18 @@ use Box\Spout\Reader\Internal\XLSX\Worksheet;
*/
class WorksheetHelper
{
/** Path of Content_Types XML file inside the XLSX file */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
/** Extension for XML files */
const XML_EXTENSION = '.xml';
/** Main namespace for the [Content_Types].xml file */
/** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/** Namespaces for the XML files */
const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Value of the Override attribute used in [Content_Types].xml to define worksheets */
const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';
@ -24,12 +32,23 @@ class WorksheetHelper
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
protected $workbookXMLRelsAsXMLElement;
/** @var \SimpleXMLElement XML element representing the workbook.xml file */
protected $workbookXMLAsXMLElement;
/**
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePath)
public function __construct($filePath, $globalFunctionsHelper)
{
$this->filePath = $filePath;
$this->globalFunctionsHelper = $globalFunctionsHelper;
}
/**
@ -42,23 +61,139 @@ class WorksheetHelper
{
$worksheets = [];
$xmlContents = file_get_contents('zip://' . $this->filePath . '#' . self::CONTENT_TYPES_XML_FILE_PATH);
$contentTypes = new \SimpleXMLElement($xmlContents);
$contentTypes->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML);
$contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::CONTENT_TYPES_XML_FILE_PATH,
self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
);
// find all nodes defining a worksheet
$sheetNodes = $contentTypes->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
$sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
for ($i = 0; $i < count($sheetNodes); $i++) {
$sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;
$worksheets[] = new Worksheet($i, $sheetDataXMLFilePath);
$sheet = $this->getSheet($sheetDataXMLFilePath, $i);
$worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath);
}
return $worksheets;
}
/**
 * Returns an instance of a sheet, given the path of its data XML file.
 * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
 * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID.
 * The entry contains the ID and name of the sheet.
 *
 * If this piece of data can't be found by parsing the different XML files, the ID will default
 * to the sheet index + 1, based on order in [Content_Types].xml. Similarly, the sheet's name will
 * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
 *
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 * @param int $sheetNumberZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
 * @return \Box\Spout\Reader\Sheet Sheet instance
 */
protected function getSheet($sheetDataXMLFilePath, $sheetNumberZeroBased)
{
    // Defaults, kept whenever the workbook XML files do not describe this sheet
    $sheetId = $sheetNumberZeroBased + 1;
    $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);

    /*
     * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
     * In workbook.xml.rels, it is only "worksheets/sheet1.xml"
     *
     * Only the leading "/xl/" prefix must be removed. ltrim() is not suitable: its second
     * argument is a list of characters, so it would also strip any leading "x", "l" or "/"
     * of the remaining path ("/xl/lists/a.xml" would become "ists/a.xml").
     */
    $sheetDataXMLFilePathInWorkbookXMLRels = preg_replace('~^/?xl/~', '', $sheetDataXMLFilePath);

    // find the node associated to the given file path
    $workbookXMLRelsElement = $this->getWorkbookXMLRelsAsXMLElement();
    $relationshipNodes = $workbookXMLRelsElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');

    // xpath() does not return an array when the query fails, hence the is_array() guards
    if (is_array($relationshipNodes) && count($relationshipNodes) === 1) {
        // Stored in its own variable so that the default $sheetId is preserved
        // when workbook.xml has no entry for this relationship
        $relationshipId = (string) $relationshipNodes[0]->attributes()->Id;

        $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
        $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]');

        if (is_array($sheetNodes) && count($sheetNodes) === 1) {
            $sheetAttributes = $sheetNodes[0]->attributes();
            $sheetId = (int) $sheetAttributes->sheetId;
            $escapedSheetName = (string) $sheetAttributes->name;

            $escaper = new \Box\Spout\Common\Escaper\XLSX();
            $sheetName = $escaper->unescape($escapedSheetName);
        }
    }

    return new Sheet($sheetId, $sheetNumberZeroBased, $sheetName);
}
/**
* Returns the default name of the sheet whose data is located
* at the given path: the base name of that path, without its ".xml" extension
* ("xl/worksheets/sheet2.xml" => "sheet2").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file
* @return string The default sheet name
*/
protected function getDefaultSheetName($sheetDataXMLFilePath)
{
return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
}
/**
 * Returns a representation of the workbook.xml.rels file, ready to be parsed.
 * The returned value is cached, so the file is loaded on the first call only.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml.rels file
 */
protected function getWorkbookXMLRelsAsXMLElement()
{
    // Strict null check on purpose: a SimpleXMLElement created from an empty element without
    // attributes converts to false, so a truthiness test would reload the file on every call.
    if ($this->workbookXMLRelsAsXMLElement === null) {
        $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace(
            self::WORKBOOK_XML_RELS_FILE_PATH,
            self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
        );
    }

    return $this->workbookXMLRelsAsXMLElement;
}
/**
 * Returns a representation of the workbook.xml file, ready to be parsed.
 * The returned value is cached, so the file is loaded on the first call only.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml file
 */
protected function getWorkbookXMLAsXMLElement()
{
    // Strict null check on purpose: a SimpleXMLElement created from an empty element without
    // attributes converts to false, so a truthiness test would reload the file on every call.
    if ($this->workbookXMLAsXMLElement === null) {
        $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace(
            self::WORKBOOK_XML_FILE_PATH,
            self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
        );
    }

    return $this->workbookXMLAsXMLElement;
}
/**
 * Reads an XML file stored inside the XLSX file, parses it and registers the given
 * namespace under the "ns" prefix, so that XPath queries can be run against the result.
 *
 * @param string $xmlFilePath The path of the XML file inside the XLSX file
 * @param string $mainNamespace The main XPath namespace to register
 * @return \SimpleXMLElement The XML element representing the file
 */
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
{
    // An entry of the archive is addressed as "zip://<archive path>#<entry path>"
    $zipEntryURI = 'zip://' . $this->filePath . '#' . $xmlFilePath;
    $rawXML = $this->globalFunctionsHelper->file_get_contents($zipEntryURI);

    $parsedXML = new \SimpleXMLElement($rawXML);
    $parsedXML->registerXPathNamespace('ns', $mainNamespace);

    return $parsedXML;
}
/**
* Returns whether another worksheet exists after the current worksheet.
* The order is determined by the order of appearance in the [Content_Types].xml file.

View File

@ -10,18 +10,23 @@ namespace Box\Spout\Reader\Internal\XLSX;
*/
class Worksheet
{
/** @var \Box\Spout\Reader\Sheet The "external" sheet */
protected $externalSheet;
/** @var int Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based) */
protected $worksheetNumber;
/** @var string Path of the XML file containing the worksheet data */
protected $dataXmlFilePath;
/**
/**
* @param \Box\Spout\Reader\Sheet $externalSheet The associated "external" sheet
* @param int $worksheetNumber Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based)
* @param string $dataXmlFilePath Path of the XML file containing the worksheet data
*/
public function __construct($worksheetNumber, $dataXmlFilePath)
public function __construct($externalSheet, $worksheetNumber, $dataXmlFilePath)
{
$this->externalSheet = $externalSheet;
$this->worksheetNumber = $worksheetNumber;
$this->dataXmlFilePath = $dataXmlFilePath;
}
@ -34,6 +39,14 @@ class Worksheet
return ltrim($this->dataXmlFilePath, '/');
}
/**
* Returns the "external" sheet associated with this worksheet, as given to the constructor.
*
* @return \Box\Spout\Reader\Sheet The "external" sheet
*/
public function getExternalSheet()
{
return $this->externalSheet;
}
/**
* @return int
*/

View File

@ -0,0 +1,57 @@
<?php
namespace Box\Spout\Reader;
/**
 * Class Sheet
 * Represents a worksheet within an XLSX file.
 * Simple value holder: the ID, number and name are set once by the constructor
 * and only exposed through getters.
 *
 * @package Box\Spout\Reader
 */
class Sheet
{
    /** @var int ID of the sheet */
    protected $id;

    /** @var int Number of the sheet, based on order of creation (zero-based) */
    protected $number;

    /** @var string Name of the sheet */
    protected $name;

    /**
     * @param int $sheetId ID of the sheet
     * @param int $sheetNumber Number of the sheet, based on order of creation (zero-based)
     * @param string $sheetName Name of the sheet
     */
    public function __construct($sheetId, $sheetNumber, $sheetName)
    {
        $this->id = $sheetId;
        $this->number = $sheetNumber;
        $this->name = $sheetName;
    }

    /**
     * @return int ID of the sheet
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return int Number of the sheet, based on order of creation (zero-based)
     */
    public function getNumber()
    {
        return $this->number;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }
}

View File

@ -76,7 +76,7 @@ class XLSX extends AbstractReader
$this->extractSharedStrings($filePath);
// Fetch all available worksheets
$this->worksheetHelper = new WorksheetHelper($filePath);
$this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper);
$this->worksheets = $this->worksheetHelper->getWorksheets($filePath);
if (count($this->worksheets) === 0) {
@ -119,29 +119,31 @@ class XLSX extends AbstractReader
* Moves the pointer to the current worksheet.
* Moving to another worksheet will stop the reading in the current worksheet.
*
* @return void
* @return \Box\Spout\Reader\Sheet The next sheet
* @throws Exception\ReaderNotOpenedException If the stream was not opened first
* @throws Exception\EndOfWorksheetsReachedException If there are no more worksheets to read
*/
public function nextSheet()
{
if ($this->hasNextSheet()) {
if ($this->currentWorksheet === null) {
$nextWorksheet = $this->worksheets[0];
} else {
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
}
$this->initXmlReaderForWorksheetData($nextWorksheet);
$this->currentWorksheet = $nextWorksheet;
// make sure that we are ready to read more rows
$this->hasReachedEndOfFile = false;
$this->emptyRowDataBuffer();
} else {
if (!$this->hasNextSheet()) {
throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.');
}
if ($this->currentWorksheet === null) {
$nextWorksheet = $this->worksheets[0];
} else {
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
}
$this->initXmlReaderForWorksheetData($nextWorksheet);
$this->currentWorksheet = $nextWorksheet;
// make sure that we are ready to read more rows
$this->hasReachedEndOfFile = false;
$this->emptyRowDataBuffer();
return $this->currentWorksheet->getExternalSheet();
}
/**

View File

@ -200,6 +200,32 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
$this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.');
}
/**
 * Checks that nextSheet() returns, for every sheet of the file, an object
 * exposing the expected name, zero-based number and ID.
 *
 * @return void
 */
public function testNextSheetShouldReturnCorrectSheetInfos()
{
    $resourcePath = $this->getResourcePath('two_sheets_with_custom_names.xlsx');
    $reader = ReaderFactory::create(Type::XLSX);
    $reader->open($resourcePath);

    /** @var \Box\Spout\Reader\Sheet[] $sheets */
    $sheets = [];
    while ($reader->hasNextSheet()) {
        $sheets[] = $reader->nextSheet();
    }

    $reader->close();

    // Fail with a clear message instead of an undefined offset error if a sheet is missing
    $this->assertCount(2, $sheets, 'The file should contain exactly 2 sheets.');

    $this->assertEquals('CustomName1', $sheets[0]->getName());
    $this->assertEquals(0, $sheets[0]->getNumber());
    $this->assertEquals(1, $sheets[0]->getId());

    $this->assertEquals('CustomName2', $sheets[1]->getName());
    $this->assertEquals(1, $sheets[1]->getNumber());
    $this->assertEquals(2, $sheets[1]->getId());
}
/**
* @param string $fileName
* @return array All the rows read from the given file

Binary file not shown.