Merge pull request #29 from box/expose_sheet_xlsx_reader
Expose a Sheet object on Reader::XLSX::nextSheet()
This commit is contained in:
commit
71d9c18a81
@ -119,7 +119,7 @@ class GlobalFunctionsHelper
|
||||
* Wrapper around global function file_exists()
|
||||
* @see file_exists()
|
||||
*
|
||||
* @param string $filename
|
||||
* @param string $fileName
|
||||
* @return bool
|
||||
*/
|
||||
public function file_exists($fileName)
|
||||
@ -127,11 +127,23 @@ class GlobalFunctionsHelper
|
||||
return file_exists($fileName);
|
||||
}
|
||||
|
||||
/**
 * Thin pass-through to PHP's global file_get_contents() function.
 * @see file_get_contents()
 *
 * @param string $filePath Path of the file to read, handed to file_get_contents() untouched
 * @return string|bool The value returned by file_get_contents(): the read data, or false on failure
 */
public function file_get_contents($filePath)
{
    $contents = file_get_contents($filePath);

    return $contents;
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function is_readable()
|
||||
* @see is_readable()
|
||||
*
|
||||
* @param string $filename
|
||||
* @param string $fileName
|
||||
* @return bool
|
||||
*/
|
||||
public function is_readable($fileName)
|
||||
@ -144,11 +156,12 @@ class GlobalFunctionsHelper
|
||||
* @see basename()
|
||||
*
|
||||
* @param string $path
|
||||
* @param string|null $suffix
|
||||
* @return string
|
||||
*/
|
||||
public function basename($path)
|
||||
public function basename($path, $suffix = null)
|
||||
{
|
||||
return basename($path);
|
||||
return basename($path, $suffix);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3,6 +3,7 @@
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
use Box\Spout\Reader\Internal\XLSX\Worksheet;
|
||||
use Box\Spout\Reader\Sheet;
|
||||
|
||||
/**
|
||||
* Class WorksheetHelper
|
||||
@ -12,11 +13,18 @@ use Box\Spout\Reader\Internal\XLSX\Worksheet;
|
||||
*/
|
||||
class WorksheetHelper
|
||||
{
|
||||
/** Path of Content_Types XML file inside the XLSX file */
|
||||
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
|
||||
/** Extension for XML files */
|
||||
const XML_EXTENSION = '.xml';
|
||||
|
||||
/** Main namespace for the [Content_Types].xml file */
|
||||
/** Paths of XML files relative to the XLSX file root */
|
||||
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
|
||||
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
|
||||
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
|
||||
|
||||
/** Namespaces for the XML files */
|
||||
const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
|
||||
const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
|
||||
const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
|
||||
/** Value of the Override attribute used in [Content_Types].xml to define worksheets */
|
||||
const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';
|
||||
@ -24,12 +32,23 @@ class WorksheetHelper
|
||||
/** @var string Path of the XLSX file being read */
|
||||
protected $filePath;
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
|
||||
protected $workbookXMLRelsAsXMLElement;
|
||||
|
||||
/** @var \SimpleXMLElement XML element representing the workbook.xml file */
|
||||
protected $workbookXMLAsXMLElement;
|
||||
|
||||
/**
|
||||
* @param string $filePath Path of the XLSX file being read
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($filePath)
|
||||
public function __construct($filePath, $globalFunctionsHelper)
|
||||
{
|
||||
$this->filePath = $filePath;
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -42,23 +61,139 @@ class WorksheetHelper
|
||||
{
|
||||
$worksheets = [];
|
||||
|
||||
$xmlContents = file_get_contents('zip://' . $this->filePath . '#' . self::CONTENT_TYPES_XML_FILE_PATH);
|
||||
|
||||
$contentTypes = new \SimpleXMLElement($xmlContents);
|
||||
$contentTypes->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML);
|
||||
$contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
|
||||
self::CONTENT_TYPES_XML_FILE_PATH,
|
||||
self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
|
||||
);
|
||||
|
||||
// find all nodes defining a worksheet
|
||||
$sheetNodes = $contentTypes->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
|
||||
$sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
|
||||
|
||||
for ($i = 0; $i < count($sheetNodes); $i++) {
|
||||
$sheetNode = $sheetNodes[$i];
|
||||
$sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;
|
||||
$worksheets[] = new Worksheet($i, $sheetDataXMLFilePath);
|
||||
|
||||
$sheet = $this->getSheet($sheetDataXMLFilePath, $i);
|
||||
$worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath);
|
||||
}
|
||||
|
||||
return $worksheets;
|
||||
}
|
||||
|
||||
/**
 * Returns an instance of a sheet, given the path of its data XML file.
 * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
 * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID.
 * The entry contains the ID and name of the sheet.
 *
 * If this piece of data can't be found by parsing the different XML files, the ID will default
 * to the sheet index + 1, based on order in [Content_Types].xml. Similarly, the sheet's name will
 * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
 *
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 * @param int $sheetNumberZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
 * @return \Box\Spout\Reader\Sheet Sheet instance
 */
protected function getSheet($sheetDataXMLFilePath, $sheetNumberZeroBased)
{
    // Fallback values, used unless both lookups below succeed
    $sheetId = $sheetNumberZeroBased + 1;
    $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);

    /*
     * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
     * In workbook.xml.rels, it is only "worksheets/sheet1.xml"
     *
     * Only the leading "/xl/" prefix must go. ltrim() cannot be used for this because its
     * second argument is a list of characters, so it would also strip any leading "x", "l"
     * or "/" from the next path segment (e.g. "/xl/xlsheets/a.xml" => "sheets/a.xml").
     */
    $sheetDataXMLFilePathInWorkbookXMLRels = preg_replace('#^/?xl/#', '', $sheetDataXMLFilePath);

    // find the node associated to the given file path
    $workbookXMLRelsElement = $this->getWorkbookXMLRelsAsXMLElement();
    $relationshipNodes = $workbookXMLRelsElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');

    // xpath() may return false on error, hence the is_array() guard before counting
    if (is_array($relationshipNodes) && count($relationshipNodes) === 1) {
        // Kept in its own variable so that the default $sheetId survives a failed workbook.xml lookup
        $relationshipId = (string) $relationshipNodes[0]->attributes()->Id;

        $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
        $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]');

        if (is_array($sheetNodes) && count($sheetNodes) === 1) {
            $sheetNode = $sheetNodes[0];
            $sheetId = (int) $sheetNode->attributes()->sheetId;
            $escapedSheetName = (string) $sheetNode->attributes()->name;

            // The name is stored escaped in workbook.xml
            $escaper = new \Box\Spout\Common\Escaper\XLSX();
            $sheetName = $escaper->unescape($escapedSheetName);
        }
    }

    return new Sheet($sheetId, $sheetNumberZeroBased, $sheetName);
}
|
||||
|
||||
/**
 * Builds the fallback name of a sheet from the location of its data file:
 * the last component of the given path, with the ".xml" extension removed.
 *
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file
 * @return string The default sheet name
 */
protected function getDefaultSheetName($sheetDataXMLFilePath)
{
    $extensionToStrip = self::XML_EXTENSION;
    $defaultSheetName = $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, $extensionToStrip);

    return $defaultSheetName;
}
|
||||
|
||||
/**
 * Returns a representation of the workbook.xml.rels file, ready to be parsed.
 * The returned value is cached: the file is only loaded on the first call.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml.rels file
 */
protected function getWorkbookXMLRelsAsXMLElement()
{
    // Compare against null rather than relying on truthiness: a SimpleXMLElement
    // created from an empty element casts to false, which would defeat the cache.
    if ($this->workbookXMLRelsAsXMLElement === null) {
        $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace(
            self::WORKBOOK_XML_RELS_FILE_PATH,
            self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
        );
    }

    return $this->workbookXMLRelsAsXMLElement;
}
|
||||
|
||||
/**
 * Returns a representation of the workbook.xml file, ready to be parsed.
 * The returned value is cached: the file is only loaded on the first call.
 *
 * @return \SimpleXMLElement XML element representing the workbook.xml file
 */
protected function getWorkbookXMLAsXMLElement()
{
    // Compare against null rather than relying on truthiness: a SimpleXMLElement
    // created from an empty element casts to false, which would defeat the cache.
    if ($this->workbookXMLAsXMLElement === null) {
        $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace(
            self::WORKBOOK_XML_FILE_PATH,
            self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
        );
    }

    return $this->workbookXMLAsXMLElement;
}
|
||||
|
||||
/**
 * Reads an XML file stored inside the XLSX archive, parses it and registers
 * the given namespace under the "ns" prefix for later XPath queries.
 *
 * @param string $xmlFilePath The path of the XML file inside the XLSX file
 * @param string $mainNamespace The main XPath namespace to register
 * @return \SimpleXMLElement The XML element representing the file
 */
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
{
    // The zip:// stream wrapper addresses an entry inside the archive: "zip://<archive>#<entry>"
    $pathInsideArchive = 'zip://' . $this->filePath . '#' . $xmlFilePath;
    $rawXML = $this->globalFunctionsHelper->file_get_contents($pathInsideArchive);

    $parsedXML = new \SimpleXMLElement($rawXML);
    $parsedXML->registerXPathNamespace('ns', $mainNamespace);

    return $parsedXML;
}
|
||||
|
||||
/**
|
||||
* Returns whether another worksheet exists after the current worksheet.
|
||||
* The order is determined by the order of appearance in the [Content_Types].xml file.
|
||||
|
@ -10,18 +10,23 @@ namespace Box\Spout\Reader\Internal\XLSX;
|
||||
*/
|
||||
class Worksheet
|
||||
{
|
||||
/** @var \Box\Spout\Reader\Sheet The "external" sheet */
|
||||
protected $externalSheet;
|
||||
|
||||
/** @var int Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based) */
|
||||
protected $worksheetNumber;
|
||||
|
||||
/** @var string Path of the XML file containing the worksheet data */
|
||||
protected $dataXmlFilePath;
|
||||
|
||||
/**
|
||||
/**
|
||||
* @param \Box\Spout\Reader\Sheet $externalSheet The associated "external" sheet
|
||||
* @param int $worksheetNumber Worksheet number, based on the order of appearance in [Content_Types].xml (zero-based)
|
||||
* @param string $dataXmlFilePath Path of the XML file containing the worksheet data
|
||||
*/
|
||||
public function __construct($worksheetNumber, $dataXmlFilePath)
|
||||
public function __construct($externalSheet, $worksheetNumber, $dataXmlFilePath)
|
||||
{
|
||||
$this->externalSheet = $externalSheet;
|
||||
$this->worksheetNumber = $worksheetNumber;
|
||||
$this->dataXmlFilePath = $dataXmlFilePath;
|
||||
}
|
||||
@ -34,6 +39,14 @@ class Worksheet
|
||||
return ltrim($this->dataXmlFilePath, '/');
|
||||
}
|
||||
|
||||
/**
|
||||
* @return \Box\Spout\Reader\Sheet The "external" sheet
|
||||
*/
|
||||
public function getExternalSheet()
|
||||
{
|
||||
return $this->externalSheet;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
|
57
src/Spout/Reader/Sheet.php
Normal file
57
src/Spout/Reader/Sheet.php
Normal file
@ -0,0 +1,57 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
/**
 * Class Sheet
 * Represents a worksheet within a XLSX file.
 * Simple value holder: the ID, number and name are set once by the constructor
 * and exposed through getters.
 *
 * @package Box\Spout\Reader
 */
class Sheet
{
    /** @var int ID of the sheet */
    protected $id;

    /** @var int Number of the sheet, based on order of creation (zero-based) */
    protected $number;

    /** @var string Name of the sheet */
    protected $name;

    /**
     * @param int $sheetId ID of the sheet
     * @param int $sheetNumber Number of the sheet, based on order of creation (zero-based)
     * @param string $sheetName Name of the sheet
     */
    public function __construct($sheetId, $sheetNumber, $sheetName)
    {
        $this->id = $sheetId;
        $this->number = $sheetNumber;
        $this->name = $sheetName;
    }

    /**
     * @return int ID of the sheet
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return int Number of the sheet, based on order of creation (zero-based)
     */
    public function getNumber()
    {
        return $this->number;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }
}
|
@ -76,7 +76,7 @@ class XLSX extends AbstractReader
|
||||
$this->extractSharedStrings($filePath);
|
||||
|
||||
// Fetch all available worksheets
|
||||
$this->worksheetHelper = new WorksheetHelper($filePath);
|
||||
$this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper);
|
||||
$this->worksheets = $this->worksheetHelper->getWorksheets($filePath);
|
||||
|
||||
if (count($this->worksheets) === 0) {
|
||||
@ -119,29 +119,31 @@ class XLSX extends AbstractReader
|
||||
* Moves the pointer to the current worksheet.
|
||||
* Moving to another worksheet will stop the reading in the current worksheet.
|
||||
*
|
||||
* @return void
|
||||
* @return \Box\Spout\Reader\Sheet The next sheet
|
||||
* @throws Exception\ReaderNotOpenedException If the stream was not opened first
|
||||
* @throws Exception\EndOfWorksheetsReachedException If there are no more worksheets to read
|
||||
*/
|
||||
public function nextSheet()
|
||||
{
|
||||
if ($this->hasNextSheet()) {
|
||||
if ($this->currentWorksheet === null) {
|
||||
$nextWorksheet = $this->worksheets[0];
|
||||
} else {
|
||||
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
|
||||
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
|
||||
}
|
||||
|
||||
$this->initXmlReaderForWorksheetData($nextWorksheet);
|
||||
$this->currentWorksheet = $nextWorksheet;
|
||||
|
||||
// make sure that we are ready to read more rows
|
||||
$this->hasReachedEndOfFile = false;
|
||||
$this->emptyRowDataBuffer();
|
||||
} else {
|
||||
if (!$this->hasNextSheet()) {
|
||||
throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.');
|
||||
}
|
||||
|
||||
if ($this->currentWorksheet === null) {
|
||||
$nextWorksheet = $this->worksheets[0];
|
||||
} else {
|
||||
$currentWorksheetNumber = $this->currentWorksheet->getWorksheetNumber();
|
||||
$nextWorksheet = $this->worksheets[$currentWorksheetNumber + 1];
|
||||
}
|
||||
|
||||
$this->initXmlReaderForWorksheetData($nextWorksheet);
|
||||
$this->currentWorksheet = $nextWorksheet;
|
||||
|
||||
// make sure that we are ready to read more rows
|
||||
$this->hasReachedEndOfFile = false;
|
||||
$this->emptyRowDataBuffer();
|
||||
|
||||
return $this->currentWorksheet->getExternalSheet();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -200,6 +200,32 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.');
|
||||
}
|
||||
|
||||
/**
 * Walks through every sheet of a two-sheet workbook with nextSheet() and checks
 * the name, number and ID exposed by each returned Sheet object.
 *
 * @return void
 */
public function testNextSheetShouldReturnCorrectSheetInfos()
{
    $xlsxPath = $this->getResourcePath('two_sheets_with_custom_names.xlsx');
    $xlsxReader = ReaderFactory::create(Type::XLSX);
    $xlsxReader->open($xlsxPath);

    /** @var \Box\Spout\Reader\Sheet[] $readSheets */
    $readSheets = [];
    while ($xlsxReader->hasNextSheet()) {
        $readSheets[] = $xlsxReader->nextSheet();
    }

    $xlsxReader->close();

    $firstSheet = $readSheets[0];
    $this->assertEquals('CustomName1', $firstSheet->getName());
    $this->assertEquals(0, $firstSheet->getNumber());
    $this->assertEquals(1, $firstSheet->getId());

    $secondSheet = $readSheets[1];
    $this->assertEquals('CustomName2', $secondSheet->getName());
    $this->assertEquals(1, $secondSheet->getNumber());
    $this->assertEquals(2, $secondSheet->getId());
}
|
||||
|
||||
/**
|
||||
* @param string $fileName
|
||||
* @return array All the read rows the given file
|
||||
|
BIN
tests/resources/xlsx/two_sheets_with_custom_names.xlsx
Normal file
BIN
tests/resources/xlsx/two_sheets_with_custom_names.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user