Merge pull request #165 from box/support_xlsx_sheets_random_order

Support XLSX that are defined in random order
This commit is contained in:
Adrien Loison 2016-01-08 08:50:49 -08:00
commit c48c07db99
6 changed files with 52 additions and 50 deletions

View File

@ -19,7 +19,7 @@ class Sheet implements SheetInterface
/** @var int ID of the sheet */
protected $id;
/** @var int Index of the sheet, based on order of creation (zero-based) */
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index;
/** @var string Name of the sheet */
@ -27,7 +27,7 @@ class Sheet implements SheetInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($xmlReader, $sheetIndex, $sheetName)
@ -48,7 +48,7 @@ class Sheet implements SheetInterface
/**
* @api
* @return int Index of the sheet, based on order of creation (zero-based)
* @return int Index of the sheet, based on order in the workbook (zero-based)
*/
public function getIndex()
{

View File

@ -13,9 +13,6 @@ use Box\Spout\Reader\XLSX\Sheet;
*/
class SheetHelper
{
/** Extension for XML files */
const XML_EXTENSION = '.xml';
/** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
@ -79,9 +76,15 @@ class SheetHelper
$sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i);
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath);
}
// make sure the sheets are sorted by index
// (as the sheets are not necessarily in this order in the XML file)
usort($sheets, function ($sheet1, $sheet2) {
return ($sheet1->getIndex() - $sheet2->getIndex());
});
return $sheets;
}
@ -91,60 +94,37 @@ class SheetHelper
* Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID.
* The entry contains the ID and name of the sheet.
*
* If this piece of data can't be found by parsing the different XML files, the ID will default
* to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will
* default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
*/
protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased)
protected function getSheetFromXML($sheetDataXMLFilePath)
{
$sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);
/*
* In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
* In workbook.xml.rels, it is only "worksheets/sheet1.xml"
*/
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
// find the node associated to the given file path
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
$relationshipNode = $relationshipNodes[0];
if (count($relationshipNodes) === 1) {
$relationshipNode = $relationshipNodes[0];
$sheetId = $relationshipNode->getAttribute('Id');
$relationshipSheetId = $relationshipNode->getAttribute('Id');
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]');
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]');
$sheetNode = $sheetNodes[0];
if (count($sheetNodes) === 1) {
$sheetNode = $sheetNodes[0];
$escapedSheetName = $sheetNode->getAttribute('name');
$escapedSheetName = $sheetNode->getAttribute('name');
$sheetIdOneBased = $sheetNode->getAttribute('sheetId');
$sheetIndexZeroBased = $sheetIdOneBased - 1;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$sheetName = $escaper->unescape($escapedSheetName);
}
}
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$sheetName = $escaper->unescape($escapedSheetName);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
}
/**
* Returns the default name of the sheet whose data is located
* at the given path.
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file
* @return string The default sheet name
*/
protected function getDefaultSheetName($sheetDataXMLFilePath)
{
return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
}
/**
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
* The returned value is cached.

View File

@ -15,7 +15,7 @@ class Sheet implements SheetInterface
/** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
protected $rowIterator;
/** @var int Index of the sheet, based on order of creation (zero-based) */
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index;
/** @var string Name of the sheet */
@ -25,7 +25,7 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
@ -46,7 +46,7 @@ class Sheet implements SheetInterface
/**
* @api
* @return int Index of the sheet, based on order of creation (zero-based)
* @return int Index of the sheet, based on order in the workbook (zero-based)
*/
public function getIndex()
{

View File

@ -24,7 +24,7 @@ class Sheet
/** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */
protected static $SHEETS_NAME_USED = [];
/** @var int Index of the sheet, based on order of creation (zero-based) */
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index;
/** @var string Name of the sheet */
@ -34,7 +34,7 @@ class Sheet
protected $stringHelper;
/**
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
*/
public function __construct($sheetIndex)
{
@ -45,7 +45,7 @@ class Sheet
/**
* @api
* @return int Index of the sheet, based on order of creation (zero-based)
* @return int Index of the sheet, based on order in the workbook (zero-based)
*/
public function getIndex()
{

View File

@ -73,6 +73,28 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
}
}
/**
* @return void
*/
public function testReadShouldSupportSheetsDefinitionInRandomOrder()
{
$allRows = $this->getAllRowsForFile('two_sheets_with_sheets_definition_in_reverse_order.xlsx');
$expectedRows = [
['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'],
['s1 - A2', 's1 - B2', 's1 - C2', 's1 - D2', 's1 - E2'],
['s1 - A3', 's1 - B3', 's1 - C3', 's1 - D3', 's1 - E3'],
['s1 - A4', 's1 - B4', 's1 - C4', 's1 - D4', 's1 - E4'],
['s1 - A5', 's1 - B5', 's1 - C5', 's1 - D5', 's1 - E5'],
['s2 - A1', 's2 - B1', 's2 - C1', 's2 - D1', 's2 - E1'],
['s2 - A2', 's2 - B2', 's2 - C2', 's2 - D2', 's2 - E2'],
['s2 - A3', 's2 - B3', 's2 - C3', 's2 - D3', 's2 - E3'],
['s2 - A4', 's2 - B4', 's2 - C4', 's2 - D4', 's2 - E4'],
['s2 - A5', 's2 - B5', 's2 - C5', 's2 - D5', 's2 - E5'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/