Merge pull request #165 from box/support_xlsx_sheets_random_order

Support XLSX files whose sheets are defined in random order
This commit is contained in:
Adrien Loison 2016-01-08 08:50:49 -08:00
commit c48c07db99
6 changed files with 52 additions and 50 deletions

View File

@ -19,7 +19,7 @@ class Sheet implements SheetInterface
/** @var int ID of the sheet */ /** @var int ID of the sheet */
protected $id; protected $id;
/** @var int Index of the sheet, based on order of creation (zero-based) */ /** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index; protected $index;
/** @var string Name of the sheet */ /** @var string Name of the sheet */
@ -27,7 +27,7 @@ class Sheet implements SheetInterface
/** /**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet * @param string $sheetName Name of the sheet
*/ */
public function __construct($xmlReader, $sheetIndex, $sheetName) public function __construct($xmlReader, $sheetIndex, $sheetName)
@ -48,7 +48,7 @@ class Sheet implements SheetInterface
/** /**
* @api * @api
* @return int Index of the sheet, based on order of creation (zero-based) * @return int Index of the sheet, based on order in the workbook (zero-based)
*/ */
public function getIndex() public function getIndex()
{ {

View File

@ -13,9 +13,6 @@ use Box\Spout\Reader\XLSX\Sheet;
*/ */
class SheetHelper class SheetHelper
{ {
/** Extension for XML files */
const XML_EXTENSION = '.xml';
/** Paths of XML files relative to the XLSX file root */ /** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml'; const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
@ -79,9 +76,15 @@ class SheetHelper
$sheetNode = $sheetNodes[$i]; $sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName'); $sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i); $sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath);
} }
// make sure the sheets are sorted by index
// (as the sheets are not necessarily in this order in the XML file)
usort($sheets, function ($sheet1, $sheet2) {
return ($sheet1->getIndex() - $sheet2->getIndex());
});
return $sheets; return $sheets;
} }
@ -91,60 +94,37 @@ class SheetHelper
* Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID. * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID.
* The entry contains the ID and name of the sheet. * The entry contains the ID and name of the sheet.
* *
* If this piece of data can't be found by parsing the different XML files, the ID will default
* to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will
* default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
*/ */
protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased) protected function getSheetFromXML($sheetDataXMLFilePath)
{ {
$sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
/*
* In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
* In workbook.xml.rels, it is only "worksheets/sheet1.xml"
*/
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/'); $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
// find the node associated to the given file path // find the node associated to the given file path
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
$relationshipNode = $relationshipNodes[0];
if (count($relationshipNodes) === 1) { $relationshipSheetId = $relationshipNode->getAttribute('Id');
$relationshipNode = $relationshipNodes[0];
$sheetId = $relationshipNode->getAttribute('Id');
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]'); $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]');
$sheetNode = $sheetNodes[0];
if (count($sheetNodes) === 1) { $escapedSheetName = $sheetNode->getAttribute('name');
$sheetNode = $sheetNodes[0]; $sheetIdOneBased = $sheetNode->getAttribute('sheetId');
$escapedSheetName = $sheetNode->getAttribute('name'); $sheetIndexZeroBased = $sheetIdOneBased - 1;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX(); $escaper = new \Box\Spout\Common\Escaper\XLSX();
$sheetName = $escaper->unescape($escapedSheetName); $sheetName = $escaper->unescape($escapedSheetName);
}
}
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName); return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
} }
/**
* Returns the default name of the sheet whose data is located
* at the given path.
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file
* @return string The default sheet name
*/
protected function getDefaultSheetName($sheetDataXMLFilePath)
{
return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
}
/** /**
* Returns a representation of the workbook.xml.rels file, ready to be parsed. * Returns a representation of the workbook.xml.rels file, ready to be parsed.
* The returned value is cached. * The returned value is cached.

View File

@ -15,7 +15,7 @@ class Sheet implements SheetInterface
/** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */ /** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
protected $rowIterator; protected $rowIterator;
/** @var int Index of the sheet, based on order of creation (zero-based) */ /** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index; protected $index;
/** @var string Name of the sheet */ /** @var string Name of the sheet */
@ -25,7 +25,7 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read * @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings * @param Helper\SharedStringsHelper Helper to work with shared strings
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet * @param string $sheetName Name of the sheet
*/ */
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName) public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
@ -46,7 +46,7 @@ class Sheet implements SheetInterface
/** /**
* @api * @api
* @return int Index of the sheet, based on order of creation (zero-based) * @return int Index of the sheet, based on order in the workbook (zero-based)
*/ */
public function getIndex() public function getIndex()
{ {

View File

@ -24,7 +24,7 @@ class Sheet
/** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */ /** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */
protected static $SHEETS_NAME_USED = []; protected static $SHEETS_NAME_USED = [];
/** @var int Index of the sheet, based on order of creation (zero-based) */ /** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index; protected $index;
/** @var string Name of the sheet */ /** @var string Name of the sheet */
@ -34,7 +34,7 @@ class Sheet
protected $stringHelper; protected $stringHelper;
/** /**
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
*/ */
public function __construct($sheetIndex) public function __construct($sheetIndex)
{ {
@ -45,7 +45,7 @@ class Sheet
/** /**
* @api * @api
* @return int Index of the sheet, based on order of creation (zero-based) * @return int Index of the sheet, based on order in the workbook (zero-based)
*/ */
public function getIndex() public function getIndex()
{ {

View File

@ -73,6 +73,28 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
} }
} }
/**
* @return void
*/
public function testReadShouldSupportSheetsDefinitionInRandomOrder()
{
    // Expected outcome: every row of sheet "s1" (rows 1-5, columns A-E),
    // followed by every row of sheet "s2", each cell being "<sheet> - <column><row>".
    $expectedRows = [];
    foreach (['s1', 's2'] as $sheetLabel) {
        foreach (range(1, 5) as $rowNumber) {
            $expectedRow = [];
            foreach (['A', 'B', 'C', 'D', 'E'] as $columnLetter) {
                $expectedRow[] = "{$sheetLabel} - {$columnLetter}{$rowNumber}";
            }
            $expectedRows[] = $expectedRow;
        }
    }

    // The fixture name indicates its sheets are defined in reverse order;
    // the rows read back must still match the s1-then-s2 sequence built above.
    $actualRows = $this->getAllRowsForFile('two_sheets_with_sheets_definition_in_reverse_order.xlsx');

    $this->assertEquals($expectedRows, $actualRows);
}
/** /**
* @return void * @return void
*/ */