Merge pull request #165 from box/support_xlsx_sheets_random_order
Support XLSX that are defined in random order
This commit is contained in:
commit
c48c07db99
@ -19,7 +19,7 @@ class Sheet implements SheetInterface
|
|||||||
/** @var int ID of the sheet */
|
/** @var int ID of the sheet */
|
||||||
protected $id;
|
protected $id;
|
||||||
|
|
||||||
/** @var int Index of the sheet, based on order of creation (zero-based) */
|
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
|
||||||
protected $index;
|
protected $index;
|
||||||
|
|
||||||
/** @var string Name of the sheet */
|
/** @var string Name of the sheet */
|
||||||
@ -27,7 +27,7 @@ class Sheet implements SheetInterface
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
|
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
|
||||||
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
|
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||||
* @param string $sheetName Name of the sheet
|
* @param string $sheetName Name of the sheet
|
||||||
*/
|
*/
|
||||||
public function __construct($xmlReader, $sheetIndex, $sheetName)
|
public function __construct($xmlReader, $sheetIndex, $sheetName)
|
||||||
@ -48,7 +48,7 @@ class Sheet implements SheetInterface
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @api
|
* @api
|
||||||
* @return int Index of the sheet, based on order of creation (zero-based)
|
* @return int Index of the sheet, based on order in the workbook (zero-based)
|
||||||
*/
|
*/
|
||||||
public function getIndex()
|
public function getIndex()
|
||||||
{
|
{
|
||||||
|
@ -13,9 +13,6 @@ use Box\Spout\Reader\XLSX\Sheet;
|
|||||||
*/
|
*/
|
||||||
class SheetHelper
|
class SheetHelper
|
||||||
{
|
{
|
||||||
/** Extension for XML files */
|
|
||||||
const XML_EXTENSION = '.xml';
|
|
||||||
|
|
||||||
/** Paths of XML files relative to the XLSX file root */
|
/** Paths of XML files relative to the XLSX file root */
|
||||||
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
|
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
|
||||||
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
|
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
|
||||||
@ -79,9 +76,15 @@ class SheetHelper
|
|||||||
$sheetNode = $sheetNodes[$i];
|
$sheetNode = $sheetNodes[$i];
|
||||||
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
|
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
|
||||||
|
|
||||||
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i);
|
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// make sure the sheets are sorted by index
|
||||||
|
// (as the sheets are not necessarily in this order in the XML file)
|
||||||
|
usort($sheets, function ($sheet1, $sheet2) {
|
||||||
|
return ($sheet1->getIndex() - $sheet2->getIndex());
|
||||||
|
});
|
||||||
|
|
||||||
return $sheets;
|
return $sheets;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,60 +94,37 @@ class SheetHelper
|
|||||||
* Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID.
|
* Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID.
|
||||||
* The entry contains the ID and name of the sheet.
|
* The entry contains the ID and name of the sheet.
|
||||||
*
|
*
|
||||||
* If this piece of data can't be found by parsing the different XML files, the ID will default
|
|
||||||
* to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will
|
|
||||||
* default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
|
|
||||||
*
|
|
||||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||||
* @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
|
|
||||||
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
|
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
|
||||||
*/
|
*/
|
||||||
protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased)
|
protected function getSheetFromXML($sheetDataXMLFilePath)
|
||||||
{
|
{
|
||||||
$sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);
|
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
|
||||||
|
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
|
||||||
/*
|
|
||||||
* In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
|
|
||||||
* In workbook.xml.rels, it is only "worksheets/sheet1.xml"
|
|
||||||
*/
|
|
||||||
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
|
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
|
||||||
|
|
||||||
// find the node associated to the given file path
|
// find the node associated to the given file path
|
||||||
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
|
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
|
||||||
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
|
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
|
||||||
|
$relationshipNode = $relationshipNodes[0];
|
||||||
|
|
||||||
if (count($relationshipNodes) === 1) {
|
$relationshipSheetId = $relationshipNode->getAttribute('Id');
|
||||||
$relationshipNode = $relationshipNodes[0];
|
|
||||||
$sheetId = $relationshipNode->getAttribute('Id');
|
|
||||||
|
|
||||||
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
|
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
|
||||||
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]');
|
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]');
|
||||||
|
$sheetNode = $sheetNodes[0];
|
||||||
|
|
||||||
if (count($sheetNodes) === 1) {
|
$escapedSheetName = $sheetNode->getAttribute('name');
|
||||||
$sheetNode = $sheetNodes[0];
|
$sheetIdOneBased = $sheetNode->getAttribute('sheetId');
|
||||||
$escapedSheetName = $sheetNode->getAttribute('name');
|
$sheetIndexZeroBased = $sheetIdOneBased - 1;
|
||||||
|
|
||||||
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
|
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
|
||||||
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
$escaper = new \Box\Spout\Common\Escaper\XLSX();
|
||||||
$sheetName = $escaper->unescape($escapedSheetName);
|
$sheetName = $escaper->unescape($escapedSheetName);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
|
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the default name of the sheet whose data is located
|
|
||||||
* at the given path.
|
|
||||||
*
|
|
||||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file
|
|
||||||
* @return string The default sheet name
|
|
||||||
*/
|
|
||||||
protected function getDefaultSheetName($sheetDataXMLFilePath)
|
|
||||||
{
|
|
||||||
return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
|
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
|
||||||
* The returned value is cached.
|
* The returned value is cached.
|
||||||
|
@ -15,7 +15,7 @@ class Sheet implements SheetInterface
|
|||||||
/** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
|
/** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
|
||||||
protected $rowIterator;
|
protected $rowIterator;
|
||||||
|
|
||||||
/** @var int Index of the sheet, based on order of creation (zero-based) */
|
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
|
||||||
protected $index;
|
protected $index;
|
||||||
|
|
||||||
/** @var string Name of the sheet */
|
/** @var string Name of the sheet */
|
||||||
@ -25,7 +25,7 @@ class Sheet implements SheetInterface
|
|||||||
* @param string $filePath Path of the XLSX file being read
|
* @param string $filePath Path of the XLSX file being read
|
||||||
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
|
||||||
* @param Helper\SharedStringsHelper Helper to work with shared strings
|
* @param Helper\SharedStringsHelper Helper to work with shared strings
|
||||||
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
|
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||||
* @param string $sheetName Name of the sheet
|
* @param string $sheetName Name of the sheet
|
||||||
*/
|
*/
|
||||||
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
|
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
|
||||||
@ -46,7 +46,7 @@ class Sheet implements SheetInterface
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @api
|
* @api
|
||||||
* @return int Index of the sheet, based on order of creation (zero-based)
|
* @return int Index of the sheet, based on order in the workbook (zero-based)
|
||||||
*/
|
*/
|
||||||
public function getIndex()
|
public function getIndex()
|
||||||
{
|
{
|
||||||
|
@ -24,7 +24,7 @@ class Sheet
|
|||||||
/** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */
|
/** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */
|
||||||
protected static $SHEETS_NAME_USED = [];
|
protected static $SHEETS_NAME_USED = [];
|
||||||
|
|
||||||
/** @var int Index of the sheet, based on order of creation (zero-based) */
|
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
|
||||||
protected $index;
|
protected $index;
|
||||||
|
|
||||||
/** @var string Name of the sheet */
|
/** @var string Name of the sheet */
|
||||||
@ -34,7 +34,7 @@ class Sheet
|
|||||||
protected $stringHelper;
|
protected $stringHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
|
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
|
||||||
*/
|
*/
|
||||||
public function __construct($sheetIndex)
|
public function __construct($sheetIndex)
|
||||||
{
|
{
|
||||||
@ -45,7 +45,7 @@ class Sheet
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @api
|
* @api
|
||||||
* @return int Index of the sheet, based on order of creation (zero-based)
|
* @return int Index of the sheet, based on order in the workbook (zero-based)
|
||||||
*/
|
*/
|
||||||
public function getIndex()
|
public function getIndex()
|
||||||
{
|
{
|
||||||
|
@ -73,6 +73,28 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
public function testReadShouldSupportSheetsDefinitionInRandomOrder()
|
||||||
|
{
|
||||||
|
$allRows = $this->getAllRowsForFile('two_sheets_with_sheets_definition_in_reverse_order.xlsx');
|
||||||
|
|
||||||
|
$expectedRows = [
|
||||||
|
['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'],
|
||||||
|
['s1 - A2', 's1 - B2', 's1 - C2', 's1 - D2', 's1 - E2'],
|
||||||
|
['s1 - A3', 's1 - B3', 's1 - C3', 's1 - D3', 's1 - E3'],
|
||||||
|
['s1 - A4', 's1 - B4', 's1 - C4', 's1 - D4', 's1 - E4'],
|
||||||
|
['s1 - A5', 's1 - B5', 's1 - C5', 's1 - D5', 's1 - E5'],
|
||||||
|
['s2 - A1', 's2 - B1', 's2 - C1', 's2 - D1', 's2 - E1'],
|
||||||
|
['s2 - A2', 's2 - B2', 's2 - C2', 's2 - D2', 's2 - E2'],
|
||||||
|
['s2 - A3', 's2 - B3', 's2 - C3', 's2 - D3', 's2 - E3'],
|
||||||
|
['s2 - A4', 's2 - B4', 's2 - C4', 's2 - D4', 's2 - E4'],
|
||||||
|
['s2 - A5', 's2 - B5', 's2 - C5', 's2 - D5', 's2 - E5'],
|
||||||
|
];
|
||||||
|
$this->assertEquals($expectedRows, $allRows);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user