Fetch XML file paths from Workbook Relationships

This commit is contained in:
Adrien Loison 2017-11-11 15:06:18 +01:00
parent 0c8a53c821
commit e2b519d6f9
9 changed files with 226 additions and 33 deletions

View File

@ -45,7 +45,10 @@ class XMLReader extends \XMLReader
*/
public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath);
// The file path should not start with a '/', otherwise it won't be found
$fileInsideZipPathWithoutLeadingSlash = ltrim($fileInsideZipPath, '/');
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPathWithoutLeadingSlash);
}
/**

View File

@ -6,6 +6,7 @@ use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
use Box\Spout\Reader\XLSX\Manager\SheetManager;
use Box\Spout\Reader\XLSX\Manager\StyleManager;
use Box\Spout\Reader\XLSX\Manager\WorkbookRelationshipsManager;
/**
* Class ManagerFactory
@ -19,6 +20,9 @@ class ManagerFactory
/** @var CachingStrategyFactory */
private $cachingStrategyFactory;
/** @var WorkbookRelationshipsManager */
private $cachedWorkbookRelationshipsManager;
/**
* @param HelperFactory $helperFactory Factory to create helpers
* @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
@ -37,7 +41,30 @@ class ManagerFactory
*/
public function createSharedStringsManager($filePath, $tempFolder, $entityFactory)
{
return new SharedStringsManager($filePath, $tempFolder, $entityFactory, $this->helperFactory, $this->cachingStrategyFactory);
$workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);
return new SharedStringsManager(
$filePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$this->helperFactory,
$this->cachingStrategyFactory
);
}
/**
 * Returns the workbook relationships manager for the given XLSX file.
 *
 * The manager is cached so that the shared strings manager and the style manager
 * created for the same file share a single instance. The cache is keyed by file
 * path: when this factory is reused for a different file, a new manager is built
 * instead of returning the one bound to the previous file.
 *
 * NOTE: $this->cachedWorkbookRelationshipsManager therefore holds a single-entry
 * map [FILE_PATH => WorkbookRelationshipsManager] (or null before the first call).
 *
 * @param string $filePath Path of the XLSX file being read
 * @param EntityFactory $entityFactory Factory to create entities
 * @return WorkbookRelationshipsManager
 */
private function createWorkbookRelationshipsManager($filePath, $entityFactory)
{
    $cachedManagers = $this->cachedWorkbookRelationshipsManager;

    if (!is_array($cachedManagers) || !isset($cachedManagers[$filePath])) {
        // Only the manager of the most recently requested file is kept,
        // so the cache does not grow when many files are read in a row.
        $cachedManagers = [
            $filePath => new WorkbookRelationshipsManager($filePath, $entityFactory),
        ];
        $this->cachedWorkbookRelationshipsManager = $cachedManagers;
    }

    return $cachedManagers[$filePath];
}
/**
@ -61,6 +88,8 @@ class ManagerFactory
*/
public function createStyleManager($filePath, $entityFactory)
{
return new StyleManager($filePath, $entityFactory);
$workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);
return new StyleManager($filePath, $workbookRelationshipsManager, $entityFactory);
}
}

View File

@ -9,6 +9,7 @@ use Box\Spout\Reader\XLSX\Creator\EntityFactory;
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
use Box\Spout\Writer\Common\Entity\Workbook;
/**
* Class SharedStringsManager
@ -16,9 +17,6 @@ use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
*/
class SharedStringsManager
{
/** Path of sharedStrings XML file inside the XLSX file */
const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml';
/** Main namespace for the sharedStrings.xml file */
const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
@ -40,6 +38,9 @@ class SharedStringsManager
/** @var string Temporary folder where the temporary files to store shared strings will be stored */
protected $tempFolder;
/** @var WorkbookRelationshipsManager Helps retrieving workbook relationships */
protected $workbookRelationshipsManager;
/** @var EntityFactory Factory to create entities */
protected $entityFactory;
@ -55,14 +56,22 @@ class SharedStringsManager
/**
* @param string $filePath Path of the XLSX file being read
* @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param EntityFactory $entityFactory Factory to create entities
* @param HelperFactory $helperFactory Factory to create helpers
* @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
*/
public function __construct($filePath, $tempFolder, $entityFactory, $helperFactory, $cachingStrategyFactory)
{
public function __construct(
$filePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$helperFactory,
$cachingStrategyFactory
) {
$this->filePath = $filePath;
$this->tempFolder = $tempFolder;
$this->workbookRelationshipsManager = $workbookRelationshipsManager;
$this->entityFactory = $entityFactory;
$this->helperFactory = $helperFactory;
$this->cachingStrategyFactory = $cachingStrategyFactory;
@ -75,15 +84,7 @@ class SharedStringsManager
*/
public function hasSharedStrings()
{
$hasSharedStrings = false;
$zip = $this->entityFactory->createZipArchive();
if ($zip->open($this->filePath) === true) {
$hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false);
$zip->close();
}
return $hasSharedStrings;
return $this->workbookRelationshipsManager->hasSharedStringsXMLFile();
}
/**
@ -96,16 +97,17 @@ class SharedStringsManager
* The XML file can be really big with sheets containing a lot of data. That is why
* we need to use a XML reader that provides streaming like the XMLReader library.
*
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
* @throws \Box\Spout\Common\Exception\IOException If shared strings XML file can't be read
* @return void
*/
public function extractSharedStrings()
{
$sharedStringsXMLFilePath = $this->workbookRelationshipsManager->getSharedStringsXMLFilePath();
$xmlReader = $this->entityFactory->createXMLReader();
$sharedStringIndex = 0;
if ($xmlReader->openFileInZip($this->filePath, self::SHARED_STRINGS_XML_FILE_PATH) === false) {
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
if ($xmlReader->openFileInZip($this->filePath, $sharedStringsXMLFilePath) === false) {
throw new IOException('Could not open "' . $sharedStringsXMLFilePath . '".');
}
try {

View File

@ -10,9 +10,6 @@ use Box\Spout\Reader\XLSX\Creator\EntityFactory;
*/
class StyleManager
{
/** Paths of XML files relative to the XLSX file root */
const STYLES_XML_FILE_PATH = 'xl/styles.xml';
/** Nodes used to find relevant information in the styles XML file */
const XML_NODE_NUM_FMTS = 'numFmts';
const XML_NODE_NUM_FMT = 'numFmt';
@ -51,6 +48,9 @@ class StyleManager
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var string Path of the styles XML file */
protected $stylesXMLFilePath;
/** @var EntityFactory Factory to create entities */
protected $entityFactory;
@ -68,13 +68,15 @@ class StyleManager
/**
* @param string $filePath Path of the XLSX file being read
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param EntityFactory $entityFactory Factory to create entities
*/
public function __construct($filePath, $entityFactory)
public function __construct($filePath, $workbookRelationshipsManager, $entityFactory)
{
$this->filePath = $filePath;
$this->entityFactory = $entityFactory;
$this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
$this->stylesXMLFilePath = $workbookRelationshipsManager->getStylesXMLFilePath();
}
/**
@ -112,7 +114,7 @@ class StyleManager
$xmlReader = $this->entityFactory->createXMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$this->extractNumberFormats($xmlReader);

View File

@ -0,0 +1,136 @@
<?php
namespace Box\Spout\Reader\XLSX\Manager;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Creator\EntityFactory;
/**
* Class WorkbookRelationshipsManager
* This class manages the workbook relationships defined in the associated XML file
*/
class WorkbookRelationshipsManager
{
    /** Folder, inside the XLSX file, that relative relationship targets are resolved against */
    const BASE_PATH = 'xl/';

    /** Path of workbook relationships XML file inside the XLSX file */
    const WORKBOOK_RELS_XML_FILE_PATH = 'xl/_rels/workbook.xml.rels';

    /** Relationships types */
    const RELATIONSHIP_TYPE_SHARED_STRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
    const RELATIONSHIP_TYPE_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
    const RELATIONSHIP_TYPE_WORKSHEET = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

    /** Nodes and attributes used to find relevant information in the workbook relationships XML file */
    const XML_NODE_RELATIONSHIP = 'Relationship';
    const XML_ATTRIBUTE_TYPE = 'Type';
    const XML_ATTRIBUTE_TARGET = 'Target';

    /** @var string Path of the XLSX file being read */
    private $filePath;

    /** @var EntityFactory Factory to create entities */
    private $entityFactory;

    /** @var array|null Cache of the already read workbook relationships: [TYPE] => [FILE_NAME]; null until first read */
    private $cachedWorkbookRelationships;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param EntityFactory $entityFactory Factory to create entities
     */
    public function __construct($filePath, $entityFactory)
    {
        $this->filePath = $filePath;
        $this->entityFactory = $entityFactory;
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The path of the shared strings XML file, or null if the workbook declares none
     */
    public function getSharedStringsXMLFilePath()
    {
        return $this->getXMLFilePathForRelationshipType(self::RELATIONSHIP_TYPE_SHARED_STRINGS);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return bool Whether the XLSX file contains a shared strings XML file
     */
    public function hasSharedStringsXMLFile()
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        return isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS]);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The path of the styles XML file, or null if the workbook declares none
     */
    public function getStylesXMLFilePath()
    {
        return $this->getXMLFilePathForRelationshipType(self::RELATIONSHIP_TYPE_STYLES);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return bool Whether the XLSX file contains a styles XML file
     */
    public function hasStylesXMLFile()
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        return isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES]);
    }

    /**
     * Resolves the target of the relationship of the given type to a path inside the XLSX file.
     *
     * A target can be relative to the workbook folder (e.g. "styles.xml") or absolute
     * (e.g. "/xl/styles.xml"). Absolute targets, as well as targets that already start
     * with the base path (e.g. "xl/styles.xml"), are returned unchanged. Any other target
     * is prefixed with the base path.
     *
     * @param string $relationshipType One of the RELATIONSHIP_TYPE_* constants
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The resolved path, or null if no relationship of this type exists
     */
    private function getXMLFilePathForRelationshipType($relationshipType)
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        if (!isset($workbookRelationships[$relationshipType])) {
            return null;
        }

        $target = $workbookRelationships[$relationshipType];

        $isAbsolutePath = (strpos($target, '/') === 0);
        $startsWithBasePath = (strpos($target, self::BASE_PATH) === 0);

        if ($isAbsolutePath || $startsWithBasePath) {
            return $target;
        }

        return self::BASE_PATH . $target;
    }

    /**
     * Reads the workbook.xml.rels and extracts the filename associated to the different types.
     * It caches the result so that the file is read only once.
     *
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return array [TYPE] => [FILE_NAME]
     */
    private function getWorkbookRelationships()
    {
        if (isset($this->cachedWorkbookRelationships)) {
            return $this->cachedWorkbookRelationships;
        }

        $xmlReader = $this->entityFactory->createXMLReader();

        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_RELS_XML_FILE_PATH) === false) {
            throw new IOException('Could not open "' . self::WORKBOOK_RELS_XML_FILE_PATH . '".');
        }

        // Relationships are collected locally and only cached once the whole file
        // has been read, so a failure midway never leaves a partial cache behind.
        $workbookRelationships = [];

        try {
            while ($xmlReader->readUntilNodeFound(self::XML_NODE_RELATIONSHIP)) {
                $type = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TYPE);
                $target = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);

                // Skip malformed relationships that lack a type or a target
                if ($type === null || $target === null) {
                    continue;
                }

                // @NOTE: if a type is defined more than once, we overwrite the previous value
                //        To be changed if we want to get the file paths of sheet XML files for instance.
                $workbookRelationships[$type] = $target;
            }
        } finally {
            // Always release the underlying file handle, even if reading failed
            $xmlReader->close();
        }

        $this->cachedWorkbookRelationships = $workbookRelationships;

        return $this->cachedWorkbookRelationships;
    }
}

View File

@ -51,8 +51,16 @@ class SharedStringsManagerTest extends \PHPUnit_Framework_TestCase
$helperFactory = new HelperFactory();
$managerFactory = new ManagerFactory($helperFactory, $cachingStrategyFactory);
$entityFactory = new EntityFactory($managerFactory, $helperFactory);
$workbookRelationshipsManager = new WorkbookRelationshipsManager($resourcePath, $entityFactory);
$this->sharedStringsManager = new SharedStringsManager($resourcePath, $tempFolder, $entityFactory, $helperFactory, $cachingStrategyFactory);
$this->sharedStringsManager = new SharedStringsManager(
$resourcePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$helperFactory,
$cachingStrategyFactory
);
return $this->sharedStringsManager;
}

View File

@ -3,9 +3,6 @@
namespace Box\Spout\Reader\XLSX\Manager;
use Box\Spout\Reader\XLSX\Creator\EntityFactory;
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
use Box\Spout\Reader\XLSX\Creator\ManagerFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
/**
* Class StyleManagerTest
@ -19,13 +16,12 @@ class StyleManagerTest extends \PHPUnit_Framework_TestCase
*/
private function getStyleManagerMock($styleAttributes = [], $customNumberFormats = [])
{
$helperFactory = new HelperFactory();
$managerFactory = new ManagerFactory($helperFactory, new CachingStrategyFactory());
$entityFactory = new EntityFactory($managerFactory, $helperFactory);
$entityFactory = $this->createMock(EntityFactory::class);
$workbookRelationshipsManager = $this->createMock(WorkbookRelationshipsManager::class);
/** @var StyleManager $styleManager */
$styleManager = $this->getMockBuilder('\Box\Spout\Reader\XLSX\Manager\StyleManager')
->setConstructorArgs(['/path/to/file.xlsx', $entityFactory])
->setConstructorArgs(['/path/to/file.xlsx', $workbookRelationshipsManager, $entityFactory])
->setMethods(['getCustomNumberFormats', 'getStylesAttributes'])
->getMock();

View File

@ -169,6 +169,23 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows);
}
/**
 * Reads the fixture whose name indicates a capitalized shared strings file name
 * and checks that all rows are returned as expected.
 *
 * @return void
 */
public function testReadShouldSupportFilesWithCapitalSharedStringsFileName()
{
    $rowsThatShouldBeRead = [
        ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
        ['s1--A2', 's1--B2', 's1--C2', 's1--D2', 's1--E2'],
        ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
        ['s1--A4', 's1--B4', 's1--C4', 's1--D4', 's1--E4'],
        ['s1--A5', 's1--B5', 's1--C5', 's1--D5', 's1--E5'],
    ];

    $rowsActuallyRead = $this->getAllRowsForFile('one_sheet_with_capital_shared_strings_filename.xlsx');

    $this->assertEquals($rowsThatShouldBeRead, $rowsActuallyRead);
}
/**
* @return void
*/