Fetch XML file paths from Workbook Relationships

This commit is contained in:
Adrien Loison 2017-11-11 15:06:18 +01:00
parent 0c8a53c821
commit e2b519d6f9
9 changed files with 226 additions and 33 deletions

View File

@ -45,7 +45,10 @@ class XMLReader extends \XMLReader
*/ */
public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath) public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{ {
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath); // The file path should not start with a '/', otherwise it won't be found
$fileInsideZipPathWithoutLeadingSlash = ltrim($fileInsideZipPath, '/');
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPathWithoutLeadingSlash);
} }
/** /**

View File

@ -6,6 +6,7 @@ use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
use Box\Spout\Reader\XLSX\Manager\SheetManager; use Box\Spout\Reader\XLSX\Manager\SheetManager;
use Box\Spout\Reader\XLSX\Manager\StyleManager; use Box\Spout\Reader\XLSX\Manager\StyleManager;
use Box\Spout\Reader\XLSX\Manager\WorkbookRelationshipsManager;
/** /**
* Class ManagerFactory * Class ManagerFactory
@ -19,6 +20,9 @@ class ManagerFactory
/** @var CachingStrategyFactory */ /** @var CachingStrategyFactory */
private $cachingStrategyFactory; private $cachingStrategyFactory;
/** @var WorkbookRelationshipsManager */
private $cachedWorkbookRelationshipsManager;
/** /**
* @param HelperFactory $helperFactory Factory to create helpers * @param HelperFactory $helperFactory Factory to create helpers
* @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
@ -37,7 +41,30 @@ class ManagerFactory
*/ */
public function createSharedStringsManager($filePath, $tempFolder, $entityFactory) public function createSharedStringsManager($filePath, $tempFolder, $entityFactory)
{ {
return new SharedStringsManager($filePath, $tempFolder, $entityFactory, $this->helperFactory, $this->cachingStrategyFactory); $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);
return new SharedStringsManager(
$filePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$this->helperFactory,
$this->cachingStrategyFactory
);
}
/**
* @param string $filePath Path of the XLSX file being read
* @param EntityFactory $entityFactory Factory to create entities
* @return WorkbookRelationshipsManager
*/
private function createWorkbookRelationshipsManager($filePath, $entityFactory)
{
if (!isset($this->cachedWorkbookRelationshipsManager)) {
$this->cachedWorkbookRelationshipsManager = new WorkbookRelationshipsManager($filePath, $entityFactory);
}
return $this->cachedWorkbookRelationshipsManager;
} }
/** /**
@ -61,6 +88,8 @@ class ManagerFactory
*/ */
public function createStyleManager($filePath, $entityFactory) public function createStyleManager($filePath, $entityFactory)
{ {
return new StyleManager($filePath, $entityFactory); $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);
return new StyleManager($filePath, $workbookRelationshipsManager, $entityFactory);
} }
} }

View File

@ -9,6 +9,7 @@ use Box\Spout\Reader\XLSX\Creator\EntityFactory;
use Box\Spout\Reader\XLSX\Creator\HelperFactory; use Box\Spout\Reader\XLSX\Creator\HelperFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory; use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface; use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
use Box\Spout\Writer\Common\Entity\Workbook;
/** /**
* Class SharedStringsManager * Class SharedStringsManager
@ -16,9 +17,6 @@ use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
*/ */
class SharedStringsManager class SharedStringsManager
{ {
/** Path of sharedStrings XML file inside the XLSX file */
const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml';
/** Main namespace for the sharedStrings.xml file */ /** Main namespace for the sharedStrings.xml file */
const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
@ -40,6 +38,9 @@ class SharedStringsManager
/** @var string Temporary folder where the temporary files to store shared strings will be stored */ /** @var string Temporary folder where the temporary files to store shared strings will be stored */
protected $tempFolder; protected $tempFolder;
/** @var WorkbookRelationshipsManager Helps retrieving workbook relationships */
protected $workbookRelationshipsManager;
/** @var EntityFactory Factory to create entities */ /** @var EntityFactory Factory to create entities */
protected $entityFactory; protected $entityFactory;
@ -55,14 +56,22 @@ class SharedStringsManager
/** /**
* @param string $filePath Path of the XLSX file being read * @param string $filePath Path of the XLSX file being read
* @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param EntityFactory $entityFactory Factory to create entities * @param EntityFactory $entityFactory Factory to create entities
* @param HelperFactory $helperFactory Factory to create helpers * @param HelperFactory $helperFactory Factory to create helpers
* @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
*/ */
public function __construct($filePath, $tempFolder, $entityFactory, $helperFactory, $cachingStrategyFactory) public function __construct(
{ $filePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$helperFactory,
$cachingStrategyFactory
) {
$this->filePath = $filePath; $this->filePath = $filePath;
$this->tempFolder = $tempFolder; $this->tempFolder = $tempFolder;
$this->workbookRelationshipsManager = $workbookRelationshipsManager;
$this->entityFactory = $entityFactory; $this->entityFactory = $entityFactory;
$this->helperFactory = $helperFactory; $this->helperFactory = $helperFactory;
$this->cachingStrategyFactory = $cachingStrategyFactory; $this->cachingStrategyFactory = $cachingStrategyFactory;
@ -75,15 +84,7 @@ class SharedStringsManager
*/ */
public function hasSharedStrings() public function hasSharedStrings()
{ {
$hasSharedStrings = false; return $this->workbookRelationshipsManager->hasSharedStringsXMLFile();
$zip = $this->entityFactory->createZipArchive();
if ($zip->open($this->filePath) === true) {
$hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false);
$zip->close();
}
return $hasSharedStrings;
} }
/** /**
@ -96,16 +97,17 @@ class SharedStringsManager
* The XML file can be really big with sheets containing a lot of data. That is why * The XML file can be really big with sheets containing a lot of data. That is why
* we need to use a XML reader that provides streaming like the XMLReader library. * we need to use a XML reader that provides streaming like the XMLReader library.
* *
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read * @throws \Box\Spout\Common\Exception\IOException If shared strings XML file can't be read
* @return void * @return void
*/ */
public function extractSharedStrings() public function extractSharedStrings()
{ {
$sharedStringsXMLFilePath = $this->workbookRelationshipsManager->getSharedStringsXMLFilePath();
$xmlReader = $this->entityFactory->createXMLReader(); $xmlReader = $this->entityFactory->createXMLReader();
$sharedStringIndex = 0; $sharedStringIndex = 0;
if ($xmlReader->openFileInZip($this->filePath, self::SHARED_STRINGS_XML_FILE_PATH) === false) { if ($xmlReader->openFileInZip($this->filePath, $sharedStringsXMLFilePath) === false) {
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); throw new IOException('Could not open "' . $sharedStringsXMLFilePath . '".');
} }
try { try {

View File

@ -10,9 +10,6 @@ use Box\Spout\Reader\XLSX\Creator\EntityFactory;
*/ */
class StyleManager class StyleManager
{ {
/** Paths of XML files relative to the XLSX file root */
const STYLES_XML_FILE_PATH = 'xl/styles.xml';
/** Nodes used to find relevant information in the styles XML file */ /** Nodes used to find relevant information in the styles XML file */
const XML_NODE_NUM_FMTS = 'numFmts'; const XML_NODE_NUM_FMTS = 'numFmts';
const XML_NODE_NUM_FMT = 'numFmt'; const XML_NODE_NUM_FMT = 'numFmt';
@ -51,6 +48,9 @@ class StyleManager
/** @var string Path of the XLSX file being read */ /** @var string Path of the XLSX file being read */
protected $filePath; protected $filePath;
/** @var string Path of the styles XML file */
protected $stylesXMLFilePath;
/** @var EntityFactory Factory to create entities */ /** @var EntityFactory Factory to create entities */
protected $entityFactory; protected $entityFactory;
@ -68,13 +68,15 @@ class StyleManager
/** /**
* @param string $filePath Path of the XLSX file being read * @param string $filePath Path of the XLSX file being read
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param EntityFactory $entityFactory Factory to create entities * @param EntityFactory $entityFactory Factory to create entities
*/ */
public function __construct($filePath, $entityFactory) public function __construct($filePath, $workbookRelationshipsManager, $entityFactory)
{ {
$this->filePath = $filePath; $this->filePath = $filePath;
$this->entityFactory = $entityFactory; $this->entityFactory = $entityFactory;
$this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping); $this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
$this->stylesXMLFilePath = $workbookRelationshipsManager->getStylesXMLFilePath();
} }
/** /**
@ -112,7 +114,7 @@ class StyleManager
$xmlReader = $this->entityFactory->createXMLReader(); $xmlReader = $this->entityFactory->createXMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) { if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$this->extractNumberFormats($xmlReader); $this->extractNumberFormats($xmlReader);

View File

@ -0,0 +1,136 @@
<?php
namespace Box\Spout\Reader\XLSX\Manager;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Creator\EntityFactory;
/**
* Class WorkbookRelationshipsManager
* This class manages the workbook relationships defined in the associated XML file
*/
class WorkbookRelationshipsManager
{
    /** Folder that relative relationship targets are prefixed with to obtain a path inside the XLSX file */
    const BASE_PATH = 'xl/';

    /** Path of workbook relationships XML file inside the XLSX file */
    const WORKBOOK_RELS_XML_FILE_PATH = 'xl/_rels/workbook.xml.rels';

    /** Relationships types */
    const RELATIONSHIP_TYPE_SHARED_STRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
    const RELATIONSHIP_TYPE_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
    const RELATIONSHIP_TYPE_WORKSHEET = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

    /** Nodes and attributes used to find relevant information in the workbook relationships XML file */
    const XML_NODE_RELATIONSHIP = 'Relationship';
    const XML_ATTRIBUTE_TYPE = 'Type';
    const XML_ATTRIBUTE_TARGET = 'Target';

    /** @var string Path of the XLSX file being read */
    private $filePath;

    /** @var EntityFactory Factory to create entities */
    private $entityFactory;

    /** @var array|null Cache of the already read workbook relationships: [TYPE] => [FILE_NAME]. Null until first read. */
    private $cachedWorkbookRelationships;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param EntityFactory $entityFactory Factory to create entities
     */
    public function __construct($filePath, $entityFactory)
    {
        $this->filePath = $filePath;
        $this->entityFactory = $entityFactory;
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The path of the shared strings XML file, or null if the workbook declares none
     */
    public function getSharedStringsXMLFilePath()
    {
        return $this->getXMLFilePathForRelationshipType(self::RELATIONSHIP_TYPE_SHARED_STRINGS);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return bool Whether the XLSX file contains a shared strings XML file
     */
    public function hasSharedStringsXMLFile()
    {
        return $this->hasRelationshipOfType(self::RELATIONSHIP_TYPE_SHARED_STRINGS);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The path of the styles XML file, or null if the workbook declares none
     */
    public function getStylesXMLFilePath()
    {
        return $this->getXMLFilePathForRelationshipType(self::RELATIONSHIP_TYPE_STYLES);
    }

    /**
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return bool Whether the XLSX file contains a styles XML file
     */
    public function hasStylesXMLFile()
    {
        return $this->hasRelationshipOfType(self::RELATIONSHIP_TYPE_STYLES);
    }

    /**
     * @param string $relationshipType One of the RELATIONSHIP_TYPE_* constants
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return bool Whether a relationship of the given type was found
     */
    private function hasRelationshipOfType($relationshipType)
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        return isset($workbookRelationships[$relationshipType]);
    }

    /**
     * Resolves the target of the given relationship type to a path inside the XLSX file.
     *
     * @param string $relationshipType One of the RELATIONSHIP_TYPE_* constants
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return string|null The resolved path, or null if no relationship of that type exists
     */
    private function getXMLFilePathForRelationshipType($relationshipType)
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        if (!isset($workbookRelationships[$relationshipType])) {
            // Avoid an undefined index notice and a bogus "xl/" path when the relationship is missing
            return null;
        }

        $target = $workbookRelationships[$relationshipType];

        // The target can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml").
        // Absolute targets are returned untouched. Targets that already start with the base path
        // (e.g. "xl/styles.xml") are also kept as is. Only a prefix match counts: "xl/" appearing
        // in the middle of a relative target does not make it a full path.
        $isAbsolute = (strpos($target, '/') === 0);
        $startsWithBasePath = (strpos($target, self::BASE_PATH) === 0);

        if ($isAbsolute || $startsWithBasePath) {
            return $target;
        }

        return self::BASE_PATH . $target;
    }

    /**
     * Returns the workbook relationships, reading workbook.xml.rels on first use.
     * It caches the result so that the file is read only once.
     *
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return array [TYPE] => [TARGET]
     */
    private function getWorkbookRelationships()
    {
        if (!isset($this->cachedWorkbookRelationships)) {
            // The cache is only assigned once reading succeeded, so a failed read never leaves partial data behind
            $this->cachedWorkbookRelationships = $this->readWorkbookRelationships();
        }

        return $this->cachedWorkbookRelationships;
    }

    /**
     * Reads the workbook.xml.rels and extracts the target associated to the different types.
     *
     * @NOTE: if a type is defined more than once, the last value wins.
     *        To be changed if we want to get the file paths of sheet XML files for instance.
     *
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return array [TYPE] => [TARGET]
     */
    private function readWorkbookRelationships()
    {
        $xmlReader = $this->entityFactory->createXMLReader();

        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_RELS_XML_FILE_PATH) === false) {
            throw new IOException('Could not open "' . self::WORKBOOK_RELS_XML_FILE_PATH . '".');
        }

        $workbookRelationships = [];

        try {
            while ($xmlReader->readUntilNodeFound(self::XML_NODE_RELATIONSHIP)) {
                $type = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TYPE);
                $target = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);

                // Ignore malformed nodes instead of storing them under an empty key
                if ($type !== null && $target !== null) {
                    $workbookRelationships[$type] = $target;
                }
            }
        } finally {
            // Release the reader whether or not reading succeeded
            $xmlReader->close();
        }

        return $workbookRelationships;
    }
}

View File

@ -51,8 +51,16 @@ class SharedStringsManagerTest extends \PHPUnit_Framework_TestCase
$helperFactory = new HelperFactory(); $helperFactory = new HelperFactory();
$managerFactory = new ManagerFactory($helperFactory, $cachingStrategyFactory); $managerFactory = new ManagerFactory($helperFactory, $cachingStrategyFactory);
$entityFactory = new EntityFactory($managerFactory, $helperFactory); $entityFactory = new EntityFactory($managerFactory, $helperFactory);
$workbookRelationshipsManager = new WorkbookRelationshipsManager($resourcePath, $entityFactory);
$this->sharedStringsManager = new SharedStringsManager($resourcePath, $tempFolder, $entityFactory, $helperFactory, $cachingStrategyFactory); $this->sharedStringsManager = new SharedStringsManager(
$resourcePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$helperFactory,
$cachingStrategyFactory
);
return $this->sharedStringsManager; return $this->sharedStringsManager;
} }

View File

@ -3,9 +3,6 @@
namespace Box\Spout\Reader\XLSX\Manager; namespace Box\Spout\Reader\XLSX\Manager;
use Box\Spout\Reader\XLSX\Creator\EntityFactory; use Box\Spout\Reader\XLSX\Creator\EntityFactory;
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
use Box\Spout\Reader\XLSX\Creator\ManagerFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
/** /**
* Class StyleManagerTest * Class StyleManagerTest
@ -19,13 +16,12 @@ class StyleManagerTest extends \PHPUnit_Framework_TestCase
*/ */
private function getStyleManagerMock($styleAttributes = [], $customNumberFormats = []) private function getStyleManagerMock($styleAttributes = [], $customNumberFormats = [])
{ {
$helperFactory = new HelperFactory(); $entityFactory = $this->createMock(EntityFactory::class);
$managerFactory = new ManagerFactory($helperFactory, new CachingStrategyFactory()); $workbookRelationshipsManager = $this->createMock(WorkbookRelationshipsManager::class);
$entityFactory = new EntityFactory($managerFactory, $helperFactory);
/** @var StyleManager $styleManager */ /** @var StyleManager $styleManager */
$styleManager = $this->getMockBuilder('\Box\Spout\Reader\XLSX\Manager\StyleManager') $styleManager = $this->getMockBuilder('\Box\Spout\Reader\XLSX\Manager\StyleManager')
->setConstructorArgs(['/path/to/file.xlsx', $entityFactory]) ->setConstructorArgs(['/path/to/file.xlsx', $workbookRelationshipsManager, $entityFactory])
->setMethods(['getCustomNumberFormats', 'getStylesAttributes']) ->setMethods(['getCustomNumberFormats', 'getStylesAttributes'])
->getMock(); ->getMock();

View File

@ -169,6 +169,23 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows); $this->assertEquals($expectedRows, $allRows);
} }
/**
 * Reads the "one_sheet_with_capital_shared_strings_filename.xlsx" fixture and
 * checks that every expected cell value comes back.
 *
 * @return void
 */
public function testReadShouldSupportFilesWithCapitalSharedStringsFileName()
{
    // 5 rows x 5 columns, each cell named after its sheet and coordinates
    $expectedRows = [
        ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
        ['s1--A2', 's1--B2', 's1--C2', 's1--D2', 's1--E2'],
        ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
        ['s1--A4', 's1--B4', 's1--C4', 's1--D4', 's1--E4'],
        ['s1--A5', 's1--B5', 's1--C5', 's1--D5', 's1--E5'],
    ];

    $actualRows = $this->getAllRowsForFile('one_sheet_with_capital_shared_strings_filename.xlsx');

    $this->assertEquals($expectedRows, $actualRows);
}
/** /**
* @return void * @return void
*/ */