Adding open_file_in_zip() helper function to XMLReader (#238)

This commit is contained in:
Adrien Loison 2016-05-29 23:22:57 -07:00
parent 03866a6604
commit 251c0bebc1
8 changed files with 64 additions and 111 deletions

View File

@ -15,6 +15,8 @@ use Box\Spout\Reader\Wrapper\XMLReader;
*/ */
class SheetIterator implements IteratorInterface class SheetIterator implements IteratorInterface
{ {
const CONTENT_XML_FILE_PATH = 'content.xml';
/** Definition of XML node names and attributes used to parse sheet data */ /** Definition of XML node names and attributes used to parse sheet data */
const XML_NODE_TABLE = 'table:table'; const XML_NODE_TABLE = 'table:table';
const XML_ATTRIBUTE_TABLE_NAME = 'table:name'; const XML_ATTRIBUTE_TABLE_NAME = 'table:name';
@ -63,8 +65,8 @@ class SheetIterator implements IteratorInterface
{ {
$this->xmlReader->close(); $this->xmlReader->close();
$contentXmlFilePath = $this->filePath . '#content.xml'; if ($this->xmlReader->openFileInZip($this->filePath, self::CONTENT_XML_FILE_PATH) === false) {
if ($this->xmlReader->open('zip://' . $contentXmlFilePath) === false) { $contentXmlFilePath = $this->filePath . '#' . self::CONTENT_XML_FILE_PATH;
throw new IOException("Could not open \"{$contentXmlFilePath}\"."); throw new IOException("Could not open \"{$contentXmlFilePath}\".");
} }

View File

@ -14,66 +14,44 @@ class XMLReader extends \XMLReader
{ {
use XMLInternalErrorsHelper; use XMLInternalErrorsHelper;
const ZIP_WRAPPER = 'zip://';
/** /**
* Set the URI containing the XML to parse * Opens the XML Reader to read a file located inside a ZIP file.
* @see \XMLReader::open
* *
* @param string $URI URI pointing to the document * @param string $zipFilePath Path to the ZIP file
* @param string|null|void $encoding The document encoding * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @param int $options A bitmask of the LIBXML_* constants
* @return bool TRUE on success or FALSE on failure * @return bool TRUE on success or FALSE on failure
*/ */
public function open($URI, $encoding = null, $options = 0) public function openFileInZip($zipFilePath, $fileInsideZipPath)
{ {
$wasOpenSuccessful = false; $wasOpenSuccessful = false;
$realPathURI = $this->convertURIToUseRealPath($URI); $realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);
// HHVM does not check if file exists within zip file // HHVM does not check if file exists within zip file
// @link https://github.com/facebook/hhvm/issues/5779 // @link https://github.com/facebook/hhvm/issues/5779
if ($this->isRunningHHVM() && $this->isZipStream($realPathURI)) { if ($this->isRunningHHVM()) {
if ($this->fileExistsWithinZip($realPathURI)) { if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET); $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
} }
} else { } else {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET); $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
} }
return $wasOpenSuccessful; return $wasOpenSuccessful;
} }
/** /**
* Updates the given URI to use a real path. * Returns the real path for the given path components.
* This is to avoid issues on some Windows setups. * This is useful to avoid issues on some Windows setups.
* *
* @param string $URI URI * @param string $zipFilePath Path to the ZIP file
* @return string The URI using a real path * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @return string The real path URI
*/ */
protected function convertURIToUseRealPath($URI) public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{ {
$realPathURI = $URI; return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath);
if ($this->isZipStream($URI)) {
if (preg_match('/zip:\/\/(.*)#(.*)/', $URI, $matches)) {
$documentPath = $matches[1];
$documentInsideZipPath = $matches[2];
$realPathURI = 'zip://' . realpath($documentPath) . '#' . $documentInsideZipPath;
}
} else {
$realPathURI = realpath($URI);
}
return $realPathURI;
}
/**
* Returns whether the given URI is a zip stream.
*
* @param string $URI URI pointing to a document
* @return bool TRUE if URI is a zip stream, FALSE otherwise
*/
protected function isZipStream($URI)
{
return (strpos($URI, 'zip://') === 0);
} }
/** /**

View File

@ -55,7 +55,7 @@ class SheetHelper
$sheetIndex = 0; $sheetIndex = 0;
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_FILE_PATH)) { if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('sheet')) { if ($xmlReader->isPositionedOnStartingNode('sheet')) {
$sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex); $sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex);
@ -105,7 +105,7 @@ class SheetHelper
// find the file path of the sheet, by looking at the "workbook.xml.rels" file // find the file path of the sheet, by looking at the "workbook.xml.rels" file
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_RELS_FILE_PATH)) { if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('Relationship')) { if ($xmlReader->isPositionedOnStartingNode('Relationship')) {
$relationshipSheetId = $xmlReader->getAttribute('Id'); $relationshipSheetId = $xmlReader->getAttribute('Id');

View File

@ -76,10 +76,9 @@ class StyleHelper
$this->customNumberFormats = []; $this->customNumberFormats = [];
$this->stylesAttributes = []; $this->stylesAttributes = [];
$stylesXmlFilePath = $this->filePath .'#' . self::STYLES_XML_FILE_PATH;
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $stylesXmlFilePath)) { if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml()); $numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml());

View File

@ -20,12 +20,11 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
public function testOpenShouldFailIfFileInsideZipDoesNotExist() public function testOpenShouldFailIfFileInsideZipDoesNotExist()
{ {
$resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx'); $resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
$nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
// using "@" to prevent errors/warnings from being displayed // using "@" to prevent errors/warnings from being displayed
$wasOpenSuccessful = @$xmlReader->open($nonExistingXMLFilePath); $wasOpenSuccessful = @$xmlReader->openFileInZip($resourcePath, 'path/to/fake/file.xml');
$this->assertTrue($wasOpenSuccessful === false); $this->assertTrue($wasOpenSuccessful === false);
} }
@ -72,10 +71,9 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
public function testReadShouldThrowExceptionOnError() public function testReadShouldThrowExceptionOnError()
{ {
$resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx'); $resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx');
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/worksheets/sheet1.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
if ($xmlReader->open($sheetDataXMLFilePath) === false) { if ($xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml') === false) {
$this->fail(); $this->fail();
} }
@ -95,43 +93,13 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
// The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains // The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains
// a doctype element that causes read errors // a doctype element that causes read errors
$resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx'); $resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx');
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/sharedStrings.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
if ($xmlReader->open($sheetDataXMLFilePath) !== false) { if ($xmlReader->openFileInZip($resourcePath, 'xl/sharedStrings.xml') !== false) {
@$xmlReader->next('sst'); @$xmlReader->next('sst');
} }
} }
/**
* @return array
*/
public function dataProviderForTestIsZipStream()
{
return [
['/absolute/path/to/file.xlsx', false],
['relative/path/to/file.xlsx', false],
['php://temp', false],
['zip:///absolute/path/to/file.xlsx', true],
['zip://relative/path/to/file.xlsx', true],
];
}
/**
* @dataProvider dataProviderForTestIsZipStream
*
* @param string $URI
* @param bool $expectedResult
* @return void
*/
public function testIsZipStream($URI, $expectedResult)
{
$xmlReader = new XMLReader();
$isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'isZipStream', $URI);
$this->assertEquals($expectedResult, $isZipStream);
}
/** /**
* @return array * @return array
*/ */
@ -167,34 +135,34 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
/** /**
* @return array * @return array
*/ */
public function dataProviderForTestConvertURIToUseRealPath() public function dataProviderForTestGetRealPathURIForFileInZip()
{ {
$tempFolder = realpath(sys_get_temp_dir()); $tempFolder = realpath(sys_get_temp_dir());
$expectedRealPathURI = 'zip://' . $tempFolder . '/test.xlsx#test.xml';
return [ return [
['/../../../' . $tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'], [$tempFolder, "$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI],
[$tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'], [$tempFolder, "/../../../$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI],
['zip://' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'],
['zip:///../../../' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'],
]; ];
} }
/** /**
* @dataProvider dataProviderForTestConvertURIToUseRealPath * @dataProvider dataProviderForTestGetRealPathURIForFileInZip
* *
* @param string $URI * @param string $tempFolder
* @param string $expectedConvertedURI * @param string $zipFilePath
* @param string $fileInsideZipPath
* @param string $expectedRealPathURI
* @return void * @return void
*/ */
public function testConvertURIToUseRealPath($URI, $expectedConvertedURI) public function testGetRealPathURIForFileInZip($tempFolder, $zipFilePath, $fileInsideZipPath, $expectedRealPathURI)
{ {
$tempFolder = sys_get_temp_dir();
touch($tempFolder . '/test.xlsx'); touch($tempFolder . '/test.xlsx');
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
$convertedURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'convertURIToUseRealPath', $URI); $realPathURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'getRealPathURIForFileInZip', $zipFilePath, $fileInsideZipPath);
$this->assertEquals($expectedConvertedURI, $convertedURI); $this->assertEquals($expectedRealPathURI, $realPathURI);
unlink($tempFolder . '/test.xlsx'); unlink($tempFolder . '/test.xlsx');
} }
@ -230,5 +198,7 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
$xmlReader->read(); $xmlReader->read();
$this->assertFalse($xmlReader->isPositionedOnStartingNode('test')); $this->assertFalse($xmlReader->isPositionedOnStartingNode('test'));
$this->assertTrue($xmlReader->isPositionedOnEndingNode('test')); $this->assertTrue($xmlReader->isPositionedOnEndingNode('test'));
$xmlReader->close();
} }
} }

View File

@ -538,10 +538,9 @@ class WriterTest extends \PHPUnit_Framework_TestCase
private function moveReaderToCorrectTableNode($fileName, $sheetIndex) private function moveReaderToCorrectTableNode($fileName, $sheetIndex)
{ {
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToSheetFile = $resourcePath . '#content.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToSheetFile); $xmlReader->openFileInZip($resourcePath, 'content.xml');
$xmlReader->readUntilNodeFound('table:table'); $xmlReader->readUntilNodeFound('table:table');
for ($i = 1; $i < $sheetIndex; $i++) { for ($i = 1; $i < $sheetIndex; $i++) {

View File

@ -296,17 +296,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellElements = []; $cellElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#content.xml';
$xmlReader = new \XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->openFileInZip($resourcePath, 'content.xml');
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'table:table-cell' && $xmlReader->getAttribute('office:value-type') !== null) { if ($xmlReader->isPositionedOnStartingNode('table:table-cell') && $xmlReader->getAttribute('office:value-type') !== null) {
$cellElements[] = $xmlReader->expand(); $cellElements[] = $xmlReader->expand();
} }
} }
$xmlReader->close();
return $cellElements; return $cellElements;
} }
@ -319,17 +320,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellStyleElements = []; $cellStyleElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#content.xml';
$xmlReader = new \XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->openFileInZip($resourcePath, 'content.xml');
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'style:style' && $xmlReader->getAttribute('style:family') === 'table-cell') { if ($xmlReader->isPositionedOnStartingNode('style:style') && $xmlReader->getAttribute('style:family') === 'table-cell') {
$cellStyleElements[] = $xmlReader->expand(); $cellStyleElements[] = $xmlReader->expand();
} }
} }
$xmlReader->close();
return $cellStyleElements; return $cellStyleElements;
} }
@ -341,10 +343,9 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
private function getXmlSectionFromStylesXmlFile($fileName, $section) private function getXmlSectionFromStylesXmlFile($fileName, $section)
{ {
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#styles.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->openFileInZip($resourcePath, 'styles.xml');
$xmlReader->readUntilNodeFound($section); $xmlReader->readUntilNodeFound($section);
return $xmlReader->expand(); return $xmlReader->expand();

View File

@ -293,13 +293,16 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
private function getXmlSectionFromStylesXmlFile($fileName, $section) private function getXmlSectionFromStylesXmlFile($fileName, $section)
{ {
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#xl/styles.xml';
$xmlReader = new XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->openFileInZip($resourcePath, 'xl/styles.xml');
$xmlReader->readUntilNodeFound($section); $xmlReader->readUntilNodeFound($section);
return $xmlReader->expand(); $xmlSection = $xmlReader->expand();
$xmlReader->close();
return $xmlSection;
} }
/** /**
@ -311,17 +314,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellElements = []; $cellElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#xl/worksheets/sheet1.xml';
$xmlReader = new \XMLReader(); $xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml');
while ($xmlReader->read()) { while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'c') { if ($xmlReader->isPositionedOnStartingNode('c')) {
$cellElements[] = $xmlReader->expand(); $cellElements[] = $xmlReader->expand();
} }
} }
$xmlReader->close();
return $cellElements; return $cellElements;
} }