Adding open_file_in_zip() helper function to XMLReader (#238)

This commit is contained in:
Adrien Loison 2016-05-29 23:22:57 -07:00
parent 03866a6604
commit 251c0bebc1
8 changed files with 64 additions and 111 deletions

View File

@ -15,6 +15,8 @@ use Box\Spout\Reader\Wrapper\XMLReader;
*/
class SheetIterator implements IteratorInterface
{
const CONTENT_XML_FILE_PATH = 'content.xml';
/** Definition of XML nodes name and attribute used to parse sheet data */
const XML_NODE_TABLE = 'table:table';
const XML_ATTRIBUTE_TABLE_NAME = 'table:name';
@ -63,8 +65,8 @@ class SheetIterator implements IteratorInterface
{
$this->xmlReader->close();
$contentXmlFilePath = $this->filePath . '#content.xml';
if ($this->xmlReader->open('zip://' . $contentXmlFilePath) === false) {
if ($this->xmlReader->openFileInZip($this->filePath, self::CONTENT_XML_FILE_PATH) === false) {
$contentXmlFilePath = $this->filePath . '#' . self::CONTENT_XML_FILE_PATH;
throw new IOException("Could not open \"{$contentXmlFilePath}\".");
}

View File

@ -14,66 +14,44 @@ class XMLReader extends \XMLReader
{
use XMLInternalErrorsHelper;
const ZIP_WRAPPER = 'zip://';
/**
* Set the URI containing the XML to parse
* @see \XMLReader::open
* Opens the XML Reader to read a file located inside a ZIP file.
*
* @param string $URI URI pointing to the document
* @param string|null|void $encoding The document encoding
* @param int $options A bitmask of the LIBXML_* constants
* @param string $zipFilePath Path to the ZIP file
* @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @return bool TRUE on success or FALSE on failure
*/
public function open($URI, $encoding = null, $options = 0)
public function openFileInZip($zipFilePath, $fileInsideZipPath)
{
$wasOpenSuccessful = false;
$realPathURI = $this->convertURIToUseRealPath($URI);
$realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);
// HHVM does not check if file exists within zip file
// @link https://github.com/facebook/hhvm/issues/5779
if ($this->isRunningHHVM() && $this->isZipStream($realPathURI)) {
if ($this->isRunningHHVM()) {
if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET);
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
} else {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET);
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
return $wasOpenSuccessful;
}
/**
* Updates the given URI to use a real path.
* This is to avoid issues on some Windows setup.
* Returns the real path for the given path components.
* This is useful to avoid issues on some Windows setup.
*
* @param string $URI URI
* @return string The URI using a real path
* @param string $zipFilePath Path to the ZIP file
* @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @return string The real path URI
*/
protected function convertURIToUseRealPath($URI)
public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{
$realPathURI = $URI;
if ($this->isZipStream($URI)) {
if (preg_match('/zip:\/\/(.*)#(.*)/', $URI, $matches)) {
$documentPath = $matches[1];
$documentInsideZipPath = $matches[2];
$realPathURI = 'zip://' . realpath($documentPath) . '#' . $documentInsideZipPath;
}
} else {
$realPathURI = realpath($URI);
}
return $realPathURI;
}
/**
* Returns whether the given URI is a zip stream.
*
* @param string $URI URI pointing to a document
* @return bool TRUE if URI is a zip stream, FALSE otherwise
*/
protected function isZipStream($URI)
{
return (strpos($URI, 'zip://') === 0);
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath);
}
/**

View File

@ -55,7 +55,7 @@ class SheetHelper
$sheetIndex = 0;
$xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_FILE_PATH)) {
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('sheet')) {
$sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex);
@ -105,7 +105,7 @@ class SheetHelper
// find the file path of the sheet, by looking at the "workbook.xml.res" file
$xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_RELS_FILE_PATH)) {
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('Relationship')) {
$relationshipSheetId = $xmlReader->getAttribute('Id');

View File

@ -76,10 +76,9 @@ class StyleHelper
$this->customNumberFormats = [];
$this->stylesAttributes = [];
$stylesXmlFilePath = $this->filePath .'#' . self::STYLES_XML_FILE_PATH;
$xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $stylesXmlFilePath)) {
if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml());

View File

@ -20,12 +20,11 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
public function testOpenShouldFailIfFileInsideZipDoesNotExist()
{
$resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx');
$nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml';
$xmlReader = new XMLReader();
// using "@" to prevent errors/warning to be displayed
$wasOpenSuccessful = @$xmlReader->open($nonExistingXMLFilePath);
$wasOpenSuccessful = @$xmlReader->openFileInZip($resourcePath, 'path/to/fake/file.xml');
$this->assertTrue($wasOpenSuccessful === false);
}
@ -72,10 +71,9 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
public function testReadShouldThrowExceptionOnError()
{
$resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx');
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/worksheets/sheet1.xml';
$xmlReader = new XMLReader();
if ($xmlReader->open($sheetDataXMLFilePath) === false) {
if ($xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml') === false) {
$this->fail();
}
@ -95,43 +93,13 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
// The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains
// a doctype element that causes read errors
$resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx');
$sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/sharedStrings.xml';
$xmlReader = new XMLReader();
if ($xmlReader->open($sheetDataXMLFilePath) !== false) {
if ($xmlReader->openFileInZip($resourcePath, 'xl/sharedStrings.xml') !== false) {
@$xmlReader->next('sst');
}
}
/**
* @return array
*/
public function dataProviderForTestIsZipStream()
{
return [
['/absolute/path/to/file.xlsx', false],
['relative/path/to/file.xlsx', false],
['php://temp', false],
['zip:///absolute/path/to/file.xlsx', true],
['zip://relative/path/to/file.xlsx', true],
];
}
/**
* @dataProvider dataProviderForTestIsZipStream
*
* @param string $URI
* @param bool $expectedResult
* @return void
*/
public function testIsZipStream($URI, $expectedResult)
{
$xmlReader = new XMLReader();
$isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'isZipStream', $URI);
$this->assertEquals($expectedResult, $isZipStream);
}
/**
* @return array
*/
@ -167,34 +135,34 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
/**
* @return array
*/
public function dataProviderForTestConvertURIToUseRealPath()
public function dataProviderForTestGetRealPathURIForFileInZip()
{
$tempFolder = realpath(sys_get_temp_dir());
$expectedRealPathURI = 'zip://' . $tempFolder . '/test.xlsx#test.xml';
return [
['/../../../' . $tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'],
[$tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'],
['zip://' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'],
['zip:///../../../' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'],
[$tempFolder, "$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI],
[$tempFolder, "/../../../$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI],
];
}
/**
* @dataProvider dataProviderForTestConvertURIToUseRealPath
* @dataProvider dataProviderForTestGetRealPathURIForFileInZip
*
* @param string $URI
* @param string $expectedConvertedURI
* @param string $tempFolder
* @param string $zipFilePath
* @param string $fileInsideZipPath
* @param string $expectedRealPathURI
* @return void
*/
public function testConvertURIToUseRealPath($URI, $expectedConvertedURI)
public function testGetRealPathURIForFileInZip($tempFolder, $zipFilePath, $fileInsideZipPath, $expectedRealPathURI)
{
$tempFolder = sys_get_temp_dir();
touch($tempFolder . '/test.xlsx');
$xmlReader = new XMLReader();
$convertedURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'convertURIToUseRealPath', $URI);
$realPathURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'getRealPathURIForFileInZip', $zipFilePath, $fileInsideZipPath);
$this->assertEquals($expectedConvertedURI, $convertedURI);
$this->assertEquals($expectedRealPathURI, $realPathURI);
unlink($tempFolder . '/test.xlsx');
}
@ -230,5 +198,7 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase
$xmlReader->read();
$this->assertFalse($xmlReader->isPositionedOnStartingNode('test'));
$this->assertTrue($xmlReader->isPositionedOnEndingNode('test'));
$xmlReader->close();
}
}

View File

@ -538,10 +538,9 @@ class WriterTest extends \PHPUnit_Framework_TestCase
private function moveReaderToCorrectTableNode($fileName, $sheetIndex)
{
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToSheetFile = $resourcePath . '#content.xml';
$xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToSheetFile);
$xmlReader->openFileInZip($resourcePath, 'content.xml');
$xmlReader->readUntilNodeFound('table:table');
for ($i = 1; $i < $sheetIndex; $i++) {

View File

@ -296,17 +296,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#content.xml';
$xmlReader = new \XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile);
$xmlReader = new XMLReader();
$xmlReader->openFileInZip($resourcePath, 'content.xml');
while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'table:table-cell' && $xmlReader->getAttribute('office:value-type') !== null) {
if ($xmlReader->isPositionedOnStartingNode('table:table-cell') && $xmlReader->getAttribute('office:value-type') !== null) {
$cellElements[] = $xmlReader->expand();
}
}
$xmlReader->close();
return $cellElements;
}
@ -319,17 +320,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellStyleElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#content.xml';
$xmlReader = new \XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile);
$xmlReader = new XMLReader();
$xmlReader->openFileInZip($resourcePath, 'content.xml');
while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'style:style' && $xmlReader->getAttribute('style:family') === 'table-cell') {
if ($xmlReader->isPositionedOnStartingNode('style:style') && $xmlReader->getAttribute('style:family') === 'table-cell') {
$cellStyleElements[] = $xmlReader->expand();
}
}
$xmlReader->close();
return $cellStyleElements;
}
@ -341,10 +343,9 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
private function getXmlSectionFromStylesXmlFile($fileName, $section)
{
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#styles.xml';
$xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile);
$xmlReader->openFileInZip($resourcePath, 'styles.xml');
$xmlReader->readUntilNodeFound($section);
return $xmlReader->expand();

View File

@ -293,13 +293,16 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
private function getXmlSectionFromStylesXmlFile($fileName, $section)
{
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#xl/styles.xml';
$xmlReader = new XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile);
$xmlReader->openFileInZip($resourcePath, 'xl/styles.xml');
$xmlReader->readUntilNodeFound($section);
return $xmlReader->expand();
$xmlSection = $xmlReader->expand();
$xmlReader->close();
return $xmlSection;
}
/**
@ -311,17 +314,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase
$cellElements = [];
$resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToStylesXmlFile = $resourcePath . '#xl/worksheets/sheet1.xml';
$xmlReader = new \XMLReader();
$xmlReader->open('zip://' . $pathToStylesXmlFile);
$xmlReader = new XMLReader();
$xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml');
while ($xmlReader->read()) {
if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'c') {
if ($xmlReader->isPositionedOnStartingNode('c')) {
$cellElements[] = $xmlReader->expand();
}
}
$xmlReader->close();
return $cellElements;
}