From a38fa6ccd7250729acac43af18f6c29f547cf052 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Sun, 29 May 2016 22:16:59 -0700 Subject: [PATCH] Adding open_file_in_zip() helper function to XMLReader --- src/Spout/Reader/ODS/SheetIterator.php | 6 +- src/Spout/Reader/Wrapper/XMLReader.php | 56 +++++----------- src/Spout/Reader/XLSX/Helper/SheetHelper.php | 4 +- src/Spout/Reader/XLSX/Helper/StyleHelper.php | 3 +- tests/Spout/Reader/Wrapper/XMLReaderTest.php | 64 +++++-------------- tests/Spout/Writer/ODS/WriterTest.php | 3 +- .../Spout/Writer/ODS/WriterWithStyleTest.php | 21 +++--- .../Spout/Writer/XLSX/WriterWithStyleTest.php | 18 ++++-- 8 files changed, 64 insertions(+), 111 deletions(-) diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index d0010bd..4f2ec01 100644 --- a/src/Spout/Reader/ODS/SheetIterator.php +++ b/src/Spout/Reader/ODS/SheetIterator.php @@ -15,6 +15,8 @@ use Box\Spout\Reader\Wrapper\XMLReader; */ class SheetIterator implements IteratorInterface { + const CONTENT_XML_FILE_PATH = 'content.xml'; + /** Definition of XML nodes name and attribute used to parse sheet data */ const XML_NODE_TABLE = 'table:table'; const XML_ATTRIBUTE_TABLE_NAME = 'table:name'; @@ -63,8 +65,8 @@ class SheetIterator implements IteratorInterface { $this->xmlReader->close(); - $contentXmlFilePath = $this->filePath . '#content.xml'; - if ($this->xmlReader->open('zip://' . $contentXmlFilePath) === false) { + if ($this->xmlReader->openFileInZip($this->filePath, self::CONTENT_XML_FILE_PATH) === false) { + $contentXmlFilePath = $this->filePath . '#' . self::CONTENT_XML_FILE_PATH; throw new IOException("Could not open \"{$contentXmlFilePath}\"."); } diff --git a/src/Spout/Reader/Wrapper/XMLReader.php b/src/Spout/Reader/Wrapper/XMLReader.php index 94b28eb..c979819 100644 --- a/src/Spout/Reader/Wrapper/XMLReader.php +++ b/src/Spout/Reader/Wrapper/XMLReader.php @@ -14,66 +14,44 @@ class XMLReader extends \XMLReader { use XMLInternalErrorsHelper; + const ZIP_WRAPPER = 'zip://'; + /** - * Set the URI containing the XML to parse - * @see \XMLReader::open + * Opens the XML Reader to read a file located inside a ZIP file. * - * @param string $URI URI pointing to the document - * @param string|null|void $encoding The document encoding - * @param int $options A bitmask of the LIBXML_* constants + * @param string $zipFilePath Path to the ZIP file + * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip * @return bool TRUE on success or FALSE on failure */ - public function open($URI, $encoding = null, $options = 0) + public function openFileInZip($zipFilePath, $fileInsideZipPath) { $wasOpenSuccessful = false; - $realPathURI = $this->convertURIToUseRealPath($URI); + $realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath); // HHVM does not check if file exists within zip file // @link https://github.com/facebook/hhvm/issues/5779 - if ($this->isRunningHHVM() && $this->isZipStream($realPathURI)) { + if ($this->isRunningHHVM()) { if ($this->fileExistsWithinZip($realPathURI)) { - $wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET); + $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET); } } else { - $wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET); + $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET); } return $wasOpenSuccessful; } /** - * Updates the given URI to use a real path. - * This is to avoid issues on some Windows setup. + * Returns the real path for the given path components. + * This is useful to avoid issues on some Windows setup. * - * @param string $URI URI - * @return string The URI using a real path + * @param string $zipFilePath Path to the ZIP file + * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip + * @return string The real path URI */ - protected function convertURIToUseRealPath($URI) + public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath) { - $realPathURI = $URI; - - if ($this->isZipStream($URI)) { - if (preg_match('/zip:\/\/(.*)#(.*)/', $URI, $matches)) { - $documentPath = $matches[1]; - $documentInsideZipPath = $matches[2]; - $realPathURI = 'zip://' . realpath($documentPath) . '#' . $documentInsideZipPath; - } - } else { - $realPathURI = realpath($URI); - } - - return $realPathURI; - } - - /** - * Returns whether the given URI is a zip stream. - * - * @param string $URI URI pointing to a document - * @return bool TRUE if URI is a zip stream, FALSE otherwise - */ - protected function isZipStream($URI) - { - return (strpos($URI, 'zip://') === 0); + return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath); } /** diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index ae7b8e0..175de5b 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -55,7 +55,7 @@ class SheetHelper $sheetIndex = 0; $xmlReader = new XMLReader(); - if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_FILE_PATH)) { + if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) { while ($xmlReader->read()) { if ($xmlReader->isPositionedOnStartingNode('sheet')) { $sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex); @@ -105,7 +105,7 @@ class SheetHelper // find the file path of the sheet, by looking at the "workbook.xml.res" file $xmlReader = new XMLReader(); - if ($xmlReader->open('zip://' . $this->filePath . '#' . self::WORKBOOK_XML_RELS_FILE_PATH)) { + if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) { while ($xmlReader->read()) { if ($xmlReader->isPositionedOnStartingNode('Relationship')) { $relationshipSheetId = $xmlReader->getAttribute('Id'); diff --git a/src/Spout/Reader/XLSX/Helper/StyleHelper.php b/src/Spout/Reader/XLSX/Helper/StyleHelper.php index 462433c..6fdcd80 100644 --- a/src/Spout/Reader/XLSX/Helper/StyleHelper.php +++ b/src/Spout/Reader/XLSX/Helper/StyleHelper.php @@ -76,10 +76,9 @@ class StyleHelper $this->customNumberFormats = []; $this->stylesAttributes = []; - $stylesXmlFilePath = $this->filePath .'#' . self::STYLES_XML_FILE_PATH; $xmlReader = new XMLReader(); - if ($xmlReader->open('zip://' . $stylesXmlFilePath)) { + if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) { while ($xmlReader->read()) { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) { $numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml()); diff --git a/tests/Spout/Reader/Wrapper/XMLReaderTest.php b/tests/Spout/Reader/Wrapper/XMLReaderTest.php index 1f7ffc4..b16c648 100644 --- a/tests/Spout/Reader/Wrapper/XMLReaderTest.php +++ b/tests/Spout/Reader/Wrapper/XMLReaderTest.php @@ -20,12 +20,11 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase public function testOpenShouldFailIfFileInsideZipDoesNotExist() { $resourcePath = $this->getResourcePath('one_sheet_with_inline_strings.xlsx'); - $nonExistingXMLFilePath = 'zip://' . $resourcePath . '#path/to/fake/file.xml'; $xmlReader = new XMLReader(); // using "@" to prevent errors/warning to be displayed - $wasOpenSuccessful = @$xmlReader->open($nonExistingXMLFilePath); + $wasOpenSuccessful = @$xmlReader->openFileInZip($resourcePath, 'path/to/fake/file.xml'); $this->assertTrue($wasOpenSuccessful === false); } @@ -72,10 +71,9 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase public function testReadShouldThrowExceptionOnError() { $resourcePath = $this->getResourcePath('one_sheet_with_invalid_xml_characters.xlsx'); - $sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/worksheets/sheet1.xml'; $xmlReader = new XMLReader(); - if ($xmlReader->open($sheetDataXMLFilePath) === false) { + if ($xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml') === false) { $this->fail(); } @@ -95,43 +93,13 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase // The sharedStrings.xml file in "attack_billion_laughs.xlsx" contains // a doctype element that causes read errors $resourcePath = $this->getResourcePath('attack_billion_laughs.xlsx'); - $sheetDataXMLFilePath = 'zip://' . $resourcePath . '#xl/sharedStrings.xml'; $xmlReader = new XMLReader(); - if ($xmlReader->open($sheetDataXMLFilePath) !== false) { + if ($xmlReader->openFileInZip($resourcePath, 'xl/sharedStrings.xml') !== false) { @$xmlReader->next('sst'); } } - /** - * @return array - */ - public function dataProviderForTestIsZipStream() - { - return [ - ['/absolute/path/to/file.xlsx', false], - ['relative/path/to/file.xlsx', false], - ['php://temp', false], - ['zip:///absolute/path/to/file.xlsx', true], - ['zip://relative/path/to/file.xlsx', true], - ]; - } - - /** - * @dataProvider dataProviderForTestIsZipStream - * - * @param string $URI - * @param bool $expectedResult - * @return void - */ - public function testIsZipStream($URI, $expectedResult) - { - $xmlReader = new XMLReader(); - $isZipStream = \ReflectionHelper::callMethodOnObject($xmlReader, 'isZipStream', $URI); - - $this->assertEquals($expectedResult, $isZipStream); - } - /** * @return array */ @@ -167,34 +135,34 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase /** * @return array */ - public function dataProviderForTestConvertURIToUseRealPath() + public function dataProviderForTestGetRealPathURIForFileInZip() { $tempFolder = realpath(sys_get_temp_dir()); + $expectedRealPathURI = 'zip://' . $tempFolder . '/test.xlsx#test.xml'; return [ - ['/../../../' . $tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'], - [$tempFolder . '/test.xlsx', $tempFolder . '/test.xlsx'], - ['zip://' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'], - ['zip:///../../../' . $tempFolder . '/test.xlsx#test.xml', 'zip://' . $tempFolder . '/test.xlsx#test.xml'], + [$tempFolder, "$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI], + [$tempFolder, "/../../../$tempFolder/test.xlsx", 'test.xml', $expectedRealPathURI], ]; } /** - * @dataProvider dataProviderForTestConvertURIToUseRealPath + * @dataProvider dataProviderForTestGetRealPathURIForFileInZip * - * @param string $URI - * @param string $expectedConvertedURI + * @param string $tempFolder + * @param string $zipFilePath + * @param string $fileInsideZipPath + * @param string $expectedRealPathURI * @return void */ - public function testConvertURIToUseRealPath($URI, $expectedConvertedURI) + public function testGetRealPathURIForFileInZip($tempFolder, $zipFilePath, $fileInsideZipPath, $expectedRealPathURI) { - $tempFolder = sys_get_temp_dir(); touch($tempFolder . '/test.xlsx'); $xmlReader = new XMLReader(); - $convertedURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'convertURIToUseRealPath', $URI); + $realPathURI = \ReflectionHelper::callMethodOnObject($xmlReader, 'getRealPathURIForFileInZip', $zipFilePath, $fileInsideZipPath); - $this->assertEquals($expectedConvertedURI, $convertedURI); + $this->assertEquals($expectedRealPathURI, $realPathURI); unlink($tempFolder . '/test.xlsx'); } @@ -230,5 +198,7 @@ class XMLReaderTest extends \PHPUnit_Framework_TestCase $xmlReader->read(); $this->assertFalse($xmlReader->isPositionedOnStartingNode('test')); $this->assertTrue($xmlReader->isPositionedOnEndingNode('test')); + + $xmlReader->close(); } } diff --git a/tests/Spout/Writer/ODS/WriterTest.php b/tests/Spout/Writer/ODS/WriterTest.php index 4c67e4e..43383e4 100644 --- a/tests/Spout/Writer/ODS/WriterTest.php +++ b/tests/Spout/Writer/ODS/WriterTest.php @@ -538,10 +538,9 @@ class WriterTest extends \PHPUnit_Framework_TestCase private function moveReaderToCorrectTableNode($fileName, $sheetIndex) { $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToSheetFile = $resourcePath . '#content.xml'; $xmlReader = new XMLReader(); - $xmlReader->open('zip://' . $pathToSheetFile); + $xmlReader->openFileInZip($resourcePath, 'content.xml'); $xmlReader->readUntilNodeFound('table:table'); for ($i = 1; $i < $sheetIndex; $i++) { diff --git a/tests/Spout/Writer/ODS/WriterWithStyleTest.php b/tests/Spout/Writer/ODS/WriterWithStyleTest.php index cc3fc50..9888bfc 100644 --- a/tests/Spout/Writer/ODS/WriterWithStyleTest.php +++ b/tests/Spout/Writer/ODS/WriterWithStyleTest.php @@ -296,17 +296,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase $cellElements = []; $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToStylesXmlFile = $resourcePath . '#content.xml'; - $xmlReader = new \XMLReader(); - $xmlReader->open('zip://' . $pathToStylesXmlFile); + $xmlReader = new XMLReader(); + $xmlReader->openFileInZip($resourcePath, 'content.xml'); while ($xmlReader->read()) { - if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'table:table-cell' && $xmlReader->getAttribute('office:value-type') !== null) { + if ($xmlReader->isPositionedOnStartingNode('table:table-cell') && $xmlReader->getAttribute('office:value-type') !== null) { $cellElements[] = $xmlReader->expand(); } } + $xmlReader->close(); + return $cellElements; } @@ -319,17 +320,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase $cellStyleElements = []; $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToStylesXmlFile = $resourcePath . '#content.xml'; - $xmlReader = new \XMLReader(); - $xmlReader->open('zip://' . $pathToStylesXmlFile); + $xmlReader = new XMLReader(); + $xmlReader->openFileInZip($resourcePath, 'content.xml'); while ($xmlReader->read()) { - if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'style:style' && $xmlReader->getAttribute('style:family') === 'table-cell') { + if ($xmlReader->isPositionedOnStartingNode('style:style') && $xmlReader->getAttribute('style:family') === 'table-cell') { $cellStyleElements[] = $xmlReader->expand(); } } + $xmlReader->close(); + return $cellStyleElements; } @@ -341,10 +343,9 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase private function getXmlSectionFromStylesXmlFile($fileName, $section) { $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToStylesXmlFile = $resourcePath . '#styles.xml'; $xmlReader = new XMLReader(); - $xmlReader->open('zip://' . $pathToStylesXmlFile); + $xmlReader->openFileInZip($resourcePath, 'styles.xml'); $xmlReader->readUntilNodeFound($section); return $xmlReader->expand(); diff --git a/tests/Spout/Writer/XLSX/WriterWithStyleTest.php b/tests/Spout/Writer/XLSX/WriterWithStyleTest.php index 9a531b9..d43b566 100644 --- a/tests/Spout/Writer/XLSX/WriterWithStyleTest.php +++ b/tests/Spout/Writer/XLSX/WriterWithStyleTest.php @@ -293,13 +293,16 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase private function getXmlSectionFromStylesXmlFile($fileName, $section) { $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToStylesXmlFile = $resourcePath . '#xl/styles.xml'; $xmlReader = new XMLReader(); - $xmlReader->open('zip://' . $pathToStylesXmlFile); + $xmlReader->openFileInZip($resourcePath, 'xl/styles.xml'); $xmlReader->readUntilNodeFound($section); - return $xmlReader->expand(); + $xmlSection = $xmlReader->expand(); + + $xmlReader->close(); + + return $xmlSection; } /** @@ -311,17 +314,18 @@ class WriterWithStyleTest extends \PHPUnit_Framework_TestCase $cellElements = []; $resourcePath = $this->getGeneratedResourcePath($fileName); - $pathToStylesXmlFile = $resourcePath . '#xl/worksheets/sheet1.xml'; - $xmlReader = new \XMLReader(); - $xmlReader->open('zip://' . $pathToStylesXmlFile); + $xmlReader = new XMLReader(); + $xmlReader->openFileInZip($resourcePath, 'xl/worksheets/sheet1.xml'); while ($xmlReader->read()) { - if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'c') { + if ($xmlReader->isPositionedOnStartingNode('c')) { $cellElements[] = $xmlReader->expand(); } } + $xmlReader->close(); + return $cellElements; }