From ae3ee357ff5c502d1c4725b6e83e13c7eaf65c03 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Wed, 15 Jul 2015 00:22:37 -0700 Subject: [PATCH] Moved readers to iterators Instead of the hasNext() / next() syntax, readers now implements the PHP iterator pattern. It allows readers to be used with a foreach() loop. All readers now share the same structure (CSV is treated as having exactly one sheet): - one concrete Reader - one SheetIterator, exposed by the Reader - one or more Sheets, returned at every iteration - one RowIterator, exposed by the Sheet Introducing the concept of sheets for CSV may be kind of confusing but it makes Spout way more consistent. Also, this confusion may be resolved by creating a wrapper around the readers if needed. -- This commit does not delete the old files, not change the folder structure for Writers. This will be done in another commit. --- src/Spout/Reader/AbstractReader2.php | 111 ++++++ src/Spout/Reader/CSV/Reader.php | 95 +++++ src/Spout/Reader/CSV/RowIterator.php | 163 ++++++++ src/Spout/Reader/CSV/Sheet.php | 35 ++ src/Spout/Reader/CSV/SheetIterator.php | 96 +++++ .../Exception/NoSheetsFoundException.php | 12 + src/Spout/Reader/IteratorInterface.php | 18 + src/Spout/Reader/ReaderFactory2.php | 44 +++ src/Spout/Reader/ReaderInterface2.php | 35 ++ src/Spout/Reader/SheetInterface.php | 18 + src/Spout/Reader/XLSX/Helper/CellHelper.php | 97 +++++ .../CachingStrategyFactory.php | 154 ++++++++ .../CachingStrategyInterface.php | 44 +++ .../FileBasedStrategy.php | 188 +++++++++ .../SharedStringsCaching/InMemoryStrategy.php | 83 ++++ .../XLSX/Helper/SharedStringsHelper.php | 280 ++++++++++++++ src/Spout/Reader/XLSX/Helper/SheetHelper.php | 199 ++++++++++ src/Spout/Reader/XLSX/Reader.php | 93 +++++ src/Spout/Reader/XLSX/RowIterator.php | 356 ++++++++++++++++++ src/Spout/Reader/XLSX/Sheet.php | 74 ++++ src/Spout/Reader/XLSX/SheetIterator.php | 112 ++++++ tests/Spout/Reader/CSV/ReaderTest.php | 181 +++++++++ .../Reader/XLSX/Helper/CellHelperTest.php | 60 +++ .../CachingStrategyFactoryTest.php | 99 +++++ .../XLSX/Helper/SharedStringsHelperTest.php | 112 ++++++ tests/Spout/Reader/XLSX/ReaderTest.php | 300 +++++++++++++++ tests/Spout/Reader/XLSX/SheetTest.php | 53 +++ 27 files changed, 3112 insertions(+) create mode 100644 src/Spout/Reader/AbstractReader2.php create mode 100644 src/Spout/Reader/CSV/Reader.php create mode 100644 src/Spout/Reader/CSV/RowIterator.php create mode 100644 src/Spout/Reader/CSV/Sheet.php create mode 100644 src/Spout/Reader/CSV/SheetIterator.php create mode 100644 src/Spout/Reader/Exception/NoSheetsFoundException.php create mode 100644 src/Spout/Reader/IteratorInterface.php create mode 100644 src/Spout/Reader/ReaderFactory2.php create mode 100644 src/Spout/Reader/ReaderInterface2.php create mode 100644 src/Spout/Reader/SheetInterface.php create mode 100644 src/Spout/Reader/XLSX/Helper/CellHelper.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php create mode 100644 src/Spout/Reader/XLSX/Helper/SheetHelper.php create mode 100644 src/Spout/Reader/XLSX/Reader.php create mode 100644 src/Spout/Reader/XLSX/RowIterator.php create mode 100644 src/Spout/Reader/XLSX/Sheet.php create mode 100644 src/Spout/Reader/XLSX/SheetIterator.php create mode 100644 tests/Spout/Reader/CSV/ReaderTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/CellHelperTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php create mode 100644 tests/Spout/Reader/XLSX/ReaderTest.php create mode 100644 tests/Spout/Reader/XLSX/SheetTest.php diff --git a/src/Spout/Reader/AbstractReader2.php b/src/Spout/Reader/AbstractReader2.php new file mode 100644 index 0000000..ef24412 --- /dev/null +++ b/src/Spout/Reader/AbstractReader2.php @@ -0,0 +1,111 @@ +globalFunctionsHelper = $globalFunctionsHelper; + return $this; + } + + /** + * Prepares the reader to read the given file. It also makes sure + * that the file exists and is readable. + * + * @param string $filePath Path of the file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted + */ + public function open($filePath) + { + if (!$this->isPhpStream($filePath)) { + // we skip the checks if the provided file path points to a PHP stream + if (!$this->globalFunctionsHelper->file_exists($filePath)) { + throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.'); + } else if (!$this->globalFunctionsHelper->is_readable($filePath)) { + throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.'); + } + } + + try { + $this->openReader($filePath); + $this->isStreamOpened = true; + } catch (\Exception $exception) { + throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')'); + } + } + + /** + * Checks if a path is a PHP stream (like php://output, php://memory, ...) + * + * @param string $filePath Path of the file to be read + * @return bool Whether the given path maps to a PHP stream + */ + protected function isPhpStream($filePath) + { + return (strpos($filePath, 'php://') === 0); + } + + /** + * Closes the reader, preventing any additional reading + * + * @return void + */ + public function close() + { + if ($this->isStreamOpened) { + $this->closeReader(); + + $sheetIterator = $this->getSheetIterator(); + if ($sheetIterator) { + $sheetIterator->end(); + } + + $this->isStreamOpened = false; + } + } +} diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php new file mode 100644 index 0000000..3b164d5 --- /dev/null +++ b/src/Spout/Reader/CSV/Reader.php @@ -0,0 +1,95 @@ +fieldDelimiter = $fieldDelimiter; + return $this; + } + + /** + * Sets the field enclosure for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldEnclosure Character that enclose fields + * @return Reader + */ + public function setFieldEnclosure($fieldEnclosure) + { + $this->fieldEnclosure = $fieldEnclosure; + return $this; + } + + /** + * Opens the file at the given path to make it ready to be read. + * The file must be UTF-8 encoded. + * @TODO add encoding detection/conversion + * + * @param string $filePath Path of the CSV file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException + */ + protected function openReader($filePath) + { + $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); + if (!$this->filePointer) { + throw new IOException('Could not open file ' . $filePath . ' for reading.'); + } + + $this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper); + } + + /** + * Returns an iterator to iterate over sheets. + * + * @return SheetIterator To iterate over sheets + */ + public function getSheetIterator() + { + return $this->sheetIterator; + } + + + /** + * Closes the reader. To be used after reading the file. + * + * @return void + */ + protected function closeReader() + { + if ($this->filePointer) { + $this->globalFunctionsHelper->fclose($this->filePointer); + } + } +} diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php new file mode 100644 index 0000000..2316fa7 --- /dev/null +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -0,0 +1,163 @@ +filePointer = $filePointer; + $this->fieldDelimiter = $fieldDelimiter; + $this->fieldEnclosure = $fieldEnclosure; + $this->globalFunctionsHelper = $globalFunctionsHelper; + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->rewindAndSkipUtf8Bom(); + + $this->numReadRows = 0; + $this->rowDataBuffer = null; + + $this->next(); + } + + /** + * This rewinds and skips the UTF-8 BOM if inserted at the beginning of the file + * by moving the file pointer after it, so that it is not read. + * + * @return void + */ + protected function rewindAndSkipUtf8Bom() + { + $this->globalFunctionsHelper->rewind($this->filePointer); + + $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); + + if ($hasUtf8Bom) { + // we skip the 2 first bytes (so start from the 3rd byte) + $this->globalFunctionsHelper->fseek($this->filePointer, 3); + } else { + // if no BOM, reset the pointer to read from the beginning + $this->globalFunctionsHelper->fseek($this->filePointer, 0); + } + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->filePointer && !$this->hasReachedEndOfFile); + } + + /** + * Move forward to next element. Empty rows are skipped. + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $lineData = null; + $this->hasReachedEndOfFile = feof($this->filePointer); + + if (!$this->hasReachedEndOfFile) { + do { + $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); + } while ($lineData && $this->isEmptyLine($lineData)); + + if ($lineData !== null) { + $this->rowDataBuffer = $lineData; + $this->numReadRows++; + } + } + } + + /** + * @param array $lineData Array containing the cells value for the line + * @return bool Whether the given line is empty + */ + protected function isEmptyLine($lineData) + { + return (count($lineData) === 1 && $lineData[0] === null); + } + + /** + * Return the current element from the buffer + * @link http://php.net/manual/en/iterator.current.php + * + * @return array + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php new file mode 100644 index 0000000..207fcae --- /dev/null +++ b/src/Spout/Reader/CSV/Sheet.php @@ -0,0 +1,35 @@ +rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } +} diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php new file mode 100644 index 0000000..f424cd8 --- /dev/null +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -0,0 +1,96 @@ +sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->hasReadUniqueSheet = false; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return (!$this->hasReadUniqueSheet); + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $this->hasReadUniqueSheet = true; + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheet; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return 1; + } + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/Exception/NoSheetsFoundException.php b/src/Spout/Reader/Exception/NoSheetsFoundException.php new file mode 100644 index 0000000..dfc4907 --- /dev/null +++ b/src/Spout/Reader/Exception/NoSheetsFoundException.php @@ -0,0 +1,12 @@ +setGlobalFunctionsHelper(new GlobalFunctionsHelper()); + + return $reader; + } +} diff --git a/src/Spout/Reader/ReaderInterface2.php b/src/Spout/Reader/ReaderInterface2.php new file mode 100644 index 0000000..a61c83c --- /dev/null +++ b/src/Spout/Reader/ReaderInterface2.php @@ -0,0 +1,35 @@ + 0 + * Z => 25 + * AA => 26 : (26^(2-1) * (0+1)) + 0 + * AB => 27 : (26^(2-1) * (0+1)) + 1 + * BC => 54 : (26^(2-1) * (1+1)) + 2 + * BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25 + */ + foreach (str_split($column) as $single_cell_index) + { + $currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue; + + if ($columnLength == 1) { + $columnIndex += $currentColumnIndex; + } else { + $columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); + } + + $columnLength--; + } + + return $columnIndex; + } + + /** + * Returns whether a cell index is valid, in an Excel world. + * To be valid, the cell index should start with capital letters and be followed by numbers. + * + * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) + * @return bool + */ + protected static function isValidCellIndex($cellIndex) + { + return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php new file mode 100644 index 0000000..8fffdb0 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php @@ -0,0 +1,154 @@ + 20 * 600 ≈ 12KB + */ + const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12; + + /** + * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files + * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory + * and the string will be quickly retrieved. + * The performance bottleneck is not when creating these temporary files, but rather when loading their content. + * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works + * best when the indexes of the shared strings are sorted in the sheet data. + * 10,000 was chosen because it creates small files that are fast to be loaded in memory. + */ + const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; + + /** @var CachingStrategyFactory|null Singleton instance */ + protected static $instance = null; + + /** + * Private constructor for singleton + */ + private function __construct() + { + } + + /** + * Returns the singleton instance of the factory + * + * @return CachingStrategyFactory + */ + public static function getInstance() + { + if (self::$instance === null) { + self::$instance = new CachingStrategyFactory(); + } + + return self::$instance; + } + + /** + * Returns the best caching strategy, given the number of unique shared strings + * and the amount of memory available. + * + * @param int $sharedStringsUniqueCount Number of unique shared strings + * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored + * @return CachingStrategyInterface The best caching strategy + */ + public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null) + { + if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) { + return new InMemoryStrategy($sharedStringsUniqueCount); + } else { + return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE); + } + } + + /** + * Returns whether it is safe to use in-memory caching, given the number of unique shared strings + * and the amount of memory available. + * + * @param int $sharedStringsUniqueCount Number of unique shared strings + * @return bool + */ + protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount) + { + $memoryAvailable = $this->getMemoryLimitInKB(); + + if ($memoryAvailable === -1) { + // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe + return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE); + } else { + $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB; + return ($memoryAvailable > $memoryNeeded); + } + } + + /** + * Returns the PHP "memory_limit" in Kilobytes + * + * @return float + */ + protected function getMemoryLimitInKB() + { + $memoryLimitFormatted = $this->getMemoryLimitFromIni(); + $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted)); + + // No memory limit + if ($memoryLimitFormatted === '-1') { + return -1; + } + + if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) { + $amount = intval($matches[1]); + $unit = $matches[2]; + + switch ($unit) { + case 'b': return ($amount / 1024); + case 'k': return $amount; + case 'm': return ($amount * 1024); + case 'g': return ($amount * 1024 * 1024); + case 't': return ($amount * 1024 * 1024 * 1024); + } + } + + return -1; + } + + /** + * Returns the formatted "memory_limit" value + * + * @return string + */ + protected function getMemoryLimitFromIni() + { + return ini_get('memory_limit'); + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php new file mode 100644 index 0000000..631222a --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php @@ -0,0 +1,44 @@ +fileSystemHelper = new FileSystemHelper($rootTempFolder); + $this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings')); + + $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile; + + $this->globalFunctionsHelper = new GlobalFunctionsHelper(); + $this->tempFilePointer = null; + } + + /** + * Adds the given string to the cache. + * + * @param string $sharedString The string to be added to the cache + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return void + */ + public function addStringForIndex($sharedString, $sharedStringIndex) + { + $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); + + if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { + if ($this->tempFilePointer) { + $this->globalFunctionsHelper->fclose($this->tempFilePointer); + } + $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w'); + } + + // The shared string retrieval logic expects each cell data to be on one line only + // Encoding the line feed character allows to preserve this assumption + $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); + + $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL); + } + + /** + * Returns the path for the temp file that should contain the string for the given index + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The temp file path for the given index + */ + protected function getSharedStringTempFilePath($sharedStringIndex) + { + $numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile); + return $this->tempFolder . '/sharedstrings' . $numTempFile; + } + + /** + * Closes the cache after the last shared string was added. + * This prevents any additional string from being added to the cache. + * + * @return void + */ + public function closeCache() + { + // close pointer to the last temp file that was written + if ($this->tempFilePointer) { + $this->globalFunctionsHelper->fclose($this->tempFilePointer); + } + } + + + /** + * Returns the string located at the given index from the cache. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); + $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile; + + if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { + throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); + } + + if ($this->inMemoryTempFilePath !== $tempFilePath) { + // free memory + unset($this->inMemoryTempFileContents); + + $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath)); + $this->inMemoryTempFilePath = $tempFilePath; + } + + $sharedString = null; + if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { + $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; + $sharedString = $this->unescapeLineFeed($escapedSharedString); + } + + if ($sharedString === null) { + throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); + } + + return rtrim($sharedString, PHP_EOL); + } + + /** + * Escapes the line feed characters (\n) + * + * @param string $unescapedString + * @return string + */ + private function escapeLineFeed($unescapedString) + { + return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString); + } + + /** + * Unescapes the line feed characters (\n) + * + * @param string $escapedString + * @return string + */ + private function unescapeLineFeed($escapedString) + { + return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString); + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function clearCache() + { + if ($this->tempFolder) { + $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder); + } + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php new file mode 100644 index 0000000..c6a5321 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php @@ -0,0 +1,83 @@ +inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount); + $this->isCacheClosed = false; + } + + /** + * Adds the given string to the cache. + * + * @param string $sharedString The string to be added to the cache + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return void + */ + public function addStringForIndex($sharedString, $sharedStringIndex) + { + if (!$this->isCacheClosed) { + $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString); + } + } + + /** + * Closes the cache after the last shared string was added. + * This prevents any additional string from being added to the cache. + * + * @return void + */ + public function closeCache() + { + $this->isCacheClosed = true; + } + + /** + * Returns the string located at the given index from the cache. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + try { + return $this->inMemoryCache->offsetGet($sharedStringIndex); + } catch (\RuntimeException $e) { + throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); + } + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function clearCache() + { + unset($this->inMemoryCache); + $this->isCacheClosed = false; + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php new file mode 100644 index 0000000..5c8fb46 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -0,0 +1,280 @@ +filePath = $filePath; + $this->tempFolder = $tempFolder; + } + + /** + * Returns whether the XLSX file contains a shared strings XML file + * + * @return bool + */ + public function hasSharedStrings() + { + $hasSharedStrings = false; + $zip = new \ZipArchive(); + + if ($zip->open($this->filePath) === true) { + $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false); + $zip->close(); + } + + return $hasSharedStrings; + } + + /** + * Builds an in-memory array containing all the shared strings of the sheet. + * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. + * It is then accessed by the sheet data, via the string index in the built table. + * + * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx + * + * The XML file can be really big with sheets containing a lot of data. That is why + * we need to use a XML reader that provides streaming like the XMLReader library. + * Please note that SimpleXML does not provide such a functionality but since it is faster + * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. + * + * @return void + * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read + */ + public function extractSharedStrings() + { + $xmlReader = new \XMLReader(); + $sharedStringIndex = 0; + $escaper = new \Box\Spout\Common\Escaper\XLSX(); + + $sharedStringsFilePath = $this->getSharedStringsFilePath(); + if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) { + throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); + } + + $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); + $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); + + while ($xmlReader->read() && $xmlReader->name !== 'si') { + // do nothing until a 'si' tag is reached + } + + while ($xmlReader->name === 'si') { + $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); + $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); + + // removes nodes that should not be read, like the pronunciation of the Kanji characters + $cleanNode = $this->removeSuperfluousTextNodes($node); + + // find all text nodes 't'; there can be multiple if the cell contains formatting + $textNodes = $cleanNode->xpath('//ns:t'); + + $textValue = ''; + foreach ($textNodes as $textNode) { + if ($this->shouldPreserveWhitespace($textNode)) { + $textValue .= $textNode->__toString(); + } else { + $textValue .= trim($textNode->__toString()); + } + } + + $unescapedTextValue = $escaper->unescape($textValue); + $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); + + $sharedStringIndex++; + + // jump to the next 'si' tag + $xmlReader->next('si'); + } + + $this->cachingStrategy->closeCache(); + + $xmlReader->close(); + } + + /** + * @return string The path to the shared strings XML file + */ + protected function getSharedStringsFilePath() + { + return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH; + } + + /** + * Returns the shared strings unique count, as specified in tag. + * + * @param \XMLReader $xmlReader XMLReader instance + * @return int Number of unique shared strings in the sharedStrings.xml file + * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read + */ + protected function getSharedStringsUniqueCount($xmlReader) + { + // Use internal errors to avoid displaying lots of warning messages in case of invalid file + // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks + libxml_clear_errors(); + libxml_use_internal_errors(true); + + $xmlReader->next('sst'); + + // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) + while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) { + $xmlReader->read(); + } + + $readError = libxml_get_last_error(); + if ($readError !== false) { + throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]"); + } + + // reset the setting to display XML warnings/errors + libxml_use_internal_errors(false); + + return intval($xmlReader->getAttribute('uniqueCount')); + } + + /** + * Returns the best shared strings caching strategy. + * + * @param int $sharedStringsUniqueCount + * @return CachingStrategyInterface + */ + protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) + { + return CachingStrategyFactory::getInstance() + ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder); + } + + /** + * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. + * This is to simplify the parsing of the subtree. + * + * @param \XMLReader $xmlReader + * @return \SimpleXMLElement + * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read + */ + protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) + { + // Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node. + // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks + libxml_clear_errors(); + libxml_use_internal_errors(true); + + $node = null; + try { + $node = new \SimpleXMLElement($xmlReader->readOuterXml()); + } catch (\Exception $exception) { + $error = libxml_get_last_error(); + libxml_use_internal_errors(false); + + throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].'); + } + + libxml_use_internal_errors(false); + + return $node; + } + + /** + * Removes nodes that should not be read, like the pronunciation of the Kanji characters. + * By keeping them, their text content would be added to the read string. + * + * @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove + * @return \SimpleXMLElement Cleaned parent node + */ + protected function removeSuperfluousTextNodes($parentNode) + { + $tagsToRemove = [ + 'rPh', // Pronunciation of the text + ]; + + foreach ($tagsToRemove as $tagToRemove) { + $xpath = '//ns:' . $tagToRemove; + $nodesToRemove = $parentNode->xpath($xpath); + + foreach ($nodesToRemove as $nodeToRemove) { + // This is how to remove a node from the XML + unset($nodeToRemove[0]); + } + } + + return $parentNode; + } + + /** + * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. + * + * @param \SimpleXMLElement $textNode The text node element () whitespace may be preserved + * @return bool Whether whitespace should be preserved + */ + protected function shouldPreserveWhitespace($textNode) + { + $shouldPreserveWhitespace = false; + + $attributes = $textNode->attributes('xml', true); + if ($attributes) { + foreach ($attributes as $attributeName => $attributeValue) { + if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') { + $shouldPreserveWhitespace = true; + break; + } + } + } + + return $shouldPreserveWhitespace; + } + + /** + * Returns the shared string at the given index, using the previously chosen caching strategy. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + return $this->cachingStrategy->getStringAtIndex($sharedStringIndex); + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function cleanup() + { + if ($this->cachingStrategy) { + $this->cachingStrategy->clearCache(); + } + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php new file mode 100644 index 0000000..3cbe9cb --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -0,0 +1,199 @@ +filePath = $filePath; + $this->sharedStringsHelper = $sharedStringsHelper; + $this->globalFunctionsHelper = $globalFunctionsHelper; + } + + /** + * Returns the sheets metadata of the file located at the previously given file path. + * The paths to the sheets' data are read from the [Content_Types].xml file. + * + * @return Sheet[] Sheets within the XLSX file + */ + public function getSheets() + { + $sheets = []; + + $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::CONTENT_TYPES_XML_FILE_PATH, + self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML + ); + + // find all nodes defining a sheet + $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); + + for ($i = 0; $i < count($sheetNodes); $i++) { + $sheetNode = $sheetNodes[$i]; + $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; + + $sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i); + } + + return $sheets; + } + + /** + * Returns an instance of a sheet, given the path of its data XML file. + * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. + * Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID. + * The entry contains the ID and name of the sheet. + * + * If this piece of data can't be found by parsing the different XML files, the ID will default + * to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will + * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2"). + * + * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml + * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) + * @return \Box\Spout\Reader\Sheet Sheet instance + */ + protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased) + { + $sheetId = $sheetIndexZeroBased + 1; + $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); + + /* + * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" + * In workbook.xml.rels, it is only "worksheets/sheet1.xml" + */ + $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/'); + + // find the node associated to the given file path + $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); + $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); + + if (count($relationshipNodes) === 1) { + $relationshipNode = $relationshipNodes[0]; + $sheetId = (string) $relationshipNode->attributes()->Id; + + $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); + $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]'); + + if (count($sheetNodes) === 1) { + $sheetNode = $sheetNodes[0]; + $sheetId = (int) $sheetNode->attributes()->sheetId; + $escapedSheetName = (string) $sheetNode->attributes()->name; + + $escaper = new \Box\Spout\Common\Escaper\XLSX(); + $sheetName = $escaper->unescape($escapedSheetName); + } + } + + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetId, $sheetIndexZeroBased, $sheetName); + } + + /** + * Returns the default name of the sheet whose data is located + * at the given path. + * + * @param $sheetDataXMLFilePath + * @return string The default sheet name + */ + protected function getDefaultSheetName($sheetDataXMLFilePath) + { + return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION); + } + + /** + * Returns a representation of the workbook.xml.rels file, ready to be parsed. + * The returned value is cached. + * + * @return \SimpleXMLElement XML element representating the workbook.xml.rels file + */ + protected function getWorkbookXMLRelsAsXMLElement() + { + if (!$this->workbookXMLRelsAsXMLElement) { + $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::WORKBOOK_XML_RELS_FILE_PATH, + self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS + ); + } + + return $this->workbookXMLRelsAsXMLElement; + } + + /** + * Returns a representation of the workbook.xml file, ready to be parsed. + * The returned value is cached. + * + * @return \SimpleXMLElement XML element representating the workbook.xml.rels file + */ + protected function getWorkbookXMLAsXMLElement() + { + if (!$this->workbookXMLAsXMLElement) { + $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::WORKBOOK_XML_FILE_PATH, + self::MAIN_NAMESPACE_FOR_WORKBOOK_XML + ); + } + + return $this->workbookXMLAsXMLElement; + } + + /** + * Loads the contents of the given file in an XML parser and register the given XPath namespace. + * + * @param string $xmlFilePath The path of the XML file inside the XLSX file + * @param string $mainNamespace The main XPath namespace to register + * @return \SimpleXMLElement The XML element representing the file + */ + protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace) + { + $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath); + + $xmlElement = new \SimpleXMLElement($xmlContents); + $xmlElement->registerXPathNamespace('ns', $mainNamespace); + + return $xmlElement; + } +} diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php new file mode 100644 index 0000000..68712cc --- /dev/null +++ b/src/Spout/Reader/XLSX/Reader.php @@ -0,0 +1,93 @@ +tempFolder = $tempFolder; + return $this; + } + + /** + * Opens the file at the given file path to make it ready to be read. + * It also parses the sharedStrings.xml file to get all the shared strings available in memory + * and fetches all the available sheets. + * + * @param string $filePath Path of the file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read + * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file + */ + protected function openReader($filePath) + { + $this->zip = new \ZipArchive(); + + if ($this->zip->open($filePath) === true) { + $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); + + if ($this->sharedStringsHelper->hasSharedStrings()) { + // Extracts all the strings from the sheets for easy access in the future + $this->sharedStringsHelper->extractSharedStrings(); + } + + $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper); + } else { + throw new IOException('Could not open ' . $filePath . ' for reading.'); + } + } + + /** + * Returns an iterator to iterate over sheets. + * + * @return SheetIterator To iterate over sheets + */ + public function getSheetIterator() + { + return $this->sheetIterator; + } + + /** + * Closes the reader. To be used after reading the file. + * + * @return void + */ + protected function closeReader() + { + if ($this->zip) { + $this->zip->close(); + } + + if ($this->sharedStringsHelper) { + $this->sharedStringsHelper->cleanup(); + } + } +} diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php new file mode 100644 index 0000000..e96898f --- /dev/null +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -0,0 +1,356 @@ +filePath = $filePath; + $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); + $this->sharedStringsHelper = $sharedStringsHelper; + + $this->xmlReader = new \XMLReader(); + $this->escaper = new \Box\Spout\Common\Escaper\XLSX(); + } + + /** + * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml + * @return string Path of the XML file containing the sheet data, + * without the leading slash. + */ + protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath) + { + return ltrim($sheetDataXMLFilePath, '/'); + } + + /** + * Rewind the Iterator to the first element. + * Initializes the XMLReader object that reads the associated sheet data. + * The XMLReader is configured to be safe from billion laughs attack. + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read + */ + public function rewind() + { + $this->xmlReader->close(); + + $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath; + if ($this->xmlReader->open($sheetDataFilePath, null, LIBXML_NONET) === false) { + throw new IOException('Could not open "' . $this->sheetDataXMLFilePath . '".'); + } + + $this->numReadRows = 0; + $this->rowDataBuffer = null; + $this->hasReachedEndOfFile = false; + $this->numColumns = 0; + + $this->next(); + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return (!$this->hasReachedEndOfFile); + } + + /** + * Move forward to next element. Empty rows will be skipped. + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + */ + public function next() + { + $isInsideRowTag = false; + $rowData = []; + + while ($this->xmlReader->read()) { + if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) { + // Read dimensions of the sheet + $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) + if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { + $lastCellIndex = $matches[1]; + $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; + } + + } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) { + // Start of the row description + $isInsideRowTag = true; + + // Read spans info if present + $numberOfColumnsForRow = $this->numColumns; + $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance + if ($spans) { + list(, $numberOfColumnsForRow) = explode(':', $spans); + $numberOfColumnsForRow = intval($numberOfColumnsForRow); + } + $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; + + } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) { + // Start of a cell description + $currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); + $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); + + $node = $this->xmlReader->expand(); + $rowData[$currentColumnIndex] = $this->getCellValue($node); + + } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) { + // End of the row description + // If needed, we fill the empty cells + $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); + $this->numReadRows++; + break; + + } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + } + } + + $this->rowDataBuffer = $rowData; + } + + /** + * Returns the cell's string value from a node's nested value node + * + * @param \DOMNode $node + * @return string The value associated with the cell + */ + protected function getVNodeValue($node) + { + // for cell types having a "v" tag containing the value. + // if not, the returned value should be empty string. + $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); + if ($vNode !== null) { + return $vNode->nodeValue; + } + return ""; + } + + /** + * Returns the cell String value where string is inline. + * + * @param \DOMNode $node + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatInlineStringCellValue($node) + { + // inline strings are formatted this way: + // [INLINE_STRING] + $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0); + $escapedCellValue = trim($tNode->nodeValue); + $cellValue = $this->escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value from shared-strings file using nodeValue index. + * + * @param string $nodeValue + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatSharedStringCellValue($nodeValue) + { + // shared strings are formatted this way: + // [SHARED_STRING_INDEX] + $sharedStringIndex = intval($nodeValue); + $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); + $cellValue = $this->escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell String value, where string is stored in value node. + * + * @param string $nodeValue + * @return string The value associated with the cell (null when the cell has an error) + */ + protected function formatStrCellValue($nodeValue) + { + $escapedCellValue = trim($nodeValue); + $cellValue = $this->escaper->unescape($escapedCellValue); + return $cellValue; + } + + /** + * Returns the cell Numeric value from string of nodeValue. + * + * @param string $nodeValue + * @return int|float The value associated with the cell + */ + protected function formatNumericCellValue($nodeValue) + { + $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); + return $cellValue; + } + + /** + * Returns the cell Boolean value from a specific node's Value. + * + * @param string $nodeValue + * @return bool The value associated with the cell + */ + protected function formatBooleanCellValue($nodeValue) + { + // !! is similar to boolval() + $cellValue = !!$nodeValue; + return $cellValue; + } + + /** + * Returns a cell's PHP Date value, associated to the given stored nodeValue. + * + * @param string $nodeValue + * @return \DateTime|null The value associated with the cell (null when the cell has an error) + */ + protected function formatDateCellValue($nodeValue) + { + // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) + try { + $cellValue = new \DateTime($nodeValue); + return $cellValue; + } catch (\Exception $e) { + return null; + } + } + + /** + * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. + * + * @param \DOMNode $node + * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) + */ + protected function getCellValue($node) + { + // Default cell type is "n" + $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; + $vNodeValue = $this->getVNodeValue($node); + + if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { + return $vNodeValue; + } + + switch ($cellType) { + case self::CELL_TYPE_INLINE_STRING: + return $this->formatInlineStringCellValue($node); + case self::CELL_TYPE_SHARED_STRING: + return $this->formatSharedStringCellValue($vNodeValue); + case self::CELL_TYPE_STR: + return $this->formatStrCellValue($vNodeValue); + case self::CELL_TYPE_BOOLEAN: + return $this->formatBooleanCellValue($vNodeValue); + case self::CELL_TYPE_NUMERIC: + return $this->formatNumericCellValue($vNodeValue); + case self::CELL_TYPE_DATE: + return $this->formatDateCellValue($vNodeValue); + default: + return null; + } + } + + /** + * Return the current element, from the buffer. + * @link http://php.net/manual/en/iterator.current.php + * + * @return array|null + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + $this->xmlReader->close(); + } +} diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php new file mode 100644 index 0000000..e2eebec --- /dev/null +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -0,0 +1,74 @@ +rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); + $this->id = $sheetId; + $this->index = $sheetIndex; + $this->name = $sheetName; + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } + + /** + * @return int ID of the sheet + */ + public function getId() + { + return $this->id; + } + + /** + * @return int Index of the sheet, based on order of creation (zero-based) + */ + public function getIndex() + { + return $this->index; + } + + /** + * @return string Name of the sheet + */ + public function getName() + { + return $this->name; + } +} diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php new file mode 100644 index 0000000..aae58c2 --- /dev/null +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -0,0 +1,112 @@ +sheets = $sheetHelper->getSheets(); + + if (count($this->sheets) === 0) { + throw new NoSheetsFoundException('The file must contain at least one sheet.'); + } + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->currentSheetIndex = 0; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->currentSheetIndex < count($this->sheets)); + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + if (array_key_exists($this->currentSheetIndex, $this->sheets)) { + $currentSheet = $this->sheets[$this->currentSheetIndex]; + $currentSheet->getRowIterator()->end(); + + $this->currentSheetIndex++; + } + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheets[$this->currentSheetIndex]; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->currentSheetIndex + 1; + } + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + // make sure we are not leaking memory in case the iteration stopped before the end + foreach ($this->sheets as $sheet) { + $sheet->getRowIterator()->end(); + } + } +} diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php new file mode 100644 index 0000000..de55b94 --- /dev/null +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -0,0 +1,181 @@ +open('/path/to/fake/file.csv'); + } + + /** + * @expectedException \Box\Spout\Common\Exception\IOException + * + * @return void + */ + public function testOpenShouldThrowExceptionIfFileNotReadable() + { + $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') + ->setMethods(['is_readable']) + ->getMock(); + $helperStub->method('is_readable')->willReturn(false); + + $resourcePath = $this->getResourcePath('csv_standard.csv'); + + $reader = ReaderFactory2::create(Type::CSV); + $reader->setGlobalFunctionsHelper($helperStub); + $reader->open($resourcePath); + } + + + /** + * @return void + */ + public function testReadStandardCSV() + { + $allRows = $this->getAllRowsForFile('csv_standard.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldNotStopAtCommaIfEnclosed() + { + $allRows = $this->getAllRowsForFile('csv_with_comma_enclosed.csv'); + $this->assertEquals('This is, a comma', $allRows[0][0]); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCells() + { + $allRows = $this->getAllRowsForFile('csv_with_empty_cells.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', '', 'csv--23'], + ['csv--31', 'csv--32', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyLines() + { + $allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldHaveTheRightNumberOfCells() + { + $allRows = $this->getAllRowsForFile('csv_with_different_cells_number.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22'], + ['csv--31'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportCustomFieldDelimiter() + { + $allRows = $this->getAllRowsForFile('csv_delimited_with_pipes.csv', '|'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportCustomFieldEnclosure() + { + $allRows = $this->getAllRowsForFile('csv_text_enclosed_with_pound.csv', ',', '#'); + $this->assertEquals('This is, a comma', $allRows[0][0]); + } + + /** + * @return void + */ + public function testReadShouldSkipUtf8Bom() + { + $allRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @param string $fileName + * @param string|void $fieldDelimiter + * @param string|void $fieldEnclosure + * @return array All the read rows the given file + */ + private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"') + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + $reader = ReaderFactory2::create(Type::CSV); + $reader->setFieldDelimiter($fieldDelimiter); + $reader->setFieldEnclosure($fieldEnclosure); + + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + return $allRows; + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php new file mode 100644 index 0000000..ff417b9 --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php @@ -0,0 +1,60 @@ + 1, 3 => 3]; + $filledArray = CellHelper::fillMissingArrayIndexes($arrayToFill, 'FILL'); + + $expectedFilledArray = ['FILL', 1, 'FILL', 3]; + $this->assertEquals($expectedFilledArray, $filledArray); + } + + /** + * @return array + */ + public function dataProviderForTestGetColumnIndexFromCellIndex() + { + return [ + ['A1', 0], + ['Z3', 25], + ['AA5', 26], + ['AB24', 27], + ['BC5', 54], + ['BCZ99', 1455], + ]; + } + + /** + * @dataProvider dataProviderForTestGetColumnIndexFromCellIndex + * + * @param string $cellIndex + * @param int $expectedColumnIndex + * @return void + */ + public function testGetColumnIndexFromCellIndex($cellIndex, $expectedColumnIndex) + { + $this->assertEquals($expectedColumnIndex, CellHelper::getColumnIndexFromCellIndex($cellIndex)); + } + + /** + * @expectedException \Box\Spout\Common\Exception\InvalidArgumentException + * + * @return void + */ + public function testGetColumnIndexFromCellIndexShouldThrowIfInvalidCellIndex() + { + CellHelper::getColumnIndexFromCellIndex('InvalidCellIndex'); + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php new file mode 100644 index 0000000..ea77b4f --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php @@ -0,0 +1,99 @@ +getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') + ->disableOriginalConstructor() + ->setMethods(['getMemoryLimitInKB']) + ->getMock(); + + $factoryStub->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB); + + \ReflectionHelper::setStaticValue('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); + + $strategy = $factoryStub->getBestCachingStrategy($sharedStringsUniqueCount, null); + + $fullExpectedStrategyClassName = 'Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\\' . $expectedStrategyClassName; + $this->assertEquals($fullExpectedStrategyClassName, get_class($strategy)); + + $strategy->clearCache(); + \ReflectionHelper::reset(); + } + + /** + * @return array + */ + public function dataProviderForTestGetMemoryLimitInKB() + { + return [ + ['-1', -1], + ['invalid', -1], + ['1024B', 1], + ['128K', 128], + ['256KB', 256], + ['512M', 512 * 1024], + ['2MB', 2 * 1024], + ['1G', 1 * 1024 * 1024], + ['10GB', 10 * 1024 * 1024], + ['2T', 2 * 1024 * 1024 * 1024], + ['5TB', 5 * 1024 * 1024 * 1024], + ]; + } + + /** + * @dataProvider dataProviderForTestGetMemoryLimitInKB + * + * @param string $memoryLimitFormatted + * @param float $expectedMemoryLimitInKB + * @return void + */ + public function testGetMemoryLimitInKB($memoryLimitFormatted, $expectedMemoryLimitInKB) + { + /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $factoryStub */ + $factoryStub = $this + ->getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') + ->disableOriginalConstructor() + ->setMethods(['getMemoryLimitFromIni']) + ->getMock(); + + $factoryStub->method('getMemoryLimitFromIni')->willReturn($memoryLimitFormatted); + + $memoryLimitInKB = \ReflectionHelper::callMethodOnObject($factoryStub, 'getMemoryLimitInKB'); + + $this->assertEquals($expectedMemoryLimitInKB, $memoryLimitInKB); + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php new file mode 100644 index 0000000..a72d19a --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -0,0 +1,112 @@ +getResourcePath('one_sheet_with_shared_strings.xlsx'); + $this->sharedStringsHelper = new SharedStringsHelper($resourcePath); + } + + /** + * @return void + */ + public function tearDown() + { + $this->sharedStringsHelper->cleanup(); + } + + /** + * @expectedException \Box\Spout\Reader\Exception\SharedStringNotFoundException + * @return void + */ + public function testGetStringAtIndexShouldThrowExceptionIfStringNotFound() + { + $this->sharedStringsHelper->extractSharedStrings(); + $this->sharedStringsHelper->getStringAtIndex(PHP_INT_MAX); + } + + /** + * @return void + */ + public function testGetStringAtIndexShouldReturnTheCorrectStringIfFound() + { + $this->sharedStringsHelper->extractSharedStrings(); + + $sharedString = $this->sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('s1--A1', $sharedString); + + $sharedString = $this->sharedStringsHelper->getStringAtIndex(24); + $this->assertEquals('s1--E5', $sharedString); + + $usedCachingStrategy = \ReflectionHelper::getValueOnObject($this->sharedStringsHelper, 'cachingStrategy'); + $this->assertTrue($usedCachingStrategy instanceof InMemoryStrategy); + } + + /** + * @return void + */ + public function testGetStringAtIndexShouldWorkWithMultilineStrings() + { + $resourcePath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals("s1\nA1", $sharedString); + + $sharedString = $sharedStringsHelper->getStringAtIndex(24); + $this->assertEquals("s1\nE5", $sharedString); + + $sharedStringsHelper->cleanup(); + } + + /** + * @return void + */ + public function testGetStringAtIndexWithFileBasedStrategy() + { + // force the file-based strategy by setting no memory limit + $originalMemoryLimit = ini_get('memory_limit'); + ini_set('memory_limit', '-1'); + + $resourcePath = $this->getResourcePath('sheet_with_lots_of_shared_strings.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('str', $sharedString); + + $sharedString = $sharedStringsHelper->getStringAtIndex(CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE + 1); + $this->assertEquals('str', $sharedString); + + $usedCachingStrategy = \ReflectionHelper::getValueOnObject($sharedStringsHelper, 'cachingStrategy'); + $this->assertTrue($usedCachingStrategy instanceof FileBasedStrategy); + + $sharedStringsHelper->cleanup(); + + ini_set('memory_limit', $originalMemoryLimit); + } +} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php new file mode 100644 index 0000000..c5fb583 --- /dev/null +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -0,0 +1,300 @@ +getAllRowsForFile($filePath); + } + + /** + * @return array + */ + public function dataProviderForTestReadForAllWorksheets() + { + return [ + ['one_sheet_with_shared_strings.xlsx', 5, 5], + ['one_sheet_with_inline_strings.xlsx', 5, 5], + ['two_sheets_with_shared_strings.xlsx', 10, 5], + ['two_sheets_with_inline_strings.xlsx', 10, 5] + ]; + } + + /** + * @dataProvider dataProviderForTestReadForAllWorksheets + * + * @param string $resourceName + * @param int $expectedNumOfRows + * @param int $expectedNumOfCellsPerRow + * @return void + */ + public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow) + { + $allRows = $this->getAllRowsForFile($resourceName); + + $this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows"); + foreach ($allRows as $row) { + $this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row"); + } + } + + /** + * @return void + */ + public function testReadShouldSupportFilesWithoutSharedStringsFile() + { + $allRows = $this->getAllRowsForFile('sheet_with_no_shared_strings_file.xlsx'); + + $expectedRows = [ + [10, 11], + [20, 21], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportAllCellTypes() + { + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); + + $expectedRows = [ + [ + 's1--A1', 's1--A2', + false, true, + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'), + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), + 10, 10.43, + null, + 'weird string', // valid 'str' string + null, // invalid date + ], + ['', '', '', '', '', '', '', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCellsAtTheEndIfDimensionsSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_but_spans_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be 2 rows'); + foreach ($allRows as $row) { + $this->assertEquals(5, count($row), 'There should be 5 cells for every row, because empty rows should be preserved'); + } + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCellsAtTheEndIfNoDimensionsButSpansSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be 2 rows'); + $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); + $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be 2 rows'); + $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); + $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyRows() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportEmptySharedString() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_shared_string.xlsx'); + + $expectedRows = [ + ['s1--A1', '', 's1--C1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveSpaceIfSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_with_preserve_space_shared_strings.xlsx'); + + $expectedRows = [ + [' s1--A1', 's1--B1 ', ' s1--C1 '], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipPronunciationData() + { + $allRows = $this->getAllRowsForFile('sheet_with_pronunciation.xlsx'); + + $expectedRow = ['名前', '一二三四']; + $this->assertEquals($expectedRow, $allRows[0], 'Pronunciation data should be removed.'); + } + + + /** + * @return array + */ + public function dataProviderForTestReadShouldBeProtectedAgainstAttacks() + { + return [ + ['attack_billion_laughs.xlsx'], + ['attack_quadratic_blowup.xlsx'], + ]; + } + + /** + * @dataProvider dataProviderForTestReadShouldBeProtectedAgainstAttacks + * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) + * + * @param string $fileName + * @return void + */ + public function testReadShouldBeProtectedAgainstAttacks($fileName) + { + $startTime = microtime(true); + + try { + $this->getAllRowsForFile($fileName); + $this->fail('An exception should have been thrown'); + } catch (IOException $exception) { + $duration = microtime(true) - $startTime; + $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); + + $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB + $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); + } + } + + /** + * @return void + */ + public function testReadShouldBeAbleToProcessEmptySheets() + { + $allRows = $this->getAllRowsForFile('sheet_with_no_cells.xlsx'); + $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); + } + + /** + * @return void + */ + public function testReadShouldSkipFormulas() + { + $allRows = $this->getAllRowsForFile('sheet_with_formulas.xlsx'); + + $expectedRows = [ + ['val1', 'val2', 'total1', 'total2'], + [10, 20, 30, 21], + [11, 21, 32, 41], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @param string $fileName + * @return array All the read rows the given file + */ + private function getAllRowsForFile($fileName) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + $reader = ReaderFactory2::create(Type::XLSX); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + return $allRows; + } +} diff --git a/tests/Spout/Reader/XLSX/SheetTest.php b/tests/Spout/Reader/XLSX/SheetTest.php new file mode 100644 index 0000000..c9449f4 --- /dev/null +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -0,0 +1,53 @@ +openFileAndReturnSheets('two_sheets_with_custom_names.xlsx'); + + $this->assertEquals('CustomName1', $sheets[0]->getName()); + $this->assertEquals(0, $sheets[0]->getIndex()); + $this->assertEquals(1, $sheets[0]->getId()); + + $this->assertEquals('CustomName2', $sheets[1]->getName()); + $this->assertEquals(1, $sheets[1]->getIndex()); + $this->assertEquals(2, $sheets[1]->getId()); + } + + /** + * @param string $fileName + * @return Sheet[] + */ + private function openFileAndReturnSheets($fileName) + { + $resourcePath = $this->getResourcePath($fileName); + $reader = ReaderFactory2::create(Type::XLSX); + $reader->open($resourcePath); + + $sheets = []; + foreach ($reader->getSheetIterator() as $sheet) { + $sheets[] = $sheet; + } + + $reader->close(); + + return $sheets; + } +}