From ae3ee357ff5c502d1c4725b6e83e13c7eaf65c03 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Wed, 15 Jul 2015 00:22:37 -0700 Subject: [PATCH 1/7] Moved readers to iterators Instead of the hasNext() / next() syntax, readers now implement the PHP iterator pattern. It allows readers to be used with a foreach() loop. All readers now share the same structure (CSV is treated as having exactly one sheet): - one concrete Reader - one SheetIterator, exposed by the Reader - one or more Sheets, returned at every iteration - one RowIterator, exposed by the Sheet Introducing the concept of sheets for CSV may be kind of confusing but it makes Spout way more consistent. Also, this confusion may be resolved by creating a wrapper around the readers if needed. -- This commit does not delete the old files, nor change the folder structure for Writers. This will be done in another commit. --- src/Spout/Reader/AbstractReader2.php | 111 ++++++ src/Spout/Reader/CSV/Reader.php | 95 +++++ src/Spout/Reader/CSV/RowIterator.php | 163 ++++++++ src/Spout/Reader/CSV/Sheet.php | 35 ++ src/Spout/Reader/CSV/SheetIterator.php | 96 +++++ .../Exception/NoSheetsFoundException.php | 12 + src/Spout/Reader/IteratorInterface.php | 18 + src/Spout/Reader/ReaderFactory2.php | 44 +++ src/Spout/Reader/ReaderInterface2.php | 35 ++ src/Spout/Reader/SheetInterface.php | 18 + src/Spout/Reader/XLSX/Helper/CellHelper.php | 97 +++++ .../CachingStrategyFactory.php | 154 ++++++++ .../CachingStrategyInterface.php | 44 +++ .../FileBasedStrategy.php | 188 +++++++++ .../SharedStringsCaching/InMemoryStrategy.php | 83 ++++ .../XLSX/Helper/SharedStringsHelper.php | 280 ++++++++++++++ src/Spout/Reader/XLSX/Helper/SheetHelper.php | 199 ++++++++++ src/Spout/Reader/XLSX/Reader.php | 93 +++++ src/Spout/Reader/XLSX/RowIterator.php | 356 ++++++++++++++++++ src/Spout/Reader/XLSX/Sheet.php | 74 ++++ src/Spout/Reader/XLSX/SheetIterator.php | 112 ++++++ tests/Spout/Reader/CSV/ReaderTest.php | 181 +++++++++
.../Reader/XLSX/Helper/CellHelperTest.php | 60 +++ .../CachingStrategyFactoryTest.php | 99 +++++ .../XLSX/Helper/SharedStringsHelperTest.php | 112 ++++++ tests/Spout/Reader/XLSX/ReaderTest.php | 300 +++++++++++++++ tests/Spout/Reader/XLSX/SheetTest.php | 53 +++ 27 files changed, 3112 insertions(+) create mode 100644 src/Spout/Reader/AbstractReader2.php create mode 100644 src/Spout/Reader/CSV/Reader.php create mode 100644 src/Spout/Reader/CSV/RowIterator.php create mode 100644 src/Spout/Reader/CSV/Sheet.php create mode 100644 src/Spout/Reader/CSV/SheetIterator.php create mode 100644 src/Spout/Reader/Exception/NoSheetsFoundException.php create mode 100644 src/Spout/Reader/IteratorInterface.php create mode 100644 src/Spout/Reader/ReaderFactory2.php create mode 100644 src/Spout/Reader/ReaderInterface2.php create mode 100644 src/Spout/Reader/SheetInterface.php create mode 100644 src/Spout/Reader/XLSX/Helper/CellHelper.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php create mode 100644 src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php create mode 100644 src/Spout/Reader/XLSX/Helper/SheetHelper.php create mode 100644 src/Spout/Reader/XLSX/Reader.php create mode 100644 src/Spout/Reader/XLSX/RowIterator.php create mode 100644 src/Spout/Reader/XLSX/Sheet.php create mode 100644 src/Spout/Reader/XLSX/SheetIterator.php create mode 100644 tests/Spout/Reader/CSV/ReaderTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/CellHelperTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php create mode 100644 tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php create mode 100644 
tests/Spout/Reader/XLSX/ReaderTest.php create mode 100644 tests/Spout/Reader/XLSX/SheetTest.php diff --git a/src/Spout/Reader/AbstractReader2.php b/src/Spout/Reader/AbstractReader2.php new file mode 100644 index 0000000..ef24412 --- /dev/null +++ b/src/Spout/Reader/AbstractReader2.php @@ -0,0 +1,111 @@ +globalFunctionsHelper = $globalFunctionsHelper; + return $this; + } + + /** + * Prepares the reader to read the given file. It also makes sure + * that the file exists and is readable. + * + * @param string $filePath Path of the file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted + */ + public function open($filePath) + { + if (!$this->isPhpStream($filePath)) { + // we skip the checks if the provided file path points to a PHP stream + if (!$this->globalFunctionsHelper->file_exists($filePath)) { + throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.'); + } else if (!$this->globalFunctionsHelper->is_readable($filePath)) { + throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.'); + } + } + + try { + $this->openReader($filePath); + $this->isStreamOpened = true; + } catch (\Exception $exception) { + throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')'); + } + } + + /** + * Checks if a path is a PHP stream (like php://output, php://memory, ...) 
+ * + * @param string $filePath Path of the file to be read + * @return bool Whether the given path maps to a PHP stream + */ + protected function isPhpStream($filePath) + { + return (strpos($filePath, 'php://') === 0); + } + + /** + * Closes the reader, preventing any additional reading + * + * @return void + */ + public function close() + { + if ($this->isStreamOpened) { + $this->closeReader(); + + $sheetIterator = $this->getSheetIterator(); + if ($sheetIterator) { + $sheetIterator->end(); + } + + $this->isStreamOpened = false; + } + } +} diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php new file mode 100644 index 0000000..3b164d5 --- /dev/null +++ b/src/Spout/Reader/CSV/Reader.php @@ -0,0 +1,95 @@ +fieldDelimiter = $fieldDelimiter; + return $this; + } + + /** + * Sets the field enclosure for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldEnclosure Character that enclose fields + * @return Reader + */ + public function setFieldEnclosure($fieldEnclosure) + { + $this->fieldEnclosure = $fieldEnclosure; + return $this; + } + + /** + * Opens the file at the given path to make it ready to be read. + * The file must be UTF-8 encoded. + * @TODO add encoding detection/conversion + * + * @param string $filePath Path of the CSV file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException + */ + protected function openReader($filePath) + { + $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); + if (!$this->filePointer) { + throw new IOException('Could not open file ' . $filePath . ' for reading.'); + } + + $this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper); + } + + /** + * Returns an iterator to iterate over sheets. 
+ * + * @return SheetIterator To iterate over sheets + */ + public function getSheetIterator() + { + return $this->sheetIterator; + } + + + /** + * Closes the reader. To be used after reading the file. + * + * @return void + */ + protected function closeReader() + { + if ($this->filePointer) { + $this->globalFunctionsHelper->fclose($this->filePointer); + } + } +} diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php new file mode 100644 index 0000000..2316fa7 --- /dev/null +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -0,0 +1,163 @@ +filePointer = $filePointer; + $this->fieldDelimiter = $fieldDelimiter; + $this->fieldEnclosure = $fieldEnclosure; + $this->globalFunctionsHelper = $globalFunctionsHelper; + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->rewindAndSkipUtf8Bom(); + + $this->numReadRows = 0; + $this->rowDataBuffer = null; + + $this->next(); + } + + /** + * This rewinds and skips the UTF-8 BOM if inserted at the beginning of the file + * by moving the file pointer after it, so that it is not read. + * + * @return void + */ + protected function rewindAndSkipUtf8Bom() + { + $this->globalFunctionsHelper->rewind($this->filePointer); + + $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); + + if ($hasUtf8Bom) { + // we skip the 3 bytes of the BOM (so reading starts from the 4th byte) + $this->globalFunctionsHelper->fseek($this->filePointer, 3); + } else { + // if no BOM, reset the pointer to read from the beginning + $this->globalFunctionsHelper->fseek($this->filePointer, 0); + } + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->filePointer && !$this->hasReachedEndOfFile); + } + + /** + * Move forward to next element. Empty rows are skipped.
+ * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $lineData = null; + $this->hasReachedEndOfFile = feof($this->filePointer); + + if (!$this->hasReachedEndOfFile) { + do { + $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); + } while ($lineData && $this->isEmptyLine($lineData)); + + if ($lineData !== null) { + $this->rowDataBuffer = $lineData; + $this->numReadRows++; + } + } + } + + /** + * @param array $lineData Array containing the cells value for the line + * @return bool Whether the given line is empty + */ + protected function isEmptyLine($lineData) + { + return (count($lineData) === 1 && $lineData[0] === null); + } + + /** + * Return the current element from the buffer + * @link http://php.net/manual/en/iterator.current.php + * + * @return array + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php new file mode 100644 index 0000000..207fcae --- /dev/null +++ b/src/Spout/Reader/CSV/Sheet.php @@ -0,0 +1,35 @@ +rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } +} diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php new file mode 100644 index 0000000..f424cd8 --- /dev/null +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -0,0 +1,96 @@ +sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->hasReadUniqueSheet = false; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return (!$this->hasReadUniqueSheet); + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $this->hasReadUniqueSheet = true; + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheet; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return 1; + } + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/Exception/NoSheetsFoundException.php b/src/Spout/Reader/Exception/NoSheetsFoundException.php new file mode 100644 index 0000000..dfc4907 --- /dev/null +++ b/src/Spout/Reader/Exception/NoSheetsFoundException.php @@ -0,0 +1,12 @@ +setGlobalFunctionsHelper(new GlobalFunctionsHelper()); + + return $reader; + } +} diff --git a/src/Spout/Reader/ReaderInterface2.php b/src/Spout/Reader/ReaderInterface2.php new file mode 100644 index 0000000..a61c83c --- /dev/null +++ b/src/Spout/Reader/ReaderInterface2.php @@ -0,0 +1,35 @@ + 0 + * Z => 25 + * AA => 26 : (26^(2-1) * (0+1)) + 0 + * AB => 27 : (26^(2-1) * (0+1)) + 1 + * BC => 54 : (26^(2-1) * (1+1)) + 2 + * BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25 + */ + foreach (str_split($column) as $single_cell_index) + { + $currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue; + + if ($columnLength == 1) { + $columnIndex += $currentColumnIndex; + } else { + $columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); + } + + $columnLength--; + } + + return $columnIndex; + } + + /** + * Returns whether a cell index is valid, in an Excel world. + * To be valid, the cell index should start with capital letters and be followed by numbers. + * + * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) 
+ * @return bool + */ + protected static function isValidCellIndex($cellIndex) + { + return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php new file mode 100644 index 0000000..8fffdb0 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactory.php @@ -0,0 +1,154 @@ + 20 * 600 ≈ 12KB + */ + const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12; + + /** + * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files + * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory + * and the string will be quickly retrieved. + * The performance bottleneck is not when creating these temporary files, but rather when loading their content. + * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works + * best when the indexes of the shared strings are sorted in the sheet data. + * 10,000 was chosen because it creates small files that are fast to be loaded in memory. + */ + const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; + + /** @var CachingStrategyFactory|null Singleton instance */ + protected static $instance = null; + + /** + * Private constructor for singleton + */ + private function __construct() + { + } + + /** + * Returns the singleton instance of the factory + * + * @return CachingStrategyFactory + */ + public static function getInstance() + { + if (self::$instance === null) { + self::$instance = new CachingStrategyFactory(); + } + + return self::$instance; + } + + /** + * Returns the best caching strategy, given the number of unique shared strings + * and the amount of memory available. 
+ * + * @param int $sharedStringsUniqueCount Number of unique shared strings + * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored + * @return CachingStrategyInterface The best caching strategy + */ + public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null) + { + if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) { + return new InMemoryStrategy($sharedStringsUniqueCount); + } else { + return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE); + } + } + + /** + * Returns whether it is safe to use in-memory caching, given the number of unique shared strings + * and the amount of memory available. + * + * @param int $sharedStringsUniqueCount Number of unique shared strings + * @return bool + */ + protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount) + { + $memoryAvailable = $this->getMemoryLimitInKB(); + + if ($memoryAvailable === -1) { + // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe + return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE); + } else { + $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB; + return ($memoryAvailable > $memoryNeeded); + } + } + + /** + * Returns the PHP "memory_limit" in Kilobytes + * + * @return float + */ + protected function getMemoryLimitInKB() + { + $memoryLimitFormatted = $this->getMemoryLimitFromIni(); + $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted)); + + // No memory limit + if ($memoryLimitFormatted === '-1') { + return -1; + } + + if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) { + $amount = intval($matches[1]); + $unit = $matches[2]; + + switch ($unit) { + case 'b': return ($amount / 1024); + case 'k': return $amount; + case 'm': return ($amount * 1024); + case 'g': return ($amount * 1024 * 1024); + case 't': return ($amount * 1024 * 1024 * 
1024); + } + } + + return -1; + } + + /** + * Returns the formatted "memory_limit" value + * + * @return string + */ + protected function getMemoryLimitFromIni() + { + return ini_get('memory_limit'); + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php new file mode 100644 index 0000000..631222a --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyInterface.php @@ -0,0 +1,44 @@ +fileSystemHelper = new FileSystemHelper($rootTempFolder); + $this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings')); + + $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile; + + $this->globalFunctionsHelper = new GlobalFunctionsHelper(); + $this->tempFilePointer = null; + } + + /** + * Adds the given string to the cache. + * + * @param string $sharedString The string to be added to the cache + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return void + */ + public function addStringForIndex($sharedString, $sharedStringIndex) + { + $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); + + if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { + if ($this->tempFilePointer) { + $this->globalFunctionsHelper->fclose($this->tempFilePointer); + } + $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w'); + } + + // The shared string retrieval logic expects each cell data to be on one line only + // Encoding the line feed character allows to preserve this assumption + $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); + + $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . 
PHP_EOL); + } + + /** + * Returns the path for the temp file that should contain the string for the given index + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The temp file path for the given index + */ + protected function getSharedStringTempFilePath($sharedStringIndex) + { + $numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile); + return $this->tempFolder . '/sharedstrings' . $numTempFile; + } + + /** + * Closes the cache after the last shared string was added. + * This prevents any additional string from being added to the cache. + * + * @return void + */ + public function closeCache() + { + // close pointer to the last temp file that was written + if ($this->tempFilePointer) { + $this->globalFunctionsHelper->fclose($this->tempFilePointer); + } + } + + + /** + * Returns the string located at the given index from the cache. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); + $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile; + + if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { + throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); + } + + if ($this->inMemoryTempFilePath !== $tempFilePath) { + // free memory + unset($this->inMemoryTempFileContents); + + $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath)); + $this->inMemoryTempFilePath = $tempFilePath; + } + + $sharedString = null; + if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { + 
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; + $sharedString = $this->unescapeLineFeed($escapedSharedString); + } + + if ($sharedString === null) { + throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); + } + + return rtrim($sharedString, PHP_EOL); + } + + /** + * Escapes the line feed characters (\n) + * + * @param string $unescapedString + * @return string + */ + private function escapeLineFeed($unescapedString) + { + return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString); + } + + /** + * Unescapes the line feed characters (\n) + * + * @param string $escapedString + * @return string + */ + private function unescapeLineFeed($escapedString) + { + return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString); + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function clearCache() + { + if ($this->tempFolder) { + $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder); + } + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php new file mode 100644 index 0000000..c6a5321 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/InMemoryStrategy.php @@ -0,0 +1,83 @@ +inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount); + $this->isCacheClosed = false; + } + + /** + * Adds the given string to the cache. + * + * @param string $sharedString The string to be added to the cache + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return void + */ + public function addStringForIndex($sharedString, $sharedStringIndex) + { + if (!$this->isCacheClosed) { + $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString); + } + } + + /** + * Closes the cache after the last shared string was added. 
+ * This prevents any additional string from being added to the cache. + * + * @return void + */ + public function closeCache() + { + $this->isCacheClosed = true; + } + + /** + * Returns the string located at the given index from the cache. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + try { + return $this->inMemoryCache->offsetGet($sharedStringIndex); + } catch (\RuntimeException $e) { + throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); + } + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function clearCache() + { + unset($this->inMemoryCache); + $this->isCacheClosed = false; + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php new file mode 100644 index 0000000..5c8fb46 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -0,0 +1,280 @@ +filePath = $filePath; + $this->tempFolder = $tempFolder; + } + + /** + * Returns whether the XLSX file contains a shared strings XML file + * + * @return bool + */ + public function hasSharedStrings() + { + $hasSharedStrings = false; + $zip = new \ZipArchive(); + + if ($zip->open($this->filePath) === true) { + $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false); + $zip->close(); + } + + return $hasSharedStrings; + } + + /** + * Builds an in-memory array containing all the shared strings of the sheet. + * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. + * It is then accessed by the sheet data, via the string index in the built table. 
+ * + * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx + * + * The XML file can be really big with sheets containing a lot of data. That is why + * we need to use a XML reader that provides streaming like the XMLReader library. + * Please note that SimpleXML does not provide such a functionality but since it is faster + * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. + * + * @return void + * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read + */ + public function extractSharedStrings() + { + $xmlReader = new \XMLReader(); + $sharedStringIndex = 0; + $escaper = new \Box\Spout\Common\Escaper\XLSX(); + + $sharedStringsFilePath = $this->getSharedStringsFilePath(); + if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) { + throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); + } + + $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); + $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); + + while ($xmlReader->read() && $xmlReader->name !== 'si') { + // do nothing until a 'si' tag is reached + } + + while ($xmlReader->name === 'si') { + $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); + $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); + + // removes nodes that should not be read, like the pronunciation of the Kanji characters + $cleanNode = $this->removeSuperfluousTextNodes($node); + + // find all text nodes 't'; there can be multiple if the cell contains formatting + $textNodes = $cleanNode->xpath('//ns:t'); + + $textValue = ''; + foreach ($textNodes as $textNode) { + if ($this->shouldPreserveWhitespace($textNode)) { + $textValue .= $textNode->__toString(); + } else { + $textValue .= trim($textNode->__toString()); + } + } + + $unescapedTextValue = 
$escaper->unescape($textValue); + $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); + + $sharedStringIndex++; + + // jump to the next 'si' tag + $xmlReader->next('si'); + } + + $this->cachingStrategy->closeCache(); + + $xmlReader->close(); + } + + /** + * @return string The path to the shared strings XML file + */ + protected function getSharedStringsFilePath() + { + return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH; + } + + /** + * Returns the shared strings unique count, as specified in the 'sst' tag. + * + * @param \XMLReader $xmlReader XMLReader instance + * @return int Number of unique shared strings in the sharedStrings.xml file + * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read + */ + protected function getSharedStringsUniqueCount($xmlReader) + { + // Use internal errors to avoid displaying lots of warning messages in case of invalid file + // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks + libxml_clear_errors(); + libxml_use_internal_errors(true); + + $xmlReader->next('sst'); + + // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) + while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) { + $xmlReader->read(); + } + + $readError = libxml_get_last_error(); + if ($readError !== false) { + throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]"); + } + + // reset the setting to display XML warnings/errors + libxml_use_internal_errors(false); + + return intval($xmlReader->getAttribute('uniqueCount')); + } + + /** + * Returns the best shared strings caching strategy.
+ * + * @param int $sharedStringsUniqueCount + * @return CachingStrategyInterface + */ + protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) + { + return CachingStrategyFactory::getInstance() + ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder); + } + + /** + * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. + * This is to simplify the parsing of the subtree. + * + * @param \XMLReader $xmlReader + * @return \SimpleXMLElement + * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read + */ + protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) + { + // Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node. + // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks + libxml_clear_errors(); + libxml_use_internal_errors(true); + + $node = null; + try { + $node = new \SimpleXMLElement($xmlReader->readOuterXml()); + } catch (\Exception $exception) { + $error = libxml_get_last_error(); + libxml_use_internal_errors(false); + + throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].'); + } + + libxml_use_internal_errors(false); + + return $node; + } + + /** + * Removes nodes that should not be read, like the pronunciation of the Kanji characters. + * By keeping them, their text content would be added to the read string. + * + * @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove + * @return \SimpleXMLElement Cleaned parent node + */ + protected function removeSuperfluousTextNodes($parentNode) + { + $tagsToRemove = [ + 'rPh', // Pronunciation of the text + ]; + + foreach ($tagsToRemove as $tagToRemove) { + $xpath = '//ns:' . 
$tagToRemove; + $nodesToRemove = $parentNode->xpath($xpath); + + foreach ($nodesToRemove as $nodeToRemove) { + // This is how to remove a node from the XML + unset($nodeToRemove[0]); + } + } + + return $parentNode; + } + + /** + * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. + * + * @param \SimpleXMLElement $textNode The text node element ('t') whose whitespace may be preserved + * @return bool Whether whitespace should be preserved + */ + protected function shouldPreserveWhitespace($textNode) + { + $shouldPreserveWhitespace = false; + + $attributes = $textNode->attributes('xml', true); + if ($attributes) { + foreach ($attributes as $attributeName => $attributeValue) { + if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') { + $shouldPreserveWhitespace = true; + break; + } + } + } + + return $shouldPreserveWhitespace; + } + + /** + * Returns the shared string at the given index, using the previously chosen caching strategy. + * + * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file + * @return string The shared string at the given index + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index + */ + public function getStringAtIndex($sharedStringIndex) + { + return $this->cachingStrategy->getStringAtIndex($sharedStringIndex); + } + + /** + * Destroys the cache, freeing memory and removing any created artifacts + * + * @return void + */ + public function cleanup() + { + if ($this->cachingStrategy) { + $this->cachingStrategy->clearCache(); + } + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php new file mode 100644 index 0000000..3cbe9cb --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -0,0 +1,199 @@ +filePath = $filePath; + $this->sharedStringsHelper = $sharedStringsHelper; + $this->globalFunctionsHelper = $globalFunctionsHelper; + } + +
/** + * Returns the sheets metadata of the file located at the previously given file path. + * The paths to the sheets' data are read from the [Content_Types].xml file. + * + * @return Sheet[] Sheets within the XLSX file + */ + public function getSheets() + { + $sheets = []; + + $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( + self::CONTENT_TYPES_XML_FILE_PATH, + self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML + ); + + // find all nodes defining a sheet + $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); + + for ($i = 0; $i < count($sheetNodes); $i++) { + $sheetNode = $sheetNodes[$i]; + $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; + + $sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i); + } + + return $sheets; + } + + /** + * Returns an instance of a sheet, given the path of its data XML file. + * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. + * Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID. + * The entry contains the ID and name of the sheet. + * + * If this piece of data can't be found by parsing the different XML files, the ID will default + * to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will + * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2"). 
+     *
+     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
+     * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
+     * @return Sheet Sheet instance
+     */
+    protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased)
+    {
+        // Default values, kept whenever the workbook XML files do not describe this sheet
+        $sheetId = $sheetIndexZeroBased + 1;
+        $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);
+
+        /*
+         * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
+         * In workbook.xml.rels, it is only "worksheets/sheet1.xml"
+         *
+         * The "xl/" prefix is removed as a prefix. ltrim($path, '/xl/') must not be used here:
+         * its second argument is a list of characters, so it would also eat the beginning of
+         * any folder name starting with "x", "l" or "/".
+         */
+        $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/');
+        if (strpos($sheetDataXMLFilePathInWorkbookXMLRels, 'xl/') === 0) {
+            $sheetDataXMLFilePathInWorkbookXMLRels = substr($sheetDataXMLFilePathInWorkbookXMLRels, strlen('xl/'));
+        }
+
+        // find the node associated to the given file path
+        $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
+        $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
+
+        if ($relationshipNodes && count($relationshipNodes) === 1) {
+            // The relationship ID is stored in its own variable so that the default sheet ID
+            // is preserved when no matching sheet entry exists in workbook.xml
+            $relationshipId = (string) $relationshipNodes[0]->attributes()->Id;
+
+            $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
+            $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]');
+
+            if ($sheetNodes && count($sheetNodes) === 1) {
+                $sheetNode = $sheetNodes[0];
+                $sheetId = (int) $sheetNode->attributes()->sheetId;
+                $escapedSheetName = (string) $sheetNode->attributes()->name;
+
+                $escaper = new \Box\Spout\Common\Escaper\XLSX();
+                $sheetName = $escaper->unescape($escapedSheetName);
+            }
+        }
+
+        return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetId, $sheetIndexZeroBased, $sheetName);
+    }
+
+    /**
+     * Returns the default name of the sheet whose data is located
+     * at the given path.
+     *
+     * @param string $sheetDataXMLFilePath Path of the sheet data XML file
+     * @return string The default sheet name
+     */
+    protected function getDefaultSheetName($sheetDataXMLFilePath)
+    {
+        // File name of the sheet data file, with the XML_EXTENSION suffix removed
+        return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
+    }
+
+    /**
+     * Returns a representation of the workbook.xml.rels file, ready to be parsed.
+     * The returned value is cached.
+     *
+     * @return \SimpleXMLElement XML element representing the workbook.xml.rels file
+     */
+    protected function getWorkbookXMLRelsAsXMLElement()
+    {
+        if (!$this->workbookXMLRelsAsXMLElement) {
+            $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace(
+                self::WORKBOOK_XML_RELS_FILE_PATH,
+                self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
+            );
+        }
+
+        return $this->workbookXMLRelsAsXMLElement;
+    }
+
+    /**
+     * Returns a representation of the workbook.xml file, ready to be parsed.
+     * The returned value is cached.
+     *
+     * @return \SimpleXMLElement XML element representing the workbook.xml file
+     */
+    protected function getWorkbookXMLAsXMLElement()
+    {
+        if (!$this->workbookXMLAsXMLElement) {
+            $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace(
+                self::WORKBOOK_XML_FILE_PATH,
+                self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
+            );
+        }
+
+        return $this->workbookXMLAsXMLElement;
+    }
+
+    /**
+     * Loads the contents of the given file in an XML parser and registers the given XPath namespace
+     * under the "ns" prefix.
+     *
+     * @param string $xmlFilePath The path of the XML file inside the XLSX file
+     * @param string $mainNamespace The main XPath namespace to register
+     * @return \SimpleXMLElement The XML element representing the file
+     * @throws \Box\Spout\Common\Exception\IOException If the file cannot be read from the XLSX file
+     */
+    protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
+    {
+        $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);
+
+        // A failed read would otherwise reach the XML parser as an empty document
+        // and surface as an unrelated "could not be parsed" exception.
+        if ($xmlContents === false) {
+            throw new \Box\Spout\Common\Exception\IOException('Could not read "' . $xmlFilePath . '".');
+        }
+
+        $xmlElement = new \SimpleXMLElement($xmlContents);
+        $xmlElement->registerXPathNamespace('ns', $mainNamespace);
+
+        return $xmlElement;
+    }
+}
diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php
new file mode 100644
index 0000000..68712cc
--- /dev/null
+++ b/src/Spout/Reader/XLSX/Reader.php
@@ -0,0 +1,93 @@
+tempFolder = $tempFolder;
+        return $this;
+    }
+
+    /**
+     * Opens the file at the given file path to make it ready to be read.
+     * It also parses the sharedStrings.xml file (when the file has one) to make the shared strings
+     * available, and creates the iterator over the available sheets.
+     *
+     * @param string $filePath Path of the file to be read
+     * @return void
+     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read
+     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
+     */
+    protected function openReader($filePath)
+    {
+        $this->zip = new \ZipArchive();
+
+        // ZipArchive::open() returns true on success and an error code otherwise
+        if ($this->zip->open($filePath) !== true) {
+            throw new IOException('Could not open ' . $filePath . ' for reading.');
+        }
+
+        $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder);
+
+        if ($this->sharedStringsHelper->hasSharedStrings()) {
+            // Extracts all the strings from the sheets for easy access in the future
+            $this->sharedStringsHelper->extractSharedStrings();
+        }
+
+        $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
+    }
+
+    /**
+     * Returns an iterator to iterate over sheets.
+     *
+     * @return SheetIterator To iterate over sheets
+     */
+    public function getSheetIterator()
+    {
+        return $this->sheetIterator;
+    }
+
+    /**
+     * Closes the reader. To be used after reading the file.
+     *
+     * @return void
+     */
+    protected function closeReader()
+    {
+        if ($this->zip) {
+            $this->zip->close();
+        }
+
+        // Clears the shared strings cache built when the reader was opened
+        if ($this->sharedStringsHelper) {
+            $this->sharedStringsHelper->cleanup();
+        }
+    }
+}
diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php
new file mode 100644
index 0000000..e96898f
--- /dev/null
+++ b/src/Spout/Reader/XLSX/RowIterator.php
@@ -0,0 +1,356 @@
+filePath = $filePath;
+        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
+        $this->sharedStringsHelper = $sharedStringsHelper;
+
+        $this->xmlReader = new \XMLReader();
+        $this->escaper = new \Box\Spout\Common\Escaper\XLSX();
+    }
+
+    /**
+     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
+     * @return string Path of the XML file containing the sheet data,
+     *                without the leading slash.
+     */
+    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
+    {
+        return ltrim($sheetDataXMLFilePath, '/');
+    }
+
+    /**
+     * Rewind the Iterator to the first element.
+     * Initializes the XMLReader object that reads the associated sheet data.
+     * The XMLReader is opened with LIBXML_NONET, which disables network access while loading.
+     * @link http://php.net/manual/en/iterator.rewind.php
+     *
+     * @return void
+     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
+     */
+    public function rewind()
+    {
+        // Releases the previous input, if any, so that the iteration can be restarted
+        $this->xmlReader->close();
+
+        $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
+        if ($this->xmlReader->open($sheetDataFilePath, null, LIBXML_NONET) === false) {
+            throw new IOException('Could not open "' . $this->sheetDataXMLFilePath . '".');
+        }
+
+        $this->numReadRows = 0;
+        $this->rowDataBuffer = null;
+        $this->hasReachedEndOfFile = false;
+        $this->numColumns = 0;
+
+        // Loads the first row in the buffer
+        $this->next();
+    }
+
+    /**
+     * Checks if current position is valid
+     * @link http://php.net/manual/en/iterator.valid.php
+     *
+     * @return boolean
+     */
+    public function valid()
+    {
+        return (!$this->hasReachedEndOfFile);
+    }
+
+    /**
+     * Move forward to next element. Empty rows will be skipped.
+     * The read row is stored in the buffer returned by current().
+     *
+     * The end of the file is reached when the closing tag of the worksheet node is found, or when
+     * the XML reader cannot provide any more node before a row could be completed. The latter
+     * also covers truncated or invalid sheet data, that would otherwise be iterated over forever.
+     * @link http://php.net/manual/en/iterator.next.php
+     *
+     * @return void
+     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
+     */
+    public function next()
+    {
+        $isInsideRowTag = false;
+        $hasReadFullRow = false;
+        $rowData = [];
+
+        while ($this->xmlReader->read()) {
+            $nodeType = $this->xmlReader->nodeType;
+            $nodeName = $this->xmlReader->name;
+
+            if ($nodeType === \XMLReader::ELEMENT && $nodeName === self::XML_NODE_DIMENSION) {
+                // Read dimensions of the sheet
+                $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
+                if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
+                    $lastCellIndex = $matches[1];
+                    $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
+                }
+
+            } elseif ($nodeType === \XMLReader::ELEMENT && $nodeName === self::XML_NODE_ROW) {
+                // Start of the row description
+                $isInsideRowTag = true;
+
+                // Read spans info if present; it takes precedence over the sheet dimensions
+                $numberOfColumnsForRow = $this->numColumns;
+                $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
+                if ($spans) {
+                    list(, $numberOfColumnsForRow) = explode(':', $spans);
+                    $numberOfColumnsForRow = intval($numberOfColumnsForRow);
+                }
+                // Pre-fill the row so that cells missing from the XML are read as empty strings
+                $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
+
+            } elseif ($isInsideRowTag && $nodeType === \XMLReader::ELEMENT && $nodeName === self::XML_NODE_CELL) {
+                // Start of a cell description
+                $currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
+                $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
+
+                $node = $this->xmlReader->expand();
+                $rowData[$currentColumnIndex] = $this->getCellValue($node);
+
+            } elseif ($nodeType === \XMLReader::END_ELEMENT && $nodeName === self::XML_NODE_ROW) {
+                // End of the row description
+                // If needed, we fill the empty cells
+                $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
+                $this->numReadRows++;
+                $hasReadFullRow = true;
+                break;
+
+            } elseif ($nodeType === \XMLReader::END_ELEMENT && $nodeName === self::XML_NODE_WORKSHEET) {
+                // The closing tag of the worksheet node marks the end of the file
+                $this->hasReachedEndOfFile = true;
+            }
+        }
+
+        if (!$hasReadFullRow) {
+            // The reader stopped providing nodes before the end of a row: there is nothing left to
+            // read, even if the closing tag of the worksheet node was never found.
+            $this->hasReachedEndOfFile = true;
+        }
+
+        $this->rowDataBuffer = $rowData;
+    }
+
+    /**
+     * Returns the cell's string value from a node's nested value node
+     *
+     * @param \DOMNode $node Cell node, expected to support getElementsByTagName()
+     * @return string The value associated with the cell (empty string when there is no value node)
+     */
+    protected function getVNodeValue($node)
+    {
+        // for cell types having a "v" tag containing the value.
+        // if not, the returned value should be empty string.
+        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
+        if ($vNode !== null) {
+            return $vNode->nodeValue;
+        }
+        return "";
+    }
+
+    /**
+     * Returns the cell String value where string is inline.
+     *
+     * @param \DOMNode $node Cell node, expected to support getElementsByTagName()
+     * @return string The (unescaped) value associated with the cell, empty string if the cell has no inline text node
+     */
+    protected function formatInlineStringCellValue($node)
+    {
+        // The inline string is the content of the first XML_NODE_INLINE_STRING_VALUE node found in the cell
+        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
+
+        // item(0) is null when the cell is declared as inline string but carries no text node
+        $escapedCellValue = ($tNode !== null) ? trim($tNode->nodeValue) : '';
+
+        return $this->escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Returns the cell String value from shared-strings file using nodeValue index.
+     *
+     * @param string $nodeValue Index of the shared string, as found in the value node of the cell
+     * @return string The (unescaped) value associated with the cell
+     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the index
+     */
+    protected function formatSharedStringCellValue($nodeValue)
+    {
+        // The value node of the cell holds the index of the string, not the string itself
+        $sharedStringIndex = intval($nodeValue);
+        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
+
+        return $this->escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Returns the cell String value, where string is stored in value node.
+     *
+     * @param string $nodeValue
+     * @return string The (unescaped, trimmed) value associated with the cell
+     */
+    protected function formatStrCellValue($nodeValue)
+    {
+        $escapedCellValue = trim($nodeValue);
+
+        return $this->escaper->unescape($escapedCellValue);
+    }
+
+    /**
+     * Returns the cell Numeric value from string of nodeValue.
+     *
+     * @param string $nodeValue
+     * @return int|float An int when the value is written as a plain integer that PHP can
+     *                   represent exactly ("10", "-3"), a float otherwise ("1.5", "1E3")
+     */
+    protected function formatNumericCellValue($nodeValue)
+    {
+        // $nodeValue comes from the XML and is a string: is_int() would always be false on it.
+        // The value is an integer only if converting it to int and back gives the same string,
+        // which also rules out decimals, exponents and values that do not fit in a PHP int.
+        $intValue = intval($nodeValue);
+        if ((string) $intValue === (string) $nodeValue) {
+            return $intValue;
+        }
+
+        return floatval($nodeValue);
+    }
+
+    /**
+     * Returns the cell Boolean value from a specific node's Value.
+     *
+     * @param string $nodeValue
+     * @return bool The value associated with the cell ("0" and empty string are false)
+     */
+    protected function formatBooleanCellValue($nodeValue)
+    {
+        // Same conversion rules as boolval()
+        return (bool) $nodeValue;
+    }
+
+    /**
+     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
+     *
+     * @param string $nodeValue
+     * @return \DateTime|null The value associated with the cell (null when the value is not a valid date)
+     */
+    protected function formatDateCellValue($nodeValue)
+    {
+        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
+        try {
+            return new \DateTime($nodeValue);
+        } catch (\Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
+     *
+     * @param \DOMNode $node Cell node, expected to support getAttribute() and getElementsByTagName()
+     * @return string|int|float|bool|\DateTime|null The value associated with the cell
+     *         (null when the cell type is unknown or the date is invalid)
+     */
+    protected function getCellValue($node)
+    {
+        // Default cell type is "n"
+        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
+        $vNodeValue = $this->getVNodeValue($node);
+
+        // Inline strings do not use the value node, so an empty value node only means
+        // "empty cell" for the other cell types
+        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
+            return $vNodeValue;
+        }
+
+        switch ($cellType) {
+            case self::CELL_TYPE_INLINE_STRING:
+                return $this->formatInlineStringCellValue($node);
+            case self::CELL_TYPE_SHARED_STRING:
+                return $this->formatSharedStringCellValue($vNodeValue);
+            case self::CELL_TYPE_STR:
+                return $this->formatStrCellValue($vNodeValue);
+            case self::CELL_TYPE_BOOLEAN:
+                return $this->formatBooleanCellValue($vNodeValue);
+            case self::CELL_TYPE_NUMERIC:
+                return $this->formatNumericCellValue($vNodeValue);
+            case self::CELL_TYPE_DATE:
+                return $this->formatDateCellValue($vNodeValue);
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Return the current element, from the buffer.
+ * @link http://php.net/manual/en/iterator.current.php + * + * @return array|null + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + $this->xmlReader->close(); + } +} diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php new file mode 100644 index 0000000..e2eebec --- /dev/null +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -0,0 +1,74 @@ +rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); + $this->id = $sheetId; + $this->index = $sheetIndex; + $this->name = $sheetName; + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } + + /** + * @return int ID of the sheet + */ + public function getId() + { + return $this->id; + } + + /** + * @return int Index of the sheet, based on order of creation (zero-based) + */ + public function getIndex() + { + return $this->index; + } + + /** + * @return string Name of the sheet + */ + public function getName() + { + return $this->name; + } +} diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php new file mode 100644 index 0000000..aae58c2 --- /dev/null +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -0,0 +1,112 @@ +sheets = $sheetHelper->getSheets(); + + if (count($this->sheets) === 0) { + throw new NoSheetsFoundException('The file must contain at least one sheet.'); + } + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->currentSheetIndex = 0; + } + + /** + * Checks if current position is valid + * @link 
http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->currentSheetIndex < count($this->sheets)); + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + if (array_key_exists($this->currentSheetIndex, $this->sheets)) { + $currentSheet = $this->sheets[$this->currentSheetIndex]; + $currentSheet->getRowIterator()->end(); + + $this->currentSheetIndex++; + } + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheets[$this->currentSheetIndex]; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->currentSheetIndex + 1; + } + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + // make sure we are not leaking memory in case the iteration stopped before the end + foreach ($this->sheets as $sheet) { + $sheet->getRowIterator()->end(); + } + } +} diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php new file mode 100644 index 0000000..de55b94 --- /dev/null +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -0,0 +1,181 @@ +open('/path/to/fake/file.csv'); + } + + /** + * @expectedException \Box\Spout\Common\Exception\IOException + * + * @return void + */ + public function testOpenShouldThrowExceptionIfFileNotReadable() + { + $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') + ->setMethods(['is_readable']) + ->getMock(); + $helperStub->method('is_readable')->willReturn(false); + + $resourcePath = $this->getResourcePath('csv_standard.csv'); + + $reader = ReaderFactory2::create(Type::CSV); + $reader->setGlobalFunctionsHelper($helperStub); + 
$reader->open($resourcePath); + } + + + /** + * @return void + */ + public function testReadStandardCSV() + { + $allRows = $this->getAllRowsForFile('csv_standard.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldNotStopAtCommaIfEnclosed() + { + $allRows = $this->getAllRowsForFile('csv_with_comma_enclosed.csv'); + $this->assertEquals('This is, a comma', $allRows[0][0]); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCells() + { + $allRows = $this->getAllRowsForFile('csv_with_empty_cells.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', '', 'csv--23'], + ['csv--31', 'csv--32', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyLines() + { + $allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldHaveTheRightNumberOfCells() + { + $allRows = $this->getAllRowsForFile('csv_with_different_cells_number.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22'], + ['csv--31'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportCustomFieldDelimiter() + { + $allRows = $this->getAllRowsForFile('csv_delimited_with_pipes.csv', '|'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportCustomFieldEnclosure() + { 
+ $allRows = $this->getAllRowsForFile('csv_text_enclosed_with_pound.csv', ',', '#'); + $this->assertEquals('This is, a comma', $allRows[0][0]); + } + + /** + * @return void + */ + public function testReadShouldSkipUtf8Bom() + { + $allRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv'); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @param string $fileName + * @param string|void $fieldDelimiter + * @param string|void $fieldEnclosure + * @return array All the read rows the given file + */ + private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"') + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + $reader = ReaderFactory2::create(Type::CSV); + $reader->setFieldDelimiter($fieldDelimiter); + $reader->setFieldEnclosure($fieldEnclosure); + + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + return $allRows; + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php new file mode 100644 index 0000000..ff417b9 --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php @@ -0,0 +1,60 @@ + 1, 3 => 3]; + $filledArray = CellHelper::fillMissingArrayIndexes($arrayToFill, 'FILL'); + + $expectedFilledArray = ['FILL', 1, 'FILL', 3]; + $this->assertEquals($expectedFilledArray, $filledArray); + } + + /** + * @return array + */ + public function dataProviderForTestGetColumnIndexFromCellIndex() + { + return [ + ['A1', 0], + ['Z3', 25], + ['AA5', 26], + ['AB24', 27], + ['BC5', 54], + ['BCZ99', 1455], + ]; + } + + /** + * @dataProvider dataProviderForTestGetColumnIndexFromCellIndex + * + * @param string $cellIndex + * @param int $expectedColumnIndex + * @return void + */ + 
public function testGetColumnIndexFromCellIndex($cellIndex, $expectedColumnIndex) + { + $this->assertEquals($expectedColumnIndex, CellHelper::getColumnIndexFromCellIndex($cellIndex)); + } + + /** + * @expectedException \Box\Spout\Common\Exception\InvalidArgumentException + * + * @return void + */ + public function testGetColumnIndexFromCellIndexShouldThrowIfInvalidCellIndex() + { + CellHelper::getColumnIndexFromCellIndex('InvalidCellIndex'); + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php new file mode 100644 index 0000000..ea77b4f --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsCaching/CachingStrategyFactoryTest.php @@ -0,0 +1,99 @@ +getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') + ->disableOriginalConstructor() + ->setMethods(['getMemoryLimitInKB']) + ->getMock(); + + $factoryStub->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB); + + \ReflectionHelper::setStaticValue('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); + + $strategy = $factoryStub->getBestCachingStrategy($sharedStringsUniqueCount, null); + + $fullExpectedStrategyClassName = 'Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\\' . 
$expectedStrategyClassName; + $this->assertEquals($fullExpectedStrategyClassName, get_class($strategy)); + + $strategy->clearCache(); + \ReflectionHelper::reset(); + } + + /** + * @return array + */ + public function dataProviderForTestGetMemoryLimitInKB() + { + return [ + ['-1', -1], + ['invalid', -1], + ['1024B', 1], + ['128K', 128], + ['256KB', 256], + ['512M', 512 * 1024], + ['2MB', 2 * 1024], + ['1G', 1 * 1024 * 1024], + ['10GB', 10 * 1024 * 1024], + ['2T', 2 * 1024 * 1024 * 1024], + ['5TB', 5 * 1024 * 1024 * 1024], + ]; + } + + /** + * @dataProvider dataProviderForTestGetMemoryLimitInKB + * + * @param string $memoryLimitFormatted + * @param float $expectedMemoryLimitInKB + * @return void + */ + public function testGetMemoryLimitInKB($memoryLimitFormatted, $expectedMemoryLimitInKB) + { + /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $factoryStub */ + $factoryStub = $this + ->getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') + ->disableOriginalConstructor() + ->setMethods(['getMemoryLimitFromIni']) + ->getMock(); + + $factoryStub->method('getMemoryLimitFromIni')->willReturn($memoryLimitFormatted); + + $memoryLimitInKB = \ReflectionHelper::callMethodOnObject($factoryStub, 'getMemoryLimitInKB'); + + $this->assertEquals($expectedMemoryLimitInKB, $memoryLimitInKB); + } +} diff --git a/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php new file mode 100644 index 0000000..a72d19a --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -0,0 +1,112 @@ +getResourcePath('one_sheet_with_shared_strings.xlsx'); + $this->sharedStringsHelper = new SharedStringsHelper($resourcePath); + } + + /** + * @return void + */ + public function tearDown() + { + $this->sharedStringsHelper->cleanup(); + } + + /** + * @expectedException \Box\Spout\Reader\Exception\SharedStringNotFoundException + * @return void + */ + public 
function testGetStringAtIndexShouldThrowExceptionIfStringNotFound() + { + $this->sharedStringsHelper->extractSharedStrings(); + $this->sharedStringsHelper->getStringAtIndex(PHP_INT_MAX); + } + + /** + * @return void + */ + public function testGetStringAtIndexShouldReturnTheCorrectStringIfFound() + { + $this->sharedStringsHelper->extractSharedStrings(); + + $sharedString = $this->sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('s1--A1', $sharedString); + + $sharedString = $this->sharedStringsHelper->getStringAtIndex(24); + $this->assertEquals('s1--E5', $sharedString); + + $usedCachingStrategy = \ReflectionHelper::getValueOnObject($this->sharedStringsHelper, 'cachingStrategy'); + $this->assertTrue($usedCachingStrategy instanceof InMemoryStrategy); + } + + /** + * @return void + */ + public function testGetStringAtIndexShouldWorkWithMultilineStrings() + { + $resourcePath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals("s1\nA1", $sharedString); + + $sharedString = $sharedStringsHelper->getStringAtIndex(24); + $this->assertEquals("s1\nE5", $sharedString); + + $sharedStringsHelper->cleanup(); + } + + /** + * @return void + */ + public function testGetStringAtIndexWithFileBasedStrategy() + { + // force the file-based strategy by setting no memory limit + $originalMemoryLimit = ini_get('memory_limit'); + ini_set('memory_limit', '-1'); + + $resourcePath = $this->getResourcePath('sheet_with_lots_of_shared_strings.xlsx'); + $sharedStringsHelper = new SharedStringsHelper($resourcePath); + + $sharedStringsHelper->extractSharedStrings(); + + $sharedString = $sharedStringsHelper->getStringAtIndex(0); + $this->assertEquals('str', $sharedString); + + $sharedString = 
$sharedStringsHelper->getStringAtIndex(CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE + 1); + $this->assertEquals('str', $sharedString); + + $usedCachingStrategy = \ReflectionHelper::getValueOnObject($sharedStringsHelper, 'cachingStrategy'); + $this->assertTrue($usedCachingStrategy instanceof FileBasedStrategy); + + $sharedStringsHelper->cleanup(); + + ini_set('memory_limit', $originalMemoryLimit); + } +} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php new file mode 100644 index 0000000..c5fb583 --- /dev/null +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -0,0 +1,300 @@ +getAllRowsForFile($filePath); + } + + /** + * @return array + */ + public function dataProviderForTestReadForAllWorksheets() + { + return [ + ['one_sheet_with_shared_strings.xlsx', 5, 5], + ['one_sheet_with_inline_strings.xlsx', 5, 5], + ['two_sheets_with_shared_strings.xlsx', 10, 5], + ['two_sheets_with_inline_strings.xlsx', 10, 5] + ]; + } + + /** + * @dataProvider dataProviderForTestReadForAllWorksheets + * + * @param string $resourceName + * @param int $expectedNumOfRows + * @param int $expectedNumOfCellsPerRow + * @return void + */ + public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow) + { + $allRows = $this->getAllRowsForFile($resourceName); + + $this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows"); + foreach ($allRows as $row) { + $this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row"); + } + } + + /** + * @return void + */ + public function testReadShouldSupportFilesWithoutSharedStringsFile() + { + $allRows = $this->getAllRowsForFile('sheet_with_no_shared_strings_file.xlsx'); + + $expectedRows = [ + [10, 11], + [20, 21], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportAllCellTypes() + { + $allRows = 
$this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); + + $expectedRows = [ + [ + 's1--A1', 's1--A2', + false, true, + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'), + \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), + 10, 10.43, + null, + 'weird string', // valid 'str' string + null, // invalid date + ], + ['', '', '', '', '', '', '', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCellsAtTheEndIfDimensionsSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_but_spans_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be 2 rows'); + foreach ($allRows as $row) { + $this->assertEquals(5, count($row), 'There should be 5 cells for every row, because empty rows should be preserved'); + } + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2', '', ''], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldKeepEmptyCellsAtTheEndIfNoDimensionsButSpansSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be 2 rows'); + $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); + $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should 
be 2 rows'); + $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); + $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A2', 's1--B2', 's1--C2'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipEmptyRows() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx'); + + $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); + + $expectedRows = [ + ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], + ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSupportEmptySharedString() + { + $allRows = $this->getAllRowsForFile('sheet_with_empty_shared_string.xlsx'); + + $expectedRows = [ + ['s1--A1', '', 's1--C1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldPreserveSpaceIfSpecified() + { + $allRows = $this->getAllRowsForFile('sheet_with_preserve_space_shared_strings.xlsx'); + + $expectedRows = [ + [' s1--A1', 's1--B1 ', ' s1--C1 '], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @return void + */ + public function testReadShouldSkipPronunciationData() + { + $allRows = $this->getAllRowsForFile('sheet_with_pronunciation.xlsx'); + + $expectedRow = ['名前', '一二三四']; + $this->assertEquals($expectedRow, $allRows[0], 'Pronunciation data should be removed.'); + } + + + /** + * @return array + */ + public function dataProviderForTestReadShouldBeProtectedAgainstAttacks() + { + return [ + ['attack_billion_laughs.xlsx'], + ['attack_quadratic_blowup.xlsx'], + ]; + } + + /** + * @dataProvider 
dataProviderForTestReadShouldBeProtectedAgainstAttacks + * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) + * + * @param string $fileName + * @return void + */ + public function testReadShouldBeProtectedAgainstAttacks($fileName) + { + $startTime = microtime(true); + + try { + $this->getAllRowsForFile($fileName); + $this->fail('An exception should have been thrown'); + } catch (IOException $exception) { + $duration = microtime(true) - $startTime; + $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); + + $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB + $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); + } + } + + /** + * @return void + */ + public function testReadShouldBeAbleToProcessEmptySheets() + { + $allRows = $this->getAllRowsForFile('sheet_with_no_cells.xlsx'); + $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); + } + + /** + * @return void + */ + public function testReadShouldSkipFormulas() + { + $allRows = $this->getAllRowsForFile('sheet_with_formulas.xlsx'); + + $expectedRows = [ + ['val1', 'val2', 'total1', 'total2'], + [10, 20, 30, 21], + [11, 21, 32, 41], + ]; + $this->assertEquals($expectedRows, $allRows); + } + + /** + * @param string $fileName + * @return array All the read rows the given file + */ + private function getAllRowsForFile($fileName) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + $reader = ReaderFactory2::create(Type::XLSX); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + return $allRows; + } +} diff --git a/tests/Spout/Reader/XLSX/SheetTest.php 
b/tests/Spout/Reader/XLSX/SheetTest.php new file mode 100644 index 0000000..c9449f4 --- /dev/null +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -0,0 +1,53 @@ +openFileAndReturnSheets('two_sheets_with_custom_names.xlsx'); + + $this->assertEquals('CustomName1', $sheets[0]->getName()); + $this->assertEquals(0, $sheets[0]->getIndex()); + $this->assertEquals(1, $sheets[0]->getId()); + + $this->assertEquals('CustomName2', $sheets[1]->getName()); + $this->assertEquals(1, $sheets[1]->getIndex()); + $this->assertEquals(2, $sheets[1]->getId()); + } + + /** + * @param string $fileName + * @return Sheet[] + */ + private function openFileAndReturnSheets($fileName) + { + $resourcePath = $this->getResourcePath($fileName); + $reader = ReaderFactory2::create(Type::XLSX); + $reader->open($resourcePath); + + $sheets = []; + foreach ($reader->getSheetIterator() as $sheet) { + $sheets[] = $sheet; + } + + $reader->close(); + + return $sheets; + } +} From c52dd7bde8a5751b3f75c379a239aa9e9991c4a6 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 20 Jul 2015 22:18:56 -0700 Subject: [PATCH 2/7] Remove old reader files --- src/Spout/Reader/AbstractReader.php | 101 +---- src/Spout/Reader/AbstractReader2.php | 111 ----- src/Spout/Reader/CSV.php | 130 ------ src/Spout/Reader/CSV/Reader.php | 6 +- .../Exception/EndOfFileReachedException.php | 12 - .../EndOfWorksheetsReachedException.php | 12 - .../Exception/NoWorksheetsFoundException.php | 12 - src/Spout/Reader/Helper/XLSX/CellHelper.php | 97 ----- .../CachingStrategyFactory.php | 154 ------- .../CachingStrategyInterface.php | 44 -- .../FileBasedStrategy.php | 188 --------- .../SharedStringsCaching/InMemoryStrategy.php | 82 ---- .../Helper/XLSX/SharedStringsHelper.php | 280 ------------- .../Reader/Helper/XLSX/WorksheetHelper.php | 209 ---------- src/Spout/Reader/Internal/XLSX/Worksheet.php | 58 --- src/Spout/Reader/ReaderFactory.php | 6 +- src/Spout/Reader/ReaderFactory2.php | 44 -- src/Spout/Reader/ReaderInterface.php | 22 +- 
src/Spout/Reader/ReaderInterface2.php | 35 -- src/Spout/Reader/Sheet.php | 57 --- src/Spout/Reader/XLSX.php | 394 ------------------ src/Spout/Reader/XLSX/Reader.php | 6 +- tests/Spout/Reader/CSV/ReaderTest.php | 18 +- tests/Spout/Reader/CSVTest.php | 208 --------- .../Reader/Helper/XLSX/CellHelperTest.php | 60 --- .../CachingStrategyFactoryTest.php | 99 ----- .../Helper/XLSX/SharedStringsHelperTest.php | 112 ----- tests/Spout/Reader/SheetTest.php | 52 --- tests/Spout/Reader/XLSX/ReaderTest.php | 4 +- tests/Spout/Reader/XLSX/SheetTest.php | 4 +- tests/Spout/Reader/XLSXTest.php | 332 --------------- 31 files changed, 46 insertions(+), 2903 deletions(-) delete mode 100644 src/Spout/Reader/AbstractReader2.php delete mode 100644 src/Spout/Reader/CSV.php delete mode 100644 src/Spout/Reader/Exception/EndOfFileReachedException.php delete mode 100644 src/Spout/Reader/Exception/EndOfWorksheetsReachedException.php delete mode 100644 src/Spout/Reader/Exception/NoWorksheetsFoundException.php delete mode 100644 src/Spout/Reader/Helper/XLSX/CellHelper.php delete mode 100644 src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php delete mode 100644 src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php delete mode 100644 src/Spout/Reader/Helper/XLSX/SharedStringsCaching/FileBasedStrategy.php delete mode 100644 src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php delete mode 100644 src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php delete mode 100644 src/Spout/Reader/Helper/XLSX/WorksheetHelper.php delete mode 100644 src/Spout/Reader/Internal/XLSX/Worksheet.php delete mode 100644 src/Spout/Reader/ReaderFactory2.php delete mode 100644 src/Spout/Reader/ReaderInterface2.php delete mode 100644 src/Spout/Reader/Sheet.php delete mode 100644 src/Spout/Reader/XLSX.php delete mode 100644 tests/Spout/Reader/CSVTest.php delete mode 100644 tests/Spout/Reader/Helper/XLSX/CellHelperTest.php delete mode 100644 
tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php delete mode 100644 tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php delete mode 100644 tests/Spout/Reader/SheetTest.php delete mode 100644 tests/Spout/Reader/XLSXTest.php diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index 2e2e2de..bfbedf8 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -4,7 +4,6 @@ namespace Box\Spout\Reader; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\ReaderNotOpenedException; -use Box\Spout\Reader\Exception\EndOfFileReachedException; /** * Class AbstractReader @@ -14,18 +13,9 @@ use Box\Spout\Reader\Exception\EndOfFileReachedException; */ abstract class AbstractReader implements ReaderInterface { - /** @var int Used to keep track of the row index */ - protected $currentRowIndex = 0; - /** @var bool Indicates whether the stream is currently open */ protected $isStreamOpened = false; - /** @var bool Indicates whether all rows have been read */ - protected $hasReachedEndOfFile = false; - - /** @var array Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer = null; - /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -38,11 +28,11 @@ abstract class AbstractReader implements ReaderInterface abstract protected function openReader($filePath); /** - * Reads and returns next row if available. + * Returns an iterator to iterate over sheets. * - * @return array|null Array that contains the data for the read row or null at the end of the file + * @return \Iterator To iterate over sheets */ - abstract protected function read(); + abstract public function getConcreteSheetIterator(); /** * Closes the reader. To be used after reading the file. 
@@ -80,9 +70,6 @@ abstract class AbstractReader implements ReaderInterface } } - $this->currentRowIndex = 0; - $this->hasReachedEndOfFile = false; - try { $this->openReader($filePath); $this->isStreamOpened = true; @@ -103,82 +90,18 @@ abstract class AbstractReader implements ReaderInterface } /** - * Returns whether all rows have been read (i.e. if we are at the end of the file). - * To know if the end of file has been reached, it uses a buffer. If the buffer is - * empty (meaning, nothing has been read or previous read line has been consumed), then - * it reads the next line, store it in the buffer for the next time or flip a variable if - * the end of file has been reached. + * Returns an iterator to iterate over sheets. * - * @return bool Whether all rows have been read (i.e. if we are at the end of the file) - * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If the stream was not opened first + * @return \Iterator To iterate over sheets + * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader */ - public function hasNextRow() + public function getSheetIterator() { if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); + throw new ReaderNotOpenedException('Reader should be opened first.'); } - if ($this->hasReachedEndOfFile) { - return false; - } - - // if the buffer contains unprocessed row - if (!$this->isRowDataBufferEmpty()) { - return true; - } - - // otherwise, try to read the next line line, and store it in the buffer - $this->rowDataBuffer = $this->read(); - - // if the buffer is still empty after reading a row, it means end of file was reached - $this->hasReachedEndOfFile = $this->isRowDataBufferEmpty(); - - return (!$this->hasReachedEndOfFile); - } - - /** - * Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by - * actually reading the file. 
- * - * @return array Array that contains the data for the read row - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first - * @throws \Box\Spout\Reader\Exception\EndOfFileReachedException - */ - public function nextRow() - { - if (!$this->hasNextRow()) { - throw new EndOfFileReachedException('End of file was reached. Cannot read more rows.'); - } - - // Get data from buffer (if the buffer was empty, it was filled by the call to hasNextRow()) - $rowData = $this->rowDataBuffer; - - // empty buffer to mark the row as consumed - $this->emptyRowDataBuffer(); - - $this->currentRowIndex++; - - return $rowData; - } - - /** - * Returns whether the buffer where the row data is stored is empty - * - * @return bool - */ - protected function isRowDataBufferEmpty() - { - return ($this->rowDataBuffer === null); - } - - /** - * Empty the buffer that stores row data - * - * @return void - */ - protected function emptyRowDataBuffer() - { - $this->rowDataBuffer = null; + return $this->getConcreteSheetIterator(); } /** @@ -190,6 +113,12 @@ abstract class AbstractReader implements ReaderInterface { if ($this->isStreamOpened) { $this->closeReader(); + + $sheetIterator = $this->getConcreteSheetIterator(); + if ($sheetIterator) { + $sheetIterator->end(); + } + $this->isStreamOpened = false; } } diff --git a/src/Spout/Reader/AbstractReader2.php b/src/Spout/Reader/AbstractReader2.php deleted file mode 100644 index ef24412..0000000 --- a/src/Spout/Reader/AbstractReader2.php +++ /dev/null @@ -1,111 +0,0 @@ -globalFunctionsHelper = $globalFunctionsHelper; - return $this; - } - - /** - * Prepares the reader to read the given file. It also makes sure - * that the file exists and is readable. 
- * - * @param string $filePath Path of the file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted - */ - public function open($filePath) - { - if (!$this->isPhpStream($filePath)) { - // we skip the checks if the provided file path points to a PHP stream - if (!$this->globalFunctionsHelper->file_exists($filePath)) { - throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.'); - } else if (!$this->globalFunctionsHelper->is_readable($filePath)) { - throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.'); - } - } - - try { - $this->openReader($filePath); - $this->isStreamOpened = true; - } catch (\Exception $exception) { - throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')'); - } - } - - /** - * Checks if a path is a PHP stream (like php://output, php://memory, ...) 
- * - * @param string $filePath Path of the file to be read - * @return bool Whether the given path maps to a PHP stream - */ - protected function isPhpStream($filePath) - { - return (strpos($filePath, 'php://') === 0); - } - - /** - * Closes the reader, preventing any additional reading - * - * @return void - */ - public function close() - { - if ($this->isStreamOpened) { - $this->closeReader(); - - $sheetIterator = $this->getSheetIterator(); - if ($sheetIterator) { - $sheetIterator->end(); - } - - $this->isStreamOpened = false; - } - } -} diff --git a/src/Spout/Reader/CSV.php b/src/Spout/Reader/CSV.php deleted file mode 100644 index 2da160f..0000000 --- a/src/Spout/Reader/CSV.php +++ /dev/null @@ -1,130 +0,0 @@ -fieldDelimiter = $fieldDelimiter; - return $this; - } - - /** - * Sets the field enclosure for the CSV - * - * @param string $fieldEnclosure Character that enclose fields - * @return CSV - */ - public function setFieldEnclosure($fieldEnclosure) - { - $this->fieldEnclosure = $fieldEnclosure; - return $this; - } - - /** - * Opens the file at the given path to make it ready to be read. - * The file must be UTF-8 encoded. - * @TODO add encoding detection/conversion - * - * @param string $filePath Path of the CSV file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException - */ - protected function openReader($filePath) - { - $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); - if (!$this->filePointer) { - throw new IOException('Could not open file ' . $filePath . ' for reading.'); - } - - $this->skipUtf8Bom(); - } - - /** - * This skips the UTF-8 BOM if inserted at the beginning of the file - * by moving the file pointer after it, so that it is not read. 
- * - * @return void - */ - protected function skipUtf8Bom() - { - $this->globalFunctionsHelper->rewind($this->filePointer); - - $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); - - if ($hasUtf8Bom) { - // we skip the 2 first bytes (so start from the 3rd byte) - $this->globalFunctionsHelper->fseek($this->filePointer, 3); - } else { - // if no BOM, reset the pointer to read from the beginning - $this->globalFunctionsHelper->fseek($this->filePointer, 0); - } - } - - /** - * Reads and returns next row if available. - * Empty rows are skipped. - * - * @return array|null Array that contains the data for the read row or null at the end of the file - */ - protected function read() - { - $lineData = null; - - if ($this->filePointer) { - do { - $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); - } while ($lineData && $this->isEmptyLine($lineData)); - } - - // When reaching the end of the file, return null instead of false - return ($lineData !== false) ? $lineData : null; - } - - /** - * @param array $lineData Array containing the cells value for the line - * @return bool Whether the given line is empty - */ - protected function isEmptyLine($lineData) - { - return (count($lineData) === 1 && $lineData[0] === null); - } - - /** - * Closes the reader. To be used after reading the file. 
- * - * @return void - */ - protected function closeReader() - { - if ($this->filePointer) { - $this->globalFunctionsHelper->fclose($this->filePointer); - } - } -} diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index 3b164d5..9f9e56f 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -2,7 +2,7 @@ namespace Box\Spout\Reader\CSV; -use Box\Spout\Reader\AbstractReader2; +use Box\Spout\Reader\AbstractReader; use Box\Spout\Common\Exception\IOException; /** @@ -11,7 +11,7 @@ use Box\Spout\Common\Exception\IOException; * * @package Box\Spout\Reader\CSV */ -class Reader extends AbstractReader2 +class Reader extends AbstractReader { /** @var resource Pointer to the file to be written */ protected $filePointer; @@ -75,7 +75,7 @@ class Reader extends AbstractReader2 * * @return SheetIterator To iterate over sheets */ - public function getSheetIterator() + public function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/src/Spout/Reader/Exception/EndOfFileReachedException.php b/src/Spout/Reader/Exception/EndOfFileReachedException.php deleted file mode 100644 index 6194d49..0000000 --- a/src/Spout/Reader/Exception/EndOfFileReachedException.php +++ /dev/null @@ -1,12 +0,0 @@ - 0 - * Z => 25 - * AA => 26 : (26^(2-1) * (0+1)) + 0 - * AB => 27 : (26^(2-1) * (0+1)) + 1 - * BC => 54 : (26^(2-1) * (1+1)) + 2 - * BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25 - */ - foreach (str_split($column) as $single_cell_index) - { - $currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue; - - if ($columnLength == 1) { - $columnIndex += $currentColumnIndex; - } else { - $columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); - } - - $columnLength--; - } - - return $columnIndex; - } - - /** - * Returns whether a cell index is valid, in an Excel world. - * To be valid, the cell index should start with capital letters and be followed by numbers. 
- * - * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) - * @return bool - */ - protected static function isValidCellIndex($cellIndex) - { - return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php deleted file mode 100644 index 642647a..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php +++ /dev/null @@ -1,154 +0,0 @@ - 20 * 600 ≈ 12KB - */ - const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12; - - /** - * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files - * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory - * and the string will be quickly retrieved. - * The performance bottleneck is not when creating these temporary files, but rather when loading their content. - * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works - * best when the indexes of the shared strings are sorted in the sheet data. - * 10,000 was chosen because it creates small files that are fast to be loaded in memory. - */ - const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; - - /** @var CachingStrategyFactory|null Singleton instance */ - protected static $instance = null; - - /** - * Private constructor for singleton - */ - private function __construct() - { - } - - /** - * Returns the singleton instance of the factory - * - * @return CachingStrategyFactory - */ - public static function getInstance() - { - if (self::$instance === null) { - self::$instance = new CachingStrategyFactory(); - } - - return self::$instance; - } - - /** - * Returns the best caching strategy, given the number of unique shared strings - * and the amount of memory available. 
- * - * @param int $sharedStringsUniqueCount Number of unique shared strings - * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored - * @return CachingStrategyInterface The best caching strategy - */ - public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null) - { - if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) { - return new InMemoryStrategy($sharedStringsUniqueCount); - } else { - return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE); - } - } - - /** - * Returns whether it is safe to use in-memory caching, given the number of unique shared strings - * and the amount of memory available. - * - * @param int $sharedStringsUniqueCount Number of unique shared strings - * @return bool - */ - protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount) - { - $memoryAvailable = $this->getMemoryLimitInKB(); - - if ($memoryAvailable === -1) { - // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe - return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE); - } else { - $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB; - return ($memoryAvailable > $memoryNeeded); - } - } - - /** - * Returns the PHP "memory_limit" in Kilobytes - * - * @return float - */ - protected function getMemoryLimitInKB() - { - $memoryLimitFormatted = $this->getMemoryLimitFromIni(); - $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted)); - - // No memory limit - if ($memoryLimitFormatted === '-1') { - return -1; - } - - if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) { - $amount = intval($matches[1]); - $unit = $matches[2]; - - switch ($unit) { - case 'b': return ($amount / 1024); - case 'k': return $amount; - case 'm': return ($amount * 1024); - case 'g': return ($amount * 1024 * 1024); - case 't': return ($amount * 1024 * 1024 * 
1024); - } - } - - return -1; - } - - /** - * Returns the formatted "memory_limit" value - * - * @return string - */ - protected function getMemoryLimitFromIni() - { - return ini_get('memory_limit'); - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php deleted file mode 100644 index 4334d86..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php +++ /dev/null @@ -1,44 +0,0 @@ -fileSystemHelper = new FileSystemHelper($rootTempFolder); - $this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings')); - - $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile; - - $this->globalFunctionsHelper = new GlobalFunctionsHelper(); - $this->tempFilePointer = null; - } - - /** - * Adds the given string to the cache. - * - * @param string $sharedString The string to be added to the cache - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return void - */ - public function addStringForIndex($sharedString, $sharedStringIndex) - { - $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - - if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); - } - $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w'); - } - - // The shared string retrieval logic expects each cell data to be on one line only - // Encoding the line feed character allows to preserve this assumption - $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); - - $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . 
PHP_EOL); - } - - /** - * Returns the path for the temp file that should contain the string for the given index - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The temp file path for the given index - */ - protected function getSharedStringTempFilePath($sharedStringIndex) - { - $numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile); - return $this->tempFolder . '/sharedstrings' . $numTempFile; - } - - /** - * Closes the cache after the last shared string was added. - * This prevents any additional string from being added to the cache. - * - * @return void - */ - public function closeCache() - { - // close pointer to the last temp file that was written - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); - } - } - - - /** - * Returns the string located at the given index from the cache. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile; - - if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { - throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); - } - - if ($this->inMemoryTempFilePath !== $tempFilePath) { - // free memory - unset($this->inMemoryTempFileContents); - - $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath)); - $this->inMemoryTempFilePath = $tempFilePath; - } - - $sharedString = null; - if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { - 
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; - $sharedString = $this->unescapeLineFeed($escapedSharedString); - } - - if ($sharedString === null) { - throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); - } - - return rtrim($sharedString, PHP_EOL); - } - - /** - * Escapes the line feed characters (\n) - * - * @param string $unescapedString - * @return string - */ - private function escapeLineFeed($unescapedString) - { - return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString); - } - - /** - * Unescapes the line feed characters (\n) - * - * @param string $escapedString - * @return string - */ - private function unescapeLineFeed($escapedString) - { - return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString); - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function clearCache() - { - if ($this->tempFolder) { - $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder); - } - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php deleted file mode 100644 index 41b41be..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php +++ /dev/null @@ -1,82 +0,0 @@ -inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount); - $this->isCacheClosed = false; - } - - /** - * Adds the given string to the cache. - * - * @param string $sharedString The string to be added to the cache - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return void - */ - public function addStringForIndex($sharedString, $sharedStringIndex) - { - if (!$this->isCacheClosed) { - $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString); - } - } - - /** - * Closes the cache after the last shared string was added. 
- * This prevents any additional string from being added to the cache. - * - * @return void - */ - public function closeCache() - { - $this->isCacheClosed = true; - } - - /** - * Returns the string located at the given index from the cache. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - try { - return $this->inMemoryCache->offsetGet($sharedStringIndex); - } catch (\RuntimeException $e) { - throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); - } - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function clearCache() - { - unset($this->inMemoryCache); - $this->isCacheClosed = false; - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php deleted file mode 100644 index 0f6d21d..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php +++ /dev/null @@ -1,280 +0,0 @@ -filePath = $filePath; - $this->tempFolder = $tempFolder; - } - - /** - * Returns whether the XLSX file contains a shared strings XML file - * - * @return bool - */ - public function hasSharedStrings() - { - $hasSharedStrings = false; - $zip = new \ZipArchive(); - - if ($zip->open($this->filePath) === true) { - $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false); - $zip->close(); - } - - return $hasSharedStrings; - } - - /** - * Builds an in-memory array containing all the shared strings of the worksheet. - * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. - * It is then accessed by the worksheet data, via the string index in the built table. 
- * - * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx - * - * The XML file can be really big with worksheets containing a lot of data. That is why - * we need to use a XML reader that provides streaming like the XMLReader library. - * Please note that SimpleXML does not provide such a functionality but since it is faster - * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. - * - * @return void - * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read - */ - public function extractSharedStrings() - { - $xmlReader = new \XMLReader(); - $sharedStringIndex = 0; - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - - $sharedStringsFilePath = $this->getSharedStringsFilePath(); - if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) { - throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); - } - - $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); - $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); - - while ($xmlReader->read() && $xmlReader->name !== 'si') { - // do nothing until a 'si' tag is reached - } - - while ($xmlReader->name === 'si') { - $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); - $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); - - // removes nodes that should not be read, like the pronunciation of the Kanji characters - $cleanNode = $this->removeSuperfluousTextNodes($node); - - // find all text nodes 't'; there can be multiple if the cell contains formatting - $textNodes = $cleanNode->xpath('//ns:t'); - - $textValue = ''; - foreach ($textNodes as $textNode) { - if ($this->shouldPreserveWhitespace($textNode)) { - $textValue .= $textNode->__toString(); - } else { - $textValue .= trim($textNode->__toString()); - } - } - - $unescapedTextValue = 
$escaper->unescape($textValue); - $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); - - $sharedStringIndex++; - - // jump to the next 'si' tag - $xmlReader->next('si'); - } - - $this->cachingStrategy->closeCache(); - - $xmlReader->close(); - } - - /** - * @return string The path to the shared strings XML file - */ - protected function getSharedStringsFilePath() - { - return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH; - } - - /** - * Returns the shared strings unique count, as specified in tag. - * - * @param \XMLReader $xmlReader XMLReader instance - * @return int Number of unique shared strings in the sharedStrings.xml file - * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read - */ - protected function getSharedStringsUniqueCount($xmlReader) - { - // Use internal errors to avoid displaying lots of warning messages in case of invalid file - // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks - libxml_clear_errors(); - libxml_use_internal_errors(true); - - $xmlReader->next('sst'); - - // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) - while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) { - $xmlReader->read(); - } - - $readError = libxml_get_last_error(); - if ($readError !== false) { - throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]"); - } - - // reset the setting to display XML warnings/errors - libxml_use_internal_errors(false); - - return intval($xmlReader->getAttribute('uniqueCount')); - } - - /** - * Returns the best shared strings caching strategy. 
- * - * @param int $sharedStringsUniqueCount - * @return CachingStrategyInterface - */ - protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) - { - return CachingStrategyFactory::getInstance() - ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder); - } - - /** - * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. - * This is to simplify the parsing of the subtree. - * - * @param \XMLReader $xmlReader - * @return \SimpleXMLElement - * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read - */ - protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) - { - // Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node. - // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks - libxml_clear_errors(); - libxml_use_internal_errors(true); - - $node = null; - try { - $node = new \SimpleXMLElement($xmlReader->readOuterXml()); - } catch (\Exception $exception) { - $error = libxml_get_last_error(); - libxml_use_internal_errors(false); - - throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].'); - } - - libxml_use_internal_errors(false); - - return $node; - } - - /** - * Removes nodes that should not be read, like the pronunciation of the Kanji characters. - * By keeping them, their text content would be added to the read string. - * - * @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove - * @return \SimpleXMLElement Cleaned parent node - */ - protected function removeSuperfluousTextNodes($parentNode) - { - $tagsToRemove = [ - 'rPh', // Pronunciation of the text - ]; - - foreach ($tagsToRemove as $tagToRemove) { - $xpath = '//ns:' . 
$tagToRemove; - $nodesToRemove = $parentNode->xpath($xpath); - - foreach ($nodesToRemove as $nodeToRemove) { - // This is how to remove a node from the XML - unset($nodeToRemove[0]); - } - } - - return $parentNode; - } - - /** - * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. - * - * @param \SimpleXMLElement $textNode The text node element () whitespace may be preserved - * @return bool Whether whitespace should be preserved - */ - protected function shouldPreserveWhitespace($textNode) - { - $shouldPreserveWhitespace = false; - - $attributes = $textNode->attributes('xml', true); - if ($attributes) { - foreach ($attributes as $attributeName => $attributeValue) { - if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') { - $shouldPreserveWhitespace = true; - break; - } - } - } - - return $shouldPreserveWhitespace; - } - - /** - * Returns the shared string at the given index, using the previously chosen caching strategy. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - return $this->cachingStrategy->getStringAtIndex($sharedStringIndex); - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function cleanup() - { - if ($this->cachingStrategy) { - $this->cachingStrategy->clearCache(); - } - } -} diff --git a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php b/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php deleted file mode 100644 index d869dd4..0000000 --- a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php +++ /dev/null @@ -1,209 +0,0 @@ -filePath = $filePath; - $this->globalFunctionsHelper = $globalFunctionsHelper; - } - - /** - * Returns the file paths of the 
worksheet data XML files within the XLSX file. - * The paths are read from the [Content_Types].xml file. - * - * @return Worksheet[] Worksheets within the XLSX file - */ - public function getWorksheets() - { - $worksheets = []; - - $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::CONTENT_TYPES_XML_FILE_PATH, - self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML - ); - - // find all nodes defining a worksheet - $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); - - for ($i = 0; $i < count($sheetNodes); $i++) { - $sheetNode = $sheetNodes[$i]; - $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; - - $sheet = $this->getSheet($sheetDataXMLFilePath, $i); - $worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath); - } - - return $worksheets; - } - - /** - * Returns an instance of a sheet, given the path of its data XML file. - * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. - * Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID. - * The entry contains the ID and name of the sheet. - * - * If this piece of data can't be found by parsing the different XML files, the ID will default - * to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will - * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2"). 
- * - * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml - * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) - * @return \Box\Spout\Reader\Sheet Sheet instance - */ - protected function getSheet($sheetDataXMLFilePath, $sheetIndexZeroBased) - { - $sheetId = $sheetIndexZeroBased + 1; - $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); - - /* - * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" - * In workbook.xml.rels, it is only "worksheets/sheet1.xml" - */ - $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/'); - - // find the node associated to the given file path - $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); - $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); - - if (count($relationshipNodes) === 1) { - $relationshipNode = $relationshipNodes[0]; - $sheetId = (string) $relationshipNode->attributes()->Id; - - $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); - $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]'); - - if (count($sheetNodes) === 1) { - $sheetNode = $sheetNodes[0]; - $sheetId = (int) $sheetNode->attributes()->sheetId; - $escapedSheetName = (string) $sheetNode->attributes()->name; - - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - $sheetName = $escaper->unescape($escapedSheetName); - } - } - - return new Sheet($sheetId, $sheetIndexZeroBased, $sheetName); - } - - /** - * Returns the default name of the sheet whose data is located - * at the given path. 
- * - * @param $sheetDataXMLFilePath - * @return string The default sheet name - */ - protected function getDefaultSheetName($sheetDataXMLFilePath) - { - return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION); - } - - /** - * Returns a representation of the workbook.xml.rels file, ready to be parsed. - * The returned value is cached. - * - * @return \SimpleXMLElement XML element representating the workbook.xml.rels file - */ - protected function getWorkbookXMLRelsAsXMLElement() - { - if (!$this->workbookXMLRelsAsXMLElement) { - $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::WORKBOOK_XML_RELS_FILE_PATH, - self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS - ); - } - - return $this->workbookXMLRelsAsXMLElement; - } - - /** - * Returns a representation of the workbook.xml file, ready to be parsed. - * The returned value is cached. - * - * @return \SimpleXMLElement XML element representating the workbook.xml.rels file - */ - protected function getWorkbookXMLAsXMLElement() - { - if (!$this->workbookXMLAsXMLElement) { - $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::WORKBOOK_XML_FILE_PATH, - self::MAIN_NAMESPACE_FOR_WORKBOOK_XML - ); - } - - return $this->workbookXMLAsXMLElement; - } - - /** - * Loads the contents of the given file in an XML parser and register the given XPath namespace. - * - * @param string $xmlFilePath The path of the XML file inside the XLSX file - * @param string $mainNamespace The main XPath namespace to register - * @return \SimpleXMLElement The XML element representing the file - */ - protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace) - { - $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . 
$xmlFilePath); - - $xmlElement = new \SimpleXMLElement($xmlContents); - $xmlElement->registerXPathNamespace('ns', $mainNamespace); - - return $xmlElement; - } - - /** - * Returns whether another worksheet exists after the current worksheet. - * The order is determined by the order of appearance in the [Content_Types].xml file. - * - * @param Worksheet|null $currentWorksheet The worksheet being currently read or null if reading has not started yet - * @param Worksheet[] $allWorksheets A list of all worksheets in the XLSX file. Must contain at least one worksheet - * @return bool Whether another worksheet exists after the current sheet - */ - public function hasNextWorksheet($currentWorksheet, $allWorksheets) - { - return ($currentWorksheet === null || ($currentWorksheet->getWorksheetIndex() + 1 < count($allWorksheets))); - } -} diff --git a/src/Spout/Reader/Internal/XLSX/Worksheet.php b/src/Spout/Reader/Internal/XLSX/Worksheet.php deleted file mode 100644 index 5145c28..0000000 --- a/src/Spout/Reader/Internal/XLSX/Worksheet.php +++ /dev/null @@ -1,58 +0,0 @@ -externalSheet = $externalSheet; - $this->worksheetIndex = $worksheetIndex; - $this->dataXmlFilePath = $dataXmlFilePath; - } - - /** - * @return string Path of the XML file containing the worksheet data, - * without the leading slash. 
- */ - public function getDataXmlFilePath() - { - return ltrim($this->dataXmlFilePath, '/'); - } - - /** - * @return \Box\Spout\Reader\Sheet The "external" sheet - */ - public function getExternalSheet() - { - return $this->externalSheet; - } - - /** - * @return int - */ - public function getWorksheetIndex() - { - return $this->worksheetIndex; - } -} diff --git a/src/Spout/Reader/ReaderFactory.php b/src/Spout/Reader/ReaderFactory.php index 9766978..800ac89 100644 --- a/src/Spout/Reader/ReaderFactory.php +++ b/src/Spout/Reader/ReaderFactory.php @@ -19,7 +19,7 @@ class ReaderFactory * This creates an instance of the appropriate reader, given the type of the file to be read * * @param string $readerType Type of the reader to instantiate - * @return \Box\Spout\Reader\CSV|\Box\Spout\Reader\XLSX + * @return \Box\Spout\Reader\CSV\Reader|\Box\Spout\Reader\XLSX\Reader * @throws \Box\Spout\Common\Exception\UnsupportedTypeException */ public static function create($readerType) @@ -28,10 +28,10 @@ class ReaderFactory switch ($readerType) { case Type::CSV: - $reader = new CSV(); + $reader = new CSV\Reader(); break; case Type::XLSX: - $reader = new XLSX(); + $reader = new XLSX\Reader(); break; default: throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType); diff --git a/src/Spout/Reader/ReaderFactory2.php b/src/Spout/Reader/ReaderFactory2.php deleted file mode 100644 index 74f5d19..0000000 --- a/src/Spout/Reader/ReaderFactory2.php +++ /dev/null @@ -1,44 +0,0 @@ -setGlobalFunctionsHelper(new GlobalFunctionsHelper()); - - return $reader; - } -} diff --git a/src/Spout/Reader/ReaderInterface.php b/src/Spout/Reader/ReaderInterface.php index 7253821..8ecde30 100644 --- a/src/Spout/Reader/ReaderInterface.php +++ b/src/Spout/Reader/ReaderInterface.php @@ -20,26 +20,12 @@ interface ReaderInterface public function open($filePath); /** - * Returns whether all rows have been read (i.e. if we are at the end of the file). 
- * To know if the end of file has been reached, it uses a buffer. If the buffer is - * empty (meaning, nothing has been read or previous read line has been consumed), then - * it reads the next line, store it in the buffer for the next time or flip a variable if - * the end of file has been reached. + * Returns an iterator to iterate over sheets. * - * @return bool - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first + * @return \Iterator To iterate over sheets + * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader */ - public function hasNextRow(); - - /** - * Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by - * actually reading the file. - * - * @return array Array that contains the data for the read row - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first - * @throws \Box\Spout\Reader\Exception\EndOfFileReachedException - */ - public function nextRow(); + public function getSheetIterator(); /** * Closes the reader, preventing any additional reading diff --git a/src/Spout/Reader/ReaderInterface2.php b/src/Spout/Reader/ReaderInterface2.php deleted file mode 100644 index a61c83c..0000000 --- a/src/Spout/Reader/ReaderInterface2.php +++ /dev/null @@ -1,35 +0,0 @@ -id = $sheetId; - $this->index = $sheetIndex; - $this->name = $sheetName; - } - - /** - * @return int ID of the sheet - */ - public function getId() - { - return $this->id; - } - - /** - * @return int Index of the sheet, based on order of creation (zero-based) - */ - public function getIndex() - { - return $this->index; - } - - /** - * @return string Name of the sheet - */ - public function getName() - { - return $this->name; - } -} diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php deleted file mode 100644 index 7f176fe..0000000 --- a/src/Spout/Reader/XLSX.php +++ /dev/null @@ -1,394 +0,0 @@ -tempFolder = $tempFolder; - 
return $this; - } - - /** - * Opens the file at the given file path to make it ready to be read. - * It also parses the sharedStrings.xml file to get all the shared strings available in memory - * and fetches all the available worksheets. - * - * @param string $filePath Path of the file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read - * @throws Exception\NoWorksheetsFoundException If there are no worksheets in the file - */ - protected function openReader($filePath) - { - $this->filePath = $filePath; - $this->zip = new \ZipArchive(); - - if ($this->zip->open($filePath) === true) { - $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); - - if ($this->sharedStringsHelper->hasSharedStrings()) { - // Extracts all the strings from the worksheets for easy access in the future - $this->sharedStringsHelper->extractSharedStrings(); - } - - // Fetch all available worksheets - $this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper); - $this->worksheets = $this->worksheetHelper->getWorksheets($filePath); - - if (count($this->worksheets) === 0) { - throw new NoWorksheetsFoundException('The file must contain at least one worksheet.'); - } - } else { - throw new IOException('Could not open ' . $filePath . ' for reading.'); - } - } - - /** - * Returns whether another worksheet exists after the current worksheet. - * - * @return bool Whether another worksheet exists after the current worksheet. - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - */ - public function hasNextSheet() - { - if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); - } - - return $this->worksheetHelper->hasNextWorksheet($this->currentWorksheet, $this->worksheets); - } - - /** - * Moves the pointer to the current worksheet. 
- * Moving to another worksheet will stop the reading in the current worksheet. - * - * @return \Box\Spout\Reader\Sheet The next sheet - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - * @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read - */ - public function nextSheet() - { - if (!$this->hasNextSheet()) { - throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.'); - } - - if ($this->currentWorksheet === null) { - $nextWorksheet = $this->worksheets[0]; - } else { - $currentWorksheetIndex = $this->currentWorksheet->getWorksheetIndex(); - $nextWorksheet = $this->worksheets[$currentWorksheetIndex + 1]; - } - - $this->initXmlReaderForWorksheetData($nextWorksheet); - $this->currentWorksheet = $nextWorksheet; - - // make sure that we are ready to read more rows - $this->hasReachedEndOfFile = false; - $this->emptyRowDataBuffer(); - - return $this->currentWorksheet->getExternalSheet(); - } - - /** - * Initializes the XMLReader object that reads worksheet data for the given worksheet. - * If another worksheet was being read, it closes the reader before reopening it for the new worksheet. - * The XMLReader is configured to be safe from billion laughs attack. - * - * @param Internal\XLSX\Worksheet $worksheet The worksheet to initialize the XMLReader with - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the worksheet data XML cannot be read - */ - protected function initXmlReaderForWorksheetData($worksheet) - { - // if changing worksheet and the XMLReader was initialized for the current worksheet - if ($worksheet != $this->currentWorksheet && $this->xmlReader) { - $this->xmlReader->close(); - } else if (!$this->xmlReader) { - $this->xmlReader = new \XMLReader(); - } - - $worksheetDataXMLFilePath = $worksheet->getDataXmlFilePath(); - - $worksheetDataFilePath = 'zip://' . $this->filePath . '#' . 
$worksheetDataXMLFilePath; - if ($this->xmlReader->open($worksheetDataFilePath, null, LIBXML_NONET) === false) { - throw new IOException('Could not open "' . $worksheetDataXMLFilePath . '".'); - } - } - - /** - * Reads and returns data of the line that comes after the last read line, on the current worksheet. - * Empty rows will be skipped. - * - * @return array|null Array that contains the data for the read line or null at the end of the file - * @throws \Box\Spout\Common\Exception\BadUsageException If the pointer to the current worksheet has not been set - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found - */ - protected function read() - { - if (!$this->currentWorksheet) { - throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()'); - } - - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - $isInsideRowTag = false; - $rowData = []; - - while ($this->xmlReader->read()) { - if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'dimension') { - // Read dimensions of the worksheet - $dimensionRef = $this->xmlReader->getAttribute('ref'); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; - } - - } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'row') { - // Start of the row description - $isInsideRowTag = true; - - // Read spans info if present - $numberOfColumnsForRow = $this->numberOfColumns; - $spans = $this->xmlReader->getAttribute('spans'); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? 
array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'c') { - // Start of a cell description - $currentCellIndex = $this->xmlReader->getAttribute('r'); - $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper); - - } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === 'row') { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numberOfColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); - break; - } - } - - // no data means "end of file" - return ($rowData !== []) ? $rowData : null; - } - - /** - * Returns the cell's string value from a node's nested value node - * - * @param \DOMNode $node - * @return string The value associated with the cell - */ - protected function getVNodeValue($node) - { - // for cell types having a "v" tag containing the value. - // if not, the returned value should be empty string. - $vNode = $node->getElementsByTagName('v')->item(0); - if ($vNode !== null) { - return $vNode->nodeValue; - } - return ""; - } - - /** - * Returns the cell String value where string is inline. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatInlineStringCellValue($node, $escaper) - { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value from shared-strings file using nodeValue index. 
- * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatSharedStringCellValue($nodeValue, $escaper) - { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($nodeValue); - $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value, where string is stored in value node. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatStrCellValue($nodeValue, $escaper) - { - $escapedCellValue = trim($nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell Numeric value from string of nodeValue. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return int|float The value associated with the cell - */ - protected function formatNumericCellValue($nodeValue) - { - $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); - return $cellValue; - } - - /** - * Returns the cell Boolean value from a specific node's Value. - * - * @param string $nodeValue - * @return bool The value associated with the cell - */ - protected function formatBooleanCellValue($nodeValue) - { - // !! is similar to boolval() - $cellValue = !!$nodeValue; - return $cellValue; - } - - /** - * Returns a cell's PHP Date value, associated to the given stored nodeValue. 
- * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return DateTime|null The value associated with the cell (null when the cell has an error) - */ - protected function formatDateCellValue($nodeValue) - { - try { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) - $cellValue = new \DateTime($nodeValue); - return $cellValue; - } catch (\Exception $e) { - return null; - } - } - - /** - * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string|int|float|bool|null The value associated with the cell (null when the cell has an error) - */ - protected function getCellValue($node, $escaper) - { - // Default cell type is "n" - $cellType = $node->getAttribute('t') ?: 'n'; - $vNodeValue = $this->getVNodeValue($node); - if ( ($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING) ) { - return $vNodeValue; - } - - switch ($cellType) - { - case self::CELL_TYPE_INLINE_STRING: - return $this->formatInlineStringCellValue($node, $escaper); - case self::CELL_TYPE_SHARED_STRING: - return $this->formatSharedStringCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_STR: - return $this->formatStrCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_BOOLEAN: - return $this->formatBooleanCellValue($vNodeValue); - case self::CELL_TYPE_NUMERIC: - return $this->formatNumericCellValue($vNodeValue); - case self::CELL_TYPE_DATE: - return $this->formatDateCellValue($vNodeValue); - default: - return null; - } - } - - /** - * Closes the reader. To be used after reading the file. 
- * - * @return void - */ - protected function closeReader() - { - if ($this->xmlReader) { - $this->xmlReader->close(); - } - - if ($this->zip) { - $this->zip->close(); - } - - $this->sharedStringsHelper->cleanup(); - } -} diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index 68712cc..f24d185 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; -use Box\Spout\Reader\AbstractReader2; +use Box\Spout\Reader\AbstractReader; use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper; /** @@ -12,7 +12,7 @@ use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper; * * @package Box\Spout\Reader\XLSX */ -class Reader extends AbstractReader2 +class Reader extends AbstractReader { /** @var string Temporary folder where the temporary files will be created */ protected $tempFolder; @@ -70,7 +70,7 @@ class Reader extends AbstractReader2 * * @return SheetIterator To iterate over sheets */ - public function getSheetIterator() + public function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index de55b94..922c61b 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\CSV; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -22,7 +22,17 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testOpenShouldThrowExceptionIfFileDoesNotExist() { - ReaderFactory2::create(Type::CSV)->open('/path/to/fake/file.csv'); + ReaderFactory::create(Type::CSV)->open('/path/to/fake/file.csv'); + } + + /** + * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException + * + * @return void + */ + public function 
testOpenShouldThrowExceptionIfTryingToReadBeforeOpeningReader() + { + ReaderFactory::create(Type::CSV)->getSheetIterator(); } /** @@ -39,7 +49,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $resourcePath = $this->getResourcePath('csv_standard.csv'); - $reader = ReaderFactory2::create(Type::CSV); + $reader = ReaderFactory::create(Type::CSV); $reader->setGlobalFunctionsHelper($helperStub); $reader->open($resourcePath); } @@ -162,7 +172,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory2::create(Type::CSV); + $reader = ReaderFactory::create(Type::CSV); $reader->setFieldDelimiter($fieldDelimiter); $reader->setFieldEnclosure($fieldEnclosure); diff --git a/tests/Spout/Reader/CSVTest.php b/tests/Spout/Reader/CSVTest.php deleted file mode 100644 index 8d02849..0000000 --- a/tests/Spout/Reader/CSVTest.php +++ /dev/null @@ -1,208 +0,0 @@ -open('/path/to/fake/file.csv'); - } - - /** - * @expectedException \Box\Spout\Common\Exception\IOException - * - * @return void - */ - public function testOpenShouldThrowExceptionIfFileNotReadable() - { - $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') - ->setMethods(['is_readable']) - ->getMock(); - $helperStub->method('is_readable')->willReturn(false); - - $resourcePath = $this->getResourcePath('csv_standard.csv'); - - $reader = ReaderFactory::create(Type::CSV); - $reader->setGlobalFunctionsHelper($helperStub); - $reader->open($resourcePath); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException - * - * @return void - */ - public function testReadShouldThrowExceptionIfReadBeforeReaderOpened() - { - $reader = ReaderFactory::create(Type::CSV); - $reader->hasNextRow(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\EndOfFileReachedException - * - * @return void - */ - public function testReadShouldThrowExceptionIfNextRowCalledAfterReadingDone() - 
{ - $resourcePath = $this->getResourcePath('csv_standard.csv'); - - $reader = ReaderFactory::create(Type::CSV); - $reader->open($resourcePath); - - while ($reader->hasNextRow()) { - $reader->nextRow(); - } - - $reader->nextRow(); - } - - - /** - * @return void - */ - public function testReadStandardCSV() - { - $allRows = $this->getAllRowsForFile('csv_standard.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldNotStopAtCommaIfEnclosed() - { - $allRows = $this->getAllRowsForFile('csv_with_comma_enclosed.csv'); - $this->assertEquals('This is, a comma', $allRows[0][0]); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCells() - { - $allRows = $this->getAllRowsForFile('csv_with_empty_cells.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', '', 'csv--23'], - ['csv--31', 'csv--32', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyLines() - { - $allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldHaveTheRightNumberOfCells() - { - $allRows = $this->getAllRowsForFile('csv_with_different_cells_number.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22'], - ['csv--31'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportCustomFieldDelimiter() - { - $allRows = $this->getAllRowsForFile('csv_delimited_with_pipes.csv', '|'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 
'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportCustomFieldEnclosure() - { - $allRows = $this->getAllRowsForFile('csv_text_enclosed_with_pound.csv', ',', '#'); - $this->assertEquals('This is, a comma', $allRows[0][0]); - } - - /** - * @return void - */ - public function testReadShouldSkipUtf8Bom() - { - $allRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @param string $fileName - * @param string|void $fieldDelimiter - * @param string|void $fieldEnclosure - * @return array All the read rows the given file - */ - private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"') - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::CSV); - $reader->setFieldDelimiter($fieldDelimiter); - $reader->setFieldEnclosure($fieldEnclosure); - - $reader->open($resourcePath); - - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); - } - - $reader->close(); - - return $allRows; - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php b/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php deleted file mode 100644 index 8851b33..0000000 --- a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php +++ /dev/null @@ -1,60 +0,0 @@ - 1, 3 => 3]; - $filledArray = CellHelper::fillMissingArrayIndexes($arrayToFill, 'FILL'); - - $expectedFilledArray = ['FILL', 1, 'FILL', 3]; - $this->assertEquals($expectedFilledArray, $filledArray); - } - - /** - * @return array - */ - public function dataProviderForTestGetColumnIndexFromCellIndex() - { - return [ - ['A1', 0], - ['Z3', 25], - ['AA5', 26], - ['AB24', 27], - ['BC5', 54], - ['BCZ99', 1455], - ]; - } - - /** - * @dataProvider 
dataProviderForTestGetColumnIndexFromCellIndex - * - * @param string $cellIndex - * @param int $expectedColumnIndex - * @return void - */ - public function testGetColumnIndexFromCellIndex($cellIndex, $expectedColumnIndex) - { - $this->assertEquals($expectedColumnIndex, CellHelper::getColumnIndexFromCellIndex($cellIndex)); - } - - /** - * @expectedException \Box\Spout\Common\Exception\InvalidArgumentException - * - * @return void - */ - public function testGetColumnIndexFromCellIndexShouldThrowIfInvalidCellIndex() - { - CellHelper::getColumnIndexFromCellIndex('InvalidCellIndex'); - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php b/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php deleted file mode 100644 index 18b1c74..0000000 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php +++ /dev/null @@ -1,99 +0,0 @@ -getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') - ->disableOriginalConstructor() - ->setMethods(['getMemoryLimitInKB']) - ->getMock(); - - $factoryStub->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB); - - \ReflectionHelper::setStaticValue('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); - - $strategy = $factoryStub->getBestCachingStrategy($sharedStringsUniqueCount, null); - - $fullExpectedStrategyClassName = 'Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\\' . 
$expectedStrategyClassName; - $this->assertEquals($fullExpectedStrategyClassName, get_class($strategy)); - - $strategy->clearCache(); - \ReflectionHelper::reset(); - } - - /** - * @return array - */ - public function dataProviderForTestGetMemoryLimitInKB() - { - return [ - ['-1', -1], - ['invalid', -1], - ['1024B', 1], - ['128K', 128], - ['256KB', 256], - ['512M', 512 * 1024], - ['2MB', 2 * 1024], - ['1G', 1 * 1024 * 1024], - ['10GB', 10 * 1024 * 1024], - ['2T', 2 * 1024 * 1024 * 1024], - ['5TB', 5 * 1024 * 1024 * 1024], - ]; - } - - /** - * @dataProvider dataProviderForTestGetMemoryLimitInKB - * - * @param string $memoryLimitFormatted - * @param float $expectedMemoryLimitInKB - * @return void - */ - public function testGetMemoryLimitInKB($memoryLimitFormatted, $expectedMemoryLimitInKB) - { - /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $factoryStub */ - $factoryStub = $this - ->getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') - ->disableOriginalConstructor() - ->setMethods(['getMemoryLimitFromIni']) - ->getMock(); - - $factoryStub->method('getMemoryLimitFromIni')->willReturn($memoryLimitFormatted); - - $memoryLimitInKB = \ReflectionHelper::callMethodOnObject($factoryStub, 'getMemoryLimitInKB'); - - $this->assertEquals($expectedMemoryLimitInKB, $memoryLimitInKB); - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php b/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php deleted file mode 100644 index 82631bc..0000000 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php +++ /dev/null @@ -1,112 +0,0 @@ -getResourcePath('one_sheet_with_shared_strings.xlsx'); - $this->sharedStringsHelper = new SharedStringsHelper($resourcePath); - } - - /** - * @return void - */ - public function tearDown() - { - $this->sharedStringsHelper->cleanup(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\SharedStringNotFoundException - * @return void - */ - public 
function testGetStringAtIndexShouldThrowExceptionIfStringNotFound() - { - $this->sharedStringsHelper->extractSharedStrings(); - $this->sharedStringsHelper->getStringAtIndex(PHP_INT_MAX); - } - - /** - * @return void - */ - public function testGetStringAtIndexShouldReturnTheCorrectStringIfFound() - { - $this->sharedStringsHelper->extractSharedStrings(); - - $sharedString = $this->sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals('s1--A1', $sharedString); - - $sharedString = $this->sharedStringsHelper->getStringAtIndex(24); - $this->assertEquals('s1--E5', $sharedString); - - $usedCachingStrategy = \ReflectionHelper::getValueOnObject($this->sharedStringsHelper, 'cachingStrategy'); - $this->assertTrue($usedCachingStrategy instanceof InMemoryStrategy); - } - - /** - * @return void - */ - public function testGetStringAtIndexShouldWorkWithMultilineStrings() - { - $resourcePath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx'); - $sharedStringsHelper = new SharedStringsHelper($resourcePath); - - $sharedStringsHelper->extractSharedStrings(); - - $sharedString = $sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals("s1\nA1", $sharedString); - - $sharedString = $sharedStringsHelper->getStringAtIndex(24); - $this->assertEquals("s1\nE5", $sharedString); - - $sharedStringsHelper->cleanup(); - } - - /** - * @return void - */ - public function testGetStringAtIndexWithFileBasedStrategy() - { - // force the file-based strategy by setting no memory limit - $originalMemoryLimit = ini_get('memory_limit'); - ini_set('memory_limit', '-1'); - - $resourcePath = $this->getResourcePath('sheet_with_lots_of_shared_strings.xlsx'); - $sharedStringsHelper = new SharedStringsHelper($resourcePath); - - $sharedStringsHelper->extractSharedStrings(); - - $sharedString = $sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals('str', $sharedString); - - $sharedString = 
$sharedStringsHelper->getStringAtIndex(CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE + 1); - $this->assertEquals('str', $sharedString); - - $usedCachingStrategy = \ReflectionHelper::getValueOnObject($sharedStringsHelper, 'cachingStrategy'); - $this->assertTrue($usedCachingStrategy instanceof FileBasedStrategy); - - $sharedStringsHelper->cleanup(); - - ini_set('memory_limit', $originalMemoryLimit); - } -} diff --git a/tests/Spout/Reader/SheetTest.php b/tests/Spout/Reader/SheetTest.php deleted file mode 100644 index 5f6e02d..0000000 --- a/tests/Spout/Reader/SheetTest.php +++ /dev/null @@ -1,52 +0,0 @@ -openFileAndReturnSheets('two_sheets_with_custom_names.xlsx'); - - $this->assertEquals('CustomName1', $sheets[0]->getName()); - $this->assertEquals(0, $sheets[0]->getIndex()); - $this->assertEquals(1, $sheets[0]->getId()); - - $this->assertEquals('CustomName2', $sheets[1]->getName()); - $this->assertEquals(1, $sheets[1]->getIndex()); - $this->assertEquals(2, $sheets[1]->getId()); - } - - /** - * @param string $fileName - * @return Sheet[] - */ - private function openFileAndReturnSheets($fileName) - { - $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - $sheets = []; - while ($reader->hasNextSheet()) { - $sheets[] = $reader->nextSheet(); - } - - $reader->close(); - - return $sheets; - } -} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index c5fb583..9643d54 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -4,7 +4,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -284,7 +284,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = 
ReaderFactory2::create(Type::XLSX); + $reader = ReaderFactory::create(Type::XLSX); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/Spout/Reader/XLSX/SheetTest.php b/tests/Spout/Reader/XLSX/SheetTest.php index c9449f4..8f3f9df 100644 --- a/tests/Spout/Reader/XLSX/SheetTest.php +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -38,7 +38,7 @@ class SheetTest extends \PHPUnit_Framework_TestCase private function openFileAndReturnSheets($fileName) { $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory2::create(Type::XLSX); + $reader = ReaderFactory::create(Type::XLSX); $reader->open($resourcePath); $sheets = []; diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php deleted file mode 100644 index 531bdb8..0000000 --- a/tests/Spout/Reader/XLSXTest.php +++ /dev/null @@ -1,332 +0,0 @@ -getAllRowsForFile($filePath); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException - * - * @return void - */ - public function testHasNextSheetShouldThrowExceptionIfReaderNotOpened() - { - $reader = ReaderFactory::create(Type::XLSX); - $reader->hasNextSheet(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\EndOfWorksheetsReachedException - * - * @return void - */ - public function testNextSheetShouldThrowExceptionIfNoMoreSheetsToRead() - { - $fileName = 'one_sheet_with_shared_strings.xlsx'; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - } - - $reader->nextSheet(); - } - - /** - * @return array - */ - public function dataProviderForTestReadForAllWorksheets() - { - return [ - 
['one_sheet_with_shared_strings.xlsx', 5, 5], - ['one_sheet_with_inline_strings.xlsx', 5, 5], - ['two_sheets_with_shared_strings.xlsx', 10, 5], - ['two_sheets_with_inline_strings.xlsx', 10, 5] - ]; - } - - /** - * @dataProvider dataProviderForTestReadForAllWorksheets - * - * @param string $resourceName - * @param int $expectedNumOfRows - * @param int $expectedNumOfCellsPerRow - * @return void - */ - public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow) - { - $allRows = $this->getAllRowsForFile($resourceName); - - $this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows"); - foreach ($allRows as $row) { - $this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row"); - } - } - - /** - * @return void - */ - public function testReadShouldSupportFilesWithoutSharedStringsFile() - { - $allRows = $this->getAllRowsForFile('sheet_with_no_shared_strings_file.xlsx'); - - $expectedRows = [ - [10, 11], - [20, 21], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportAllCellTypes() - { - $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); - - $expectedRows = [ - [ - 's1--A1', 's1--A2', - false, true, - \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'), - \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), - 10, 10.43, - null, - 'weird string', // valid 'str' string - null, // invalid date - ], - ['', '', '', '', '', '', '', '', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCellsAtTheEndIfDimensionsSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_but_spans_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - foreach ($allRows as $row) { - 
$this->assertEquals(5, count($row), 'There should be 5 cells for every row, because empty rows should be preserved'); - } - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2', '', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCellsAtTheEndIfNoDimensionsButSpansSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); - $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); - $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyRows() - { - $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A3', 
's1--B3', 's1--C3', 's1--D3', 's1--E3'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportEmptySharedString() - { - $allRows = $this->getAllRowsForFile('sheet_with_empty_shared_string.xlsx'); - - $expectedRows = [ - ['s1--A1', '', 's1--C1'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldPreserveSpaceIfSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_with_preserve_space_shared_strings.xlsx'); - - $expectedRows = [ - [' s1--A1', 's1--B1 ', ' s1--C1 '], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipPronunciationData() - { - $allRows = $this->getAllRowsForFile('sheet_with_pronunciation.xlsx'); - - $expectedRow = ['名前', '一二三四']; - $this->assertEquals($expectedRow, $allRows[0], 'Pronunciation data should be removed.'); - } - - - /** - * @return array - */ - public function dataProviderForTestReadShouldBeProtectedAgainstAttacks() - { - return [ - ['attack_billion_laughs.xlsx'], - ['attack_quadratic_blowup.xlsx'], - ]; - } - - /** - * @dataProvider dataProviderForTestReadShouldBeProtectedAgainstAttacks - * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) - * - * @param string $fileName - * @return void - */ - public function testReadShouldBeProtectedAgainstAttacks($fileName) - { - $startTime = microtime(true); - - try { - $this->getAllRowsForFile($fileName); - $this->fail('An exception should have been thrown'); - } catch (IOException $exception) { - $duration = microtime(true) - $startTime; - $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); - - $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB - $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and 
therefore consume all the memory.'); - } - } - - /** - * @return void - */ - public function testReadShouldBeAbleToProcessEmptySheets() - { - $allRows = $this->getAllRowsForFile('sheet_with_no_cells.xlsx'); - $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); - } - - /** - * @return void - */ - public function testReadShouldSkipFormulas() - { - $allRows = $this->getAllRowsForFile('sheet_with_formulas.xlsx'); - - $expectedRows = [ - ['val1', 'val2', 'total1', 'total2'], - [10, 20, 30, 21], - [11, 21, 32, 41], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @param string $fileName - * @return array All the read rows the given file - */ - private function getAllRowsForFile($fileName) - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); - } - } - - $reader->close(); - - return $allRows; - } -} From c672558a186357e6d290c16267d745efad993322 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 20 Jul 2015 22:32:06 -0700 Subject: [PATCH 3/7] Update Writer folder structure to match Reader new structure --- src/Spout/Writer/AbstractWriter.php | 8 +++---- src/Spout/Writer/{CSV.php => CSV/Writer.php} | 13 ++++++------ src/Spout/Writer/WriterFactory.php | 6 +++--- src/Spout/Writer/WriterInterface.php | 8 +++---- .../XLSX => XLSX/Helper}/CellHelper.php | 4 ++-- .../XLSX => XLSX/Helper}/FileSystemHelper.php | 6 +++--- .../Helper}/SharedStringsHelper.php | 4 ++-- .../XLSX => XLSX/Helper}/ZipHelper.php | 4 ++-- .../XLSX => XLSX/Internal}/Workbook.php | 18 ++++++++-------- .../XLSX => XLSX/Internal}/Worksheet.php | 16 +++++++------- src/Spout/Writer/{ => XLSX}/Sheet.php | 6 +++--- .../Writer/{XLSX.php => XLSX/Writer.php} | 21 ++++++++++--------- .../{CSVTest.php => CSV/WriterTest.php} | 13 
++++++------ .../XLSX => XLSX/Helper}/CellHelperTest.php | 4 ++-- tests/Spout/Writer/{ => XLSX}/SheetTest.php | 7 ++++--- .../{XLSXTest.php => XLSX/WriterTest.php} | 19 +++++++++-------- 16 files changed, 81 insertions(+), 76 deletions(-) rename src/Spout/Writer/{CSV.php => CSV/Writer.php} (93%) rename src/Spout/Writer/{Helper/XLSX => XLSX/Helper}/CellHelper.php (96%) rename src/Spout/Writer/{Helper/XLSX => XLSX/Helper}/FileSystemHelper.php (99%) rename src/Spout/Writer/{Helper/XLSX => XLSX/Helper}/SharedStringsHelper.php (97%) rename src/Spout/Writer/{Helper/XLSX => XLSX/Helper}/ZipHelper.php (95%) rename src/Spout/Writer/{Internal/XLSX => XLSX/Internal}/Workbook.php (94%) rename src/Spout/Writer/{Internal/XLSX => XLSX/Internal}/Worksheet.php (92%) rename src/Spout/Writer/{ => XLSX}/Sheet.php (91%) rename src/Spout/Writer/{XLSX.php => XLSX/Writer.php} (93%) rename tests/Spout/Writer/{CSVTest.php => CSV/WriterTest.php} (93%) rename tests/Spout/Writer/{Helper/XLSX => XLSX/Helper}/CellHelperTest.php (97%) rename tests/Spout/Writer/{ => XLSX}/SheetTest.php (94%) rename tests/Spout/Writer/{XLSXTest.php => XLSX/WriterTest.php} (96%) diff --git a/src/Spout/Writer/AbstractWriter.php b/src/Spout/Writer/AbstractWriter.php index 68d452f..e17e16a 100644 --- a/src/Spout/Writer/AbstractWriter.php +++ b/src/Spout/Writer/AbstractWriter.php @@ -69,7 +69,7 @@ abstract class AbstractWriter implements WriterInterface * By using this method, the data will be written to a file. * * @param string $outputFilePath Path of the output file that will contain the data - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\IOException If the writer cannot be opened or if the given path is not writable */ public function openToFile($outputFilePath) @@ -92,7 +92,7 @@ abstract class AbstractWriter implements WriterInterface * @codeCoverageIgnore * * @param string $outputFileName Name of the output file that will contain the data. 
If a path is passed in, only the file name will be kept - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\IOException If the writer cannot be opened */ public function openToBrowser($outputFileName) @@ -144,7 +144,7 @@ abstract class AbstractWriter implements WriterInterface * If empty, no data is added (i.e. not even as a blank row) * Example: $dataRow = ['data1', 1234, null, '', 'data5', false]; * - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer * @throws \Box\Spout\Common\Exception\IOException If unable to write data */ @@ -173,7 +173,7 @@ abstract class AbstractWriter implements WriterInterface * ['data21', 'data22', null, false], * ]; * - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\InvalidArgumentException If the input param is not valid * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer * @throws \Box\Spout\Common\Exception\IOException If unable to write data diff --git a/src/Spout/Writer/CSV.php b/src/Spout/Writer/CSV/Writer.php similarity index 93% rename from src/Spout/Writer/CSV.php rename to src/Spout/Writer/CSV/Writer.php index 59e37a7..d37ad5b 100644 --- a/src/Spout/Writer/CSV.php +++ b/src/Spout/Writer/CSV/Writer.php @@ -1,16 +1,17 @@ EOD; - /** @var \Box\Spout\Writer\Sheet The "external" sheet */ + /** @var \Box\Spout\Writer\XLSX\Sheet The "external" sheet */ protected $externalSheet; /** @var string Path to the XML file that will contain the sheet data */ protected $worksheetFilePath; - /** @var \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper Helper to write shared strings */ + /** @var \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper Helper to write shared strings */ protected $sharedStringsHelper; /** @var bool Whether 
inline or shared strings should be used */ @@ -42,9 +42,9 @@ EOD; protected $lastWrittenRowIndex = 0; /** - * @param \Box\Spout\Writer\Sheet $externalSheet The associated "external" sheet + * @param \Box\Spout\Writer\XLSX\Sheet $externalSheet The associated "external" sheet * @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored - * @param \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper $sharedStringsHelper Helper for shared strings + * @param \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper for shared strings * @param bool $shouldUseInlineStrings Whether inline or shared strings should be used * @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing */ @@ -76,7 +76,7 @@ EOD; } /** - * @return \Box\Spout\Writer\Sheet The "external" sheet + * @return \Box\Spout\Writer\XLSX\Sheet The "external" sheet */ public function getExternalSheet() { diff --git a/src/Spout/Writer/Sheet.php b/src/Spout/Writer/XLSX/Sheet.php similarity index 91% rename from src/Spout/Writer/Sheet.php rename to src/Spout/Writer/XLSX/Sheet.php index 7f8a874..46380a2 100644 --- a/src/Spout/Writer/Sheet.php +++ b/src/Spout/Writer/XLSX/Sheet.php @@ -1,12 +1,12 @@ book->getWorksheets(); - /** @var Internal\XLSX\Worksheet $worksheet */ + /** @var Internal\Worksheet $worksheet */ foreach ($worksheets as $worksheet) { $externalSheets[] = $worksheet->getExternalSheet(); } diff --git a/tests/Spout/Writer/CSVTest.php b/tests/Spout/Writer/CSV/WriterTest.php similarity index 93% rename from tests/Spout/Writer/CSVTest.php rename to tests/Spout/Writer/CSV/WriterTest.php index ef71b87..83e2e03 100644 --- a/tests/Spout/Writer/CSVTest.php +++ b/tests/Spout/Writer/CSV/WriterTest.php @@ -1,16 +1,17 @@ writeToCsvFileAndReturnWrittenContent($allRows, 'csv_with_utf8_bom.csv'); - $this->assertContains(CSV::UTF8_BOM, $writtenContent, 'The CSV file should contain a UTF-8 BOM'); + 
$this->assertContains(Writer::UTF8_BOM, $writtenContent, 'The CSV file should contain a UTF-8 BOM'); } /** @@ -161,6 +162,6 @@ class CSVTest extends \PHPUnit_Framework_TestCase private function trimWrittenContent($writtenContent) { // remove line feeds and UTF-8 BOM - return trim($writtenContent, PHP_EOL . CSV::UTF8_BOM); + return trim($writtenContent, PHP_EOL . Writer::UTF8_BOM); } } diff --git a/tests/Spout/Writer/Helper/XLSX/CellHelperTest.php b/tests/Spout/Writer/XLSX/Helper/CellHelperTest.php similarity index 97% rename from tests/Spout/Writer/Helper/XLSX/CellHelperTest.php rename to tests/Spout/Writer/XLSX/Helper/CellHelperTest.php index a5045aa..f46b1c6 100644 --- a/tests/Spout/Writer/Helper/XLSX/CellHelperTest.php +++ b/tests/Spout/Writer/XLSX/Helper/CellHelperTest.php @@ -1,11 +1,11 @@ createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->openToFile($resourcePath); diff --git a/tests/Spout/Writer/XLSXTest.php b/tests/Spout/Writer/XLSX/WriterTest.php similarity index 96% rename from tests/Spout/Writer/XLSXTest.php rename to tests/Spout/Writer/XLSX/WriterTest.php index 0abc252..7c6f5ea 100644 --- a/tests/Spout/Writer/XLSXTest.php +++ b/tests/Spout/Writer/XLSX/WriterTest.php @@ -1,16 +1,17 @@ createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings(true); @@ -278,7 +279,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase ]; // set the maxRowsPerSheet limit to 2 - \ReflectionHelper::setStaticValue('\Box\Spout\Writer\Internal\XLSX\Workbook', 'maxRowsPerWorksheet', 2); + 
\ReflectionHelper::setStaticValue('\Box\Spout\Writer\XLSX\Internal\Workbook', 'maxRowsPerWorksheet', 2); $writer = $this->writeToXLSXFile($dataRows, $fileName, true, $shouldCreateSheetsAutomatically = true); $this->assertEquals(2, count($writer->getSheets()), '2 sheets should have been created.'); @@ -302,7 +303,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase ]; // set the maxRowsPerSheet limit to 2 - \ReflectionHelper::setStaticValue('\Box\Spout\Writer\Internal\XLSX\Workbook', 'maxRowsPerWorksheet', 2); + \ReflectionHelper::setStaticValue('\Box\Spout\Writer\XLSX\Internal\Workbook', 'maxRowsPerWorksheet', 2); $writer = $this->writeToXLSXFile($dataRows, $fileName, true, $shouldCreateSheetsAutomatically = false); $this->assertEquals(1, count($writer->getSheets()), 'Only 1 sheet should have been created.'); @@ -348,14 +349,14 @@ class XLSXTest extends \PHPUnit_Framework_TestCase * @param string $fileName * @param bool $shouldUseInlineStrings * @param bool $shouldCreateSheetsAutomatically - * @return XLSX + * @return Writer */ private function writeToXLSXFile($allRows, $fileName, $shouldUseInlineStrings = true, $shouldCreateSheetsAutomatically = true) { $this->createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings($shouldUseInlineStrings); $writer->setShouldCreateNewSheetsAutomatically($shouldCreateSheetsAutomatically); @@ -373,14 +374,14 @@ class XLSXTest extends \PHPUnit_Framework_TestCase * @param string $fileName * @param bool $shouldUseInlineStrings * @param bool $shouldCreateSheetsAutomatically - * @return XLSX + * @return Writer */ private function writeToMultipleSheetsInXLSXFile($allRows, $numSheets, $fileName, $shouldUseInlineStrings = true, $shouldCreateSheetsAutomatically = true) { $this->createGeneratedFolderIfNeeded($fileName); 
$resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings($shouldUseInlineStrings); $writer->setShouldCreateNewSheetsAutomatically($shouldCreateSheetsAutomatically); From 15aab7902a755dbceb0e017839d1fb0d35b12f2c Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 20 Jul 2015 22:47:55 -0700 Subject: [PATCH 4/7] Factory should return Interface --- src/Spout/Reader/ReaderFactory.php | 2 +- src/Spout/Writer/WriterFactory.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Spout/Reader/ReaderFactory.php b/src/Spout/Reader/ReaderFactory.php index 800ac89..0e39f59 100644 --- a/src/Spout/Reader/ReaderFactory.php +++ b/src/Spout/Reader/ReaderFactory.php @@ -19,7 +19,7 @@ class ReaderFactory * This creates an instance of the appropriate reader, given the type of the file to be read * * @param string $readerType Type of the reader to instantiate - * @return \Box\Spout\Reader\CSV\Reader|\Box\Spout\Reader\XLSX\Reader + * @return ReaderInterface * @throws \Box\Spout\Common\Exception\UnsupportedTypeException */ public static function create($readerType) diff --git a/src/Spout/Writer/WriterFactory.php b/src/Spout/Writer/WriterFactory.php index 5fd4f28..ee93cd7 100644 --- a/src/Spout/Writer/WriterFactory.php +++ b/src/Spout/Writer/WriterFactory.php @@ -19,7 +19,7 @@ class WriterFactory * This creates an instance of the appropriate writer, given the type of the file to be read * * @param string $writerType Type of the writer to instantiate - * @return \Box\Spout\Writer\CSV\Writer|\Box\Spout\Writer\XLSX\Writer + * @return WriterInterface * @throws \Box\Spout\Common\Exception\UnsupportedTypeException */ public static function create($writerType) From 2345a80784a5275676d1ed31530275cc65cc5980 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 20 Jul 2015 23:24:43 -0700 Subject: 
[PATCH 5/7] Update README for iterators --- README.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6686a2b..5bf9ed1 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,10 @@ use Box\Spout\Common\Type; $reader = ReaderFactory::create(Type::CSV); $reader->open($filePath); -while ($reader->hasNextRow()) { - $row = $reader->nextRow(); - // do stuff +foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + // do stuff + } } $reader->close(); @@ -81,11 +82,8 @@ use Box\Spout\Common\Type; $reader = ReaderFactory::create(Type::XLSX); $reader->open($filePath); -while ($reader->hasNextSheet()) { - $reader->nextSheet(); - - while ($reader->hasNextRow()) { - $row = $reader->nextRow(); +foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { // do stuff } } @@ -202,8 +200,7 @@ $sheets = $writer->getSheets(); If you rely on the sheet's name in your application, you can access it and customize it this way: ```php // Accessing the sheet name when reading -while ($reader->hasNextSheet()) { - $sheet = $reader->nextSheet(); +foreach ($reader->getSheetIterator() as $sheet) { $sheetName = $sheet->getName(); } @@ -253,7 +250,7 @@ For information, the performance tests take about one hour to run (processing 2 When writing data, Spout is streaming the data to files, one or few lines at a time. That means that it only keeps in memory the few rows that it needs to write. Once written, the memory is freed. -Same goes with reading. Only one row at a time is stored in memory. A special technique is used to handle shared strings in XLSX, storing them into several small temporary files that allows fast access. +Same goes with reading. Only one row at a time is stored in memory. A special technique is used to handle shared strings in XLSX, storing them - if needed - into several small temporary files that allow fast access.
#### How long does it take to generate a file with X rows? From 86a4c3790a9837b9b15d17d8c0c48075800e6107 Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Tue, 21 Jul 2015 23:00:40 -0700 Subject: [PATCH 6/7] Adding more tests --- .travis.yml | 2 +- phpunit.xml | 1 - src/Spout/Common/Helper/FileSystemHelper.php | 2 +- .../XLSX/Helper/SharedStringsHelper.php | 3 +- src/Spout/Reader/XLSX/RowIterator.php | 13 ++++ tests/Spout/Reader/CSV/ReaderTest.php | 63 ++++++++++++++++++ tests/Spout/Reader/XLSX/ReaderTest.php | 62 ++++++++++++++++- ..._sheet_xml_not_matching_content_types.xlsx | Bin 0 -> 3757 bytes 8 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx diff --git a/.travis.yml b/.travis.yml index d4306c9..9e90442 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ install: script: - mkdir -p build/logs - - php vendor/bin/phpunit --coverage-clover build/logs/clover.xml + - php vendor/bin/phpunit --coverage-clover build/logs/clover.xml --coverage-text after_script: - if [[ $TRAVIS_PHP_VERSION != 'hhvm' && $TRAVIS_PHP_VERSION != '7.0' ]]; then php vendor/bin/ocular code-coverage:upload --format=php-clover build/logs/clover.xml; fi diff --git a/phpunit.xml b/phpunit.xml index fc6d657..06ddf63 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -5,7 +5,6 @@ colors="true" convertErrorsToExceptions="false" convertWarningsToExceptions="false" - strict="false" verbose="false"> diff --git a/src/Spout/Common/Helper/FileSystemHelper.php b/src/Spout/Common/Helper/FileSystemHelper.php index d7ca64f..6186822 100644 --- a/src/Spout/Common/Helper/FileSystemHelper.php +++ b/src/Spout/Common/Helper/FileSystemHelper.php @@ -63,7 +63,7 @@ class FileSystemHelper $filePath = $parentFolderPath . '/' . 
$fileName; $wasCreationSuccessful = file_put_contents($filePath, $fileContents); - if (!$wasCreationSuccessful) { + if ($wasCreationSuccessful === false) { throw new IOException('Unable to create file: ' . $filePath); } diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 5c8fb46..75f8989 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -154,7 +154,8 @@ class SharedStringsHelper $readError = libxml_get_last_error(); if ($readError !== false) { - throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]"); + $readErrorMessage = trim($readError->message); + throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readErrorMessage}]"); } // reset the setting to display XML warnings/errors diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index e96898f..6fc1dde 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -131,12 +131,19 @@ class RowIterator implements IteratorInterface * * @return void * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML */ public function next() { $isInsideRowTag = false; $rowData = []; + // Use internal errors to avoid displaying lots of warning messages in case of invalid file + // For instance on HHVM, XMLReader->open() won't fail when trying to read a nonexistent file within a zip... + // But the XMLReader->read() will fail!
+ libxml_clear_errors(); + libxml_use_internal_errors(true); + while ($this->xmlReader->read()) { if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) { // Read dimensions of the sheet @@ -180,6 +187,12 @@ class RowIterator implements IteratorInterface } } + $readError = libxml_get_last_error(); + if ($readError !== false) { + $readErrorMessage = trim($readError->message); + throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$readErrorMessage}]"); + } + $this->rowDataBuffer = $rowData; } diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index 922c61b..932633f 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -54,6 +54,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $reader->open($resourcePath); } + /** + * @expectedException \Box\Spout\Common\Exception\IOException + * + * @return void + */ + public function testOpenShouldThrowExceptionIfCannotOpenFile() + { + $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') + ->setMethods(['fopen']) + ->getMock(); + $helperStub->method('fopen')->willReturn(false); + + $resourcePath = $this->getResourcePath('csv_standard.csv'); + + $reader = ReaderFactory::create(Type::CSV); + $reader->setGlobalFunctionsHelper($helperStub); + $reader->open($resourcePath); + } + /** * @return void @@ -161,6 +180,50 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadMultipleTimesShouldRewindReader() + { + $allRows = []; + $resourcePath = $this->getResourcePath('csv_standard.csv'); + + $reader = ReaderFactory::create(Type::CSV); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // do nothing + } + + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as 
$row) { + $allRows[] = $row; + break; + } + + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + } + + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + } + + $reader->close(); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--11', 'csv--12', 'csv--13'], + ['csv--11', 'csv--12', 'csv--13'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @param string $fileName * @param string|void $fieldDelimiter diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 9643d54..8c8376b 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -6,6 +6,7 @@ use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Type; use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; +use Symfony\Component\Config\Definition\Exception\Exception; /** * Class ReaderTest @@ -24,6 +25,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase return [ ['/path/to/fake/file.xlsx'], ['file_with_no_sheets_in_content_types.xlsx'], + ['file_with_sheet_xml_not_matching_content_types.xlsx'], ['file_corrupted.xlsx'], ]; } @@ -37,7 +39,8 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldThrowException($filePath) { - $this->getAllRowsForFile($filePath); + // using @ to prevent warnings/errors from being displayed + @$this->getAllRowsForFile($filePath); } /** @@ -240,7 +243,8 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $startTime = microtime(true); try { - $this->getAllRowsForFile($fileName); + // using @ to prevent warnings/errors from being displayed + @$this->getAllRowsForFile($fileName); $this->fail('An exception should have been thrown'); } catch (IOException $exception) { $duration = microtime(true) - $startTime; @@ -275,6 +279,60 @@ class ReaderTest extends \PHPUnit_Framework_TestCase 
$this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadMultipleTimesShouldRewindReader() + { + $allRows = []; + $resourcePath = $this->getResourcePath('two_sheets_with_inline_strings.xlsx'); + + $reader = ReaderFactory::create(Type::XLSX); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // do nothing + } + + foreach ($reader->getSheetIterator() as $sheet) { + // this loop should only add the first row of the first sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // this loop should rewind the iterator and restart reading from the 1st row again + // therefore, it should only add the first row of the first sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // not reading any more sheets + break; + } + + foreach ($reader->getSheetIterator() as $sheet) { + // this loop should only add the first row of the current sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // not breaking, so we keep reading the next sheets + } + + $reader->close(); + + $expectedRows = [ + ['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'], + ['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'], + ['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'], + ['s2 - A1', 's2 - B1', 's2 - C1', 's2 - D1', 's2 - E1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @param string $fileName * @return array All the read rows the given file diff --git a/tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx b/tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f33c9d7817f057af2fa1d0efe2b717be4c7e2953 GIT binary patch literal 3757 zcmai12{@Gf79ZKBFxDb5woul=$X3>|FXKarB7=#s%*ei%iSfxUMOpfk)Fs={ib~^C z3SYL7?Af#jQK%Bt8=X7vbUo_VDf>n?g+Q&{N(9@$TRm+$J<9a)d@68#M@I1jP+>B+cz6Z&4 
ziK(05C5!b}DL=(c8+RiRR6Ln4ywcF@KWo4H(DzgU?%>GH5N zLMBl2U2kBge`4s|v+$X4JFmeDGy6?gG>>-3wm8EeUnf4h!R%l8^Bq4Xc&sIDrXutk z>l2kw0;kcud(-g=1{?FP$C?lPM~$Orjzl>?MP`MRpVzj3edYNK9ig7&JlrH!GL%bm zow0~6?ocg{Ya-0P-%H+tB91-2qVZvp-~1=ZAW&-Af*!!@)?omCUGPSG_{jc}*-S)$ zjzH-M@Xm1`2*k7{BeNk{m||*N17jmh2HvX0)8s8a_Hwo#;KLc`WbTdg z+D#kRALQOD;I%4H!))8dP+nde#haB>eQlo{8)BO2yUsg1g|Au&c@cb|y%#yeAQ%oC zgBVIu7N0zOI5Kt4ulip8B;~fAUxbASs$ir{Pf`;p^rf`z)f)CiYd(?Gr>E_@z>;qt za9wuA@|aFP`1JmL?ghyDAhgMXxIs~4jou{&iMrBv-f%c`6Y1uv3Jq}?dpjyZl9mD7 zvbt|v{E}b)x;E{y{>$J|oK0YgbzMjHr}!|FkBa2VzwcDk1;zE0N;ORh37?%?PH7yr zdr?8VvqI#gz%txGm{Y*RzCBbYoHu$SR?lQRBuS8qFm1ZZ%Cac^ky4ucq~hbG5hthn zuyllzzEmoxsPk1Cxll&+t63w7)L%K|R$SrC*jUhG_2)%8Q3r(V4UFVG`B4kKQD~pu zY>|?A4sBq0esQr8>eeWK<+C0<(}7S9(^>_TWI0QA8JQ5pkTsu^GU zOKt9{iqDJPKiUvWkg<8`@bp7!mx8v@s=l}PTROhU*7WoI|C*B@zPe6tb$N@$y>&$k zuo&p^e+mx}_}zV#VDq;p19&6gzVdGB*%S-FdAmE}aPAu!`FVXC+(S7YkC{SYM-ZPhw;U?T>qH zMWR*OeBXm|Bw`eZwu%g{ZIwe)SD4+ZOanipe|l0yLi-Ac5LW+zH-y~14=Ic2tz?6Y z#)|cDTw|?^?y??b4V?C~eOYAPZ8;{{OgQDJKF(*P@y6SV@xJYy!kSRVowfO|g*UbF z01GdGXJfbkr1`j_ywT23%ZIkVZ@G)zm_w1ZPIO{-YSUG`T5HVHKR~#Fs@cMllZwCG{?n7EY zqp;zl`xgOZl9f~q9sd56+~3_uY(fbgG;R!YdKKPhu%U5)H2YR2uhCm{EqKMfn{W7eNI$TN2jdR9Y%5OU6z@`x#$ zTy$qn#iUq(l+z1NaAVG0%#*b1ou@25=UJdEaJ8?UU!&Q{G3&)t(p!LAMSy44O^&x; zmH0$0Gm<8=?xH~#YVcH6E>q;)+-@rayQh2(@01%J9#$hpKk0&|S$z5uzvel%yzU=X zcU>*V)p7hUd#k>r;Di_mf92GLJNB!1cRl+ZdrSV@et!m?E#=PE09 zx_z)dqs;Bq`e-7hQ~5xL<7APKXH!ww5or}=ObQDzUU5n7?{!Dp%g9Q~zF#r#Z8i{U z%s#GYwD0aoX0UQ1zz^W25DWqd?~(lhF~s_#JTT5?D9<1L{sy_*>?(}w^-Kt7Z^V4~ z#8SGPG3u~q7|j9=FPTNSJh#xSw!L)#JD9yiB z%wFZJ(nB_6cQU03^Y{*yH~4np8KkdymJl~;07f{)_d<`UGZgYUlIe?Yv36*9um3&q znf{c(5)6M1o^r%Q#|3G4UR>0gG^XtXoo-$tipGSn88zQ}sX$RwwkU11maz6;TYX#>K4ktJ$*MsRq9Q1^FRNlgPmA>mmd*ta5Mw6=f*-Tgz_^Q3q*};0R z;}9_UiZUJb79oA#cm0rE$XmS~Iq75Of~s6zjC7VHW_=jnMLIk?OoTm7>g!}l_E?FY z?^)7rh2HU_uSXZsGMd%};>WqojcI@;@!uC{AamQ!Z`Nt5g1t4eu^dtl8mNX}qi3hl z-OC~s6SIZ+F?;@(v5l>YdTs$xZQHSS+S|QlQDLvQ!+!2uRHK`#?e4{eiY@>q@RreE 
zYYY{>m!OS_O+7QfN6U}-{ma|EfSo~d(EkAZp48nFooYvAJ8XN?+Qso^xVvW|RgXT9 z-``C}YQ&owc8?>f1_s7W4O_!$H*|NGr9$0+z5*EE?miW~m$;2~MLqTak3Yd%&FdHG jn?CPuUsUY1J=othGjn75AJq>5K|p7LG;3^p1cCkoa=d>q literal 0 HcmV?d00001 From 37d87a8a27a45c604bc2225b4c1f019ddcd99d4c Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Mon, 27 Jul 2015 00:16:40 -0700 Subject: [PATCH 7/7] Fix various problems --- .travis.yml | 2 +- src/Spout/Common/Helper/GlobalFunctionsHelper.php | 2 +- src/Spout/Reader/CSV/RowIterator.php | 6 +++--- .../XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php | 5 ++++- src/Spout/Reader/XLSX/Helper/SheetHelper.php | 7 ++++--- src/Spout/Reader/XLSX/Sheet.php | 2 +- src/Spout/Writer/XLSX/Helper/SharedStringsHelper.php | 3 +++ src/Spout/Writer/XLSX/Sheet.php | 2 +- tests/Spout/Reader/XLSX/ReaderTest.php | 3 +++ 9 files changed, 21 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9e90442..d4306c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ install: script: - mkdir -p build/logs - - php vendor/bin/phpunit --coverage-clover build/logs/clover.xml --coverage-text + - php vendor/bin/phpunit --coverage-clover build/logs/clover.xml after_script: - if [[ $TRAVIS_PHP_VERSION != 'hhvm' && $TRAVIS_PHP_VERSION != '7.0' ]]; then php vendor/bin/ocular code-coverage:upload --format=php-clover build/logs/clover.xml; fi diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index feeb782..47ed052 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -160,7 +160,7 @@ class GlobalFunctionsHelper * @see file_get_contents() * * @param string $filePath - * @return bool + * @return string */ public function file_get_contents($filePath) { diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index 2316fa7..ffb533f 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ 
b/src/Spout/Reader/CSV/RowIterator.php @@ -111,9 +111,9 @@ class RowIterator implements IteratorInterface if (!$this->hasReachedEndOfFile) { do { $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); - } while ($lineData && $this->isEmptyLine($lineData)); + } while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData))); - if ($lineData !== null) { + if ($lineData !== false && $lineData !== null) { $this->rowDataBuffer = $lineData; $this->numReadRows++; } @@ -133,7 +133,7 @@ class RowIterator implements IteratorInterface * Return the current element from the buffer * @link http://php.net/manual/en/iterator.current.php * - * @return array + * @return array|null */ public function current() { diff --git a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php index 6670bfe..9f1f19f 100644 --- a/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php +++ b/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/FileBasedStrategy.php @@ -26,6 +26,9 @@ class FileBasedStrategy implements CachingStrategyInterface /** @var \Box\Spout\Common\Helper\FileSystemHelper Helper to perform file system operations */ protected $fileSystemHelper; + /** @var string Temporary folder where the temporary files will be created */ + protected $tempFolder; + /** * @var int Maximum number of strings that can be stored in one temp file * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE @@ -42,7 +45,7 @@ class FileBasedStrategy implements CachingStrategyInterface protected $inMemoryTempFilePath; /** - * @var string Contents of the temporary file that was last read + * @var array Contents of the temporary file that was last read * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE */ protected $inMemoryTempFileContents; diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php 
b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index 3cbe9cb..a3431ae 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -72,8 +72,9 @@ class SheetHelper // find all nodes defining a sheet $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); + $numSheetNodes = count($sheetNodes); - for ($i = 0; $i < count($sheetNodes); $i++) { + for ($i = 0; $i < $numSheetNodes; $i++) { $sheetNode = $sheetNodes[$i]; $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; @@ -95,7 +96,7 @@ class SheetHelper * * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) - * @return \Box\Spout\Reader\Sheet Sheet instance + * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance */ protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased) { @@ -136,7 +137,7 @@ class SheetHelper * Returns the default name of the sheet whose data is located * at the given path. 
* - * @param $sheetDataXMLFilePath + * @param string $sheetDataXMLFilePath Path of the sheet data XML file * @return string The default sheet name */ protected function getDefaultSheetName($sheetDataXMLFilePath) diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index e2eebec..9510ecd 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -32,7 +32,7 @@ class Sheet implements SheetInterface * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) * @param string $sheetName Name of the sheet */ - function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetId, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetId, $sheetIndex, $sheetName) { $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); $this->id = $sheetId; diff --git a/src/Spout/Writer/XLSX/Helper/SharedStringsHelper.php b/src/Spout/Writer/XLSX/Helper/SharedStringsHelper.php index c78364c..8a544f9 100644 --- a/src/Spout/Writer/XLSX/Helper/SharedStringsHelper.php +++ b/src/Spout/Writer/XLSX/Helper/SharedStringsHelper.php @@ -25,6 +25,9 @@ EOD; */ const DEFAULT_STRINGS_COUNT_PART = 'count="9999999999999" uniqueCount="9999999999999"'; + /** @var resource Pointer to the sharedStrings.xml file */ + protected $sharedStringsFilePointer; + /** @var int Number of shared strings already written */ protected $numSharedStrings = 0; diff --git a/src/Spout/Writer/XLSX/Sheet.php b/src/Spout/Writer/XLSX/Sheet.php index 46380a2..858adcd 100644 --- a/src/Spout/Writer/XLSX/Sheet.php +++ b/src/Spout/Writer/XLSX/Sheet.php @@ -21,7 +21,7 @@ class Sheet /** * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based) */ - function __construct($sheetIndex) + public function __construct($sheetIndex) { $this->index = $sheetIndex; $this->name = self::DEFAULT_SHEET_NAME_PREFIX . 
($sheetIndex + 1); diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index 8c8376b..0037ca9 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -93,6 +93,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSupportAllCellTypes() { + // make sure dates are always created with the same timezone + date_default_timezone_set('UTC'); + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); $expectedRows = [