diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index 2e2e2de..bfbedf8 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -4,7 +4,6 @@ namespace Box\Spout\Reader; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\ReaderNotOpenedException; -use Box\Spout\Reader\Exception\EndOfFileReachedException; /** * Class AbstractReader @@ -14,18 +13,9 @@ use Box\Spout\Reader\Exception\EndOfFileReachedException; */ abstract class AbstractReader implements ReaderInterface { - /** @var int Used to keep track of the row index */ - protected $currentRowIndex = 0; - /** @var bool Indicates whether the stream is currently open */ protected $isStreamOpened = false; - /** @var bool Indicates whether all rows have been read */ - protected $hasReachedEndOfFile = false; - - /** @var array Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer = null; - /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -38,11 +28,11 @@ abstract class AbstractReader implements ReaderInterface abstract protected function openReader($filePath); /** - * Reads and returns next row if available. + * Returns an iterator to iterate over sheets. * - * @return array|null Array that contains the data for the read row or null at the end of the file + * @return \Iterator To iterate over sheets */ - abstract protected function read(); + abstract public function getConcreteSheetIterator(); /** * Closes the reader. To be used after reading the file. @@ -80,9 +70,6 @@ abstract class AbstractReader implements ReaderInterface } } - $this->currentRowIndex = 0; - $this->hasReachedEndOfFile = false; - try { $this->openReader($filePath); $this->isStreamOpened = true; @@ -103,82 +90,18 @@ abstract class AbstractReader implements ReaderInterface } /** - * Returns whether all rows have been read (i.e. if we are at the end of the file). - * To know if the end of file has been reached, it uses a buffer. If the buffer is - * empty (meaning, nothing has been read or previous read line has been consumed), then - * it reads the next line, store it in the buffer for the next time or flip a variable if - * the end of file has been reached. + * Returns an iterator to iterate over sheets. * - * @return bool Whether all rows have been read (i.e. if we are at the end of the file) - * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If the stream was not opened first + * @return \Iterator To iterate over sheets + * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader */ - public function hasNextRow() + public function getSheetIterator() { if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); + throw new ReaderNotOpenedException('Reader should be opened first.'); } - if ($this->hasReachedEndOfFile) { - return false; - } - - // if the buffer contains unprocessed row - if (!$this->isRowDataBufferEmpty()) { - return true; - } - - // otherwise, try to read the next line line, and store it in the buffer - $this->rowDataBuffer = $this->read(); - - // if the buffer is still empty after reading a row, it means end of file was reached - $this->hasReachedEndOfFile = $this->isRowDataBufferEmpty(); - - return (!$this->hasReachedEndOfFile); - } - - /** - * Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by - * actually reading the file. - * - * @return array Array that contains the data for the read row - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first - * @throws \Box\Spout\Reader\Exception\EndOfFileReachedException - */ - public function nextRow() - { - if (!$this->hasNextRow()) { - throw new EndOfFileReachedException('End of file was reached. Cannot read more rows.'); - } - - // Get data from buffer (if the buffer was empty, it was filled by the call to hasNextRow()) - $rowData = $this->rowDataBuffer; - - // empty buffer to mark the row as consumed - $this->emptyRowDataBuffer(); - - $this->currentRowIndex++; - - return $rowData; - } - - /** - * Returns whether the buffer where the row data is stored is empty - * - * @return bool - */ - protected function isRowDataBufferEmpty() - { - return ($this->rowDataBuffer === null); - } - - /** - * Empty the buffer that stores row data - * - * @return void - */ - protected function emptyRowDataBuffer() - { - $this->rowDataBuffer = null; + return $this->getConcreteSheetIterator(); } /** @@ -190,6 +113,12 @@ abstract class AbstractReader implements ReaderInterface { if ($this->isStreamOpened) { $this->closeReader(); + + $sheetIterator = $this->getConcreteSheetIterator(); + if ($sheetIterator) { + $sheetIterator->end(); + } + $this->isStreamOpened = false; } } diff --git a/src/Spout/Reader/AbstractReader2.php b/src/Spout/Reader/AbstractReader2.php deleted file mode 100644 index ef24412..0000000 --- a/src/Spout/Reader/AbstractReader2.php +++ /dev/null @@ -1,111 +0,0 @@ -globalFunctionsHelper = $globalFunctionsHelper; - return $this; - } - - /** - * Prepares the reader to read the given file. It also makes sure - * that the file exists and is readable. - * - * @param string $filePath Path of the file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted - */ - public function open($filePath) - { - if (!$this->isPhpStream($filePath)) { - // we skip the checks if the provided file path points to a PHP stream - if (!$this->globalFunctionsHelper->file_exists($filePath)) { - throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.'); - } else if (!$this->globalFunctionsHelper->is_readable($filePath)) { - throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.'); - } - } - - try { - $this->openReader($filePath); - $this->isStreamOpened = true; - } catch (\Exception $exception) { - throw new IOException('Could not open ' . $filePath . ' for reading! (' . $exception->getMessage() . ')'); - } - } - - /** - * Checks if a path is a PHP stream (like php://output, php://memory, ...) - * - * @param string $filePath Path of the file to be read - * @return bool Whether the given path maps to a PHP stream - */ - protected function isPhpStream($filePath) - { - return (strpos($filePath, 'php://') === 0); - } - - /** - * Closes the reader, preventing any additional reading - * - * @return void - */ - public function close() - { - if ($this->isStreamOpened) { - $this->closeReader(); - - $sheetIterator = $this->getSheetIterator(); - if ($sheetIterator) { - $sheetIterator->end(); - } - - $this->isStreamOpened = false; - } - } -} diff --git a/src/Spout/Reader/CSV.php b/src/Spout/Reader/CSV.php deleted file mode 100644 index 2da160f..0000000 --- a/src/Spout/Reader/CSV.php +++ /dev/null @@ -1,130 +0,0 @@ -fieldDelimiter = $fieldDelimiter; - return $this; - } - - /** - * Sets the field enclosure for the CSV - * - * @param string $fieldEnclosure Character that enclose fields - * @return CSV - */ - public function setFieldEnclosure($fieldEnclosure) - { - $this->fieldEnclosure = $fieldEnclosure; - return $this; - } - - /** - * Opens the file at the given path to make it ready to be read. - * The file must be UTF-8 encoded. - * @TODO add encoding detection/conversion - * - * @param string $filePath Path of the CSV file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException - */ - protected function openReader($filePath) - { - $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); - if (!$this->filePointer) { - throw new IOException('Could not open file ' . $filePath . ' for reading.'); - } - - $this->skipUtf8Bom(); - } - - /** - * This skips the UTF-8 BOM if inserted at the beginning of the file - * by moving the file pointer after it, so that it is not read. - * - * @return void - */ - protected function skipUtf8Bom() - { - $this->globalFunctionsHelper->rewind($this->filePointer); - - $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); - - if ($hasUtf8Bom) { - // we skip the 2 first bytes (so start from the 3rd byte) - $this->globalFunctionsHelper->fseek($this->filePointer, 3); - } else { - // if no BOM, reset the pointer to read from the beginning - $this->globalFunctionsHelper->fseek($this->filePointer, 0); - } - } - - /** - * Reads and returns next row if available. - * Empty rows are skipped. - * - * @return array|null Array that contains the data for the read row or null at the end of the file - */ - protected function read() - { - $lineData = null; - - if ($this->filePointer) { - do { - $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); - } while ($lineData && $this->isEmptyLine($lineData)); - } - - // When reaching the end of the file, return null instead of false - return ($lineData !== false) ? $lineData : null; - } - - /** - * @param array $lineData Array containing the cells value for the line - * @return bool Whether the given line is empty - */ - protected function isEmptyLine($lineData) - { - return (count($lineData) === 1 && $lineData[0] === null); - } - - /** - * Closes the reader. To be used after reading the file. - * - * @return void - */ - protected function closeReader() - { - if ($this->filePointer) { - $this->globalFunctionsHelper->fclose($this->filePointer); - } - } -} diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index 3b164d5..9f9e56f 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -2,7 +2,7 @@ namespace Box\Spout\Reader\CSV; -use Box\Spout\Reader\AbstractReader2; +use Box\Spout\Reader\AbstractReader; use Box\Spout\Common\Exception\IOException; /** @@ -11,7 +11,7 @@ use Box\Spout\Common\Exception\IOException; * * @package Box\Spout\Reader\CSV */ -class Reader extends AbstractReader2 +class Reader extends AbstractReader { /** @var resource Pointer to the file to be written */ protected $filePointer; @@ -75,7 +75,7 @@ class Reader extends AbstractReader2 * * @return SheetIterator To iterate over sheets */ - public function getSheetIterator() + public function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/src/Spout/Reader/Exception/EndOfFileReachedException.php b/src/Spout/Reader/Exception/EndOfFileReachedException.php deleted file mode 100644 index 6194d49..0000000 --- a/src/Spout/Reader/Exception/EndOfFileReachedException.php +++ /dev/null @@ -1,12 +0,0 @@ - 0 - * Z => 25 - * AA => 26 : (26^(2-1) * (0+1)) + 0 - * AB => 27 : (26^(2-1) * (0+1)) + 1 - * BC => 54 : (26^(2-1) * (1+1)) + 2 - * BCZ => 1455 : (26^(3-1) * (1+1)) + (26^(2-1) * (2+1)) + 25 - */ - foreach (str_split($column) as $single_cell_index) - { - $currentColumnIndex = ord($single_cell_index) - $capitalAAsciiValue; - - if ($columnLength == 1) { - $columnIndex += $currentColumnIndex; - } else { - $columnIndex += pow($step, ($columnLength - 1)) * ($currentColumnIndex + 1); - } - - $columnLength--; - } - - return $columnIndex; - } - - /** - * Returns whether a cell index is valid, in an Excel world. - * To be valid, the cell index should start with capital letters and be followed by numbers. - * - * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) - * @return bool - */ - protected static function isValidCellIndex($cellIndex) - { - return (preg_match('/^[A-Z]+\d+$/', $cellIndex) === 1); - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php deleted file mode 100644 index 642647a..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactory.php +++ /dev/null @@ -1,154 +0,0 @@ - 20 * 600 ≈ 12KB - */ - const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12; - - /** - * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files - * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory - * and the string will be quickly retrieved. - * The performance bottleneck is not when creating these temporary files, but rather when loading their content. - * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works - * best when the indexes of the shared strings are sorted in the sheet data. - * 10,000 was chosen because it creates small files that are fast to be loaded in memory. - */ - const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000; - - /** @var CachingStrategyFactory|null Singleton instance */ - protected static $instance = null; - - /** - * Private constructor for singleton - */ - private function __construct() - { - } - - /** - * Returns the singleton instance of the factory - * - * @return CachingStrategyFactory - */ - public static function getInstance() - { - if (self::$instance === null) { - self::$instance = new CachingStrategyFactory(); - } - - return self::$instance; - } - - /** - * Returns the best caching strategy, given the number of unique shared strings - * and the amount of memory available. - * - * @param int $sharedStringsUniqueCount Number of unique shared strings - * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored - * @return CachingStrategyInterface The best caching strategy - */ - public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null) - { - if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) { - return new InMemoryStrategy($sharedStringsUniqueCount); - } else { - return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE); - } - } - - /** - * Returns whether it is safe to use in-memory caching, given the number of unique shared strings - * and the amount of memory available. - * - * @param int $sharedStringsUniqueCount Number of unique shared strings - * @return bool - */ - protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount) - { - $memoryAvailable = $this->getMemoryLimitInKB(); - - if ($memoryAvailable === -1) { - // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe - return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE); - } else { - $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB; - return ($memoryAvailable > $memoryNeeded); - } - } - - /** - * Returns the PHP "memory_limit" in Kilobytes - * - * @return float - */ - protected function getMemoryLimitInKB() - { - $memoryLimitFormatted = $this->getMemoryLimitFromIni(); - $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted)); - - // No memory limit - if ($memoryLimitFormatted === '-1') { - return -1; - } - - if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) { - $amount = intval($matches[1]); - $unit = $matches[2]; - - switch ($unit) { - case 'b': return ($amount / 1024); - case 'k': return $amount; - case 'm': return ($amount * 1024); - case 'g': return ($amount * 1024 * 1024); - case 't': return ($amount * 1024 * 1024 * 1024); - } - } - - return -1; - } - - /** - * Returns the formatted "memory_limit" value - * - * @return string - */ - protected function getMemoryLimitFromIni() - { - return ini_get('memory_limit'); - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php deleted file mode 100644 index 4334d86..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyInterface.php +++ /dev/null @@ -1,44 +0,0 @@ -fileSystemHelper = new FileSystemHelper($rootTempFolder); - $this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings')); - - $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile; - - $this->globalFunctionsHelper = new GlobalFunctionsHelper(); - $this->tempFilePointer = null; - } - - /** - * Adds the given string to the cache. - * - * @param string $sharedString The string to be added to the cache - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return void - */ - public function addStringForIndex($sharedString, $sharedStringIndex) - { - $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - - if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); - } - $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w'); - } - - // The shared string retrieval logic expects each cell data to be on one line only - // Encoding the line feed character allows to preserve this assumption - $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString); - - $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL); - } - - /** - * Returns the path for the temp file that should contain the string for the given index - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The temp file path for the given index - */ - protected function getSharedStringTempFilePath($sharedStringIndex) - { - $numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile); - return $this->tempFolder . '/sharedstrings' . $numTempFile; - } - - /** - * Closes the cache after the last shared string was added. - * This prevents any additional string from being added to the cache. - * - * @return void - */ - public function closeCache() - { - // close pointer to the last temp file that was written - if ($this->tempFilePointer) { - $this->globalFunctionsHelper->fclose($this->tempFilePointer); - } - } - - - /** - * Returns the string located at the given index from the cache. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex); - $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile; - - if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) { - throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex"); - } - - if ($this->inMemoryTempFilePath !== $tempFilePath) { - // free memory - unset($this->inMemoryTempFileContents); - - $this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath)); - $this->inMemoryTempFilePath = $tempFilePath; - } - - $sharedString = null; - if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) { - $escapedSharedString = $this->inMemoryTempFileContents[$indexInFile]; - $sharedString = $this->unescapeLineFeed($escapedSharedString); - } - - if ($sharedString === null) { - throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); - } - - return rtrim($sharedString, PHP_EOL); - } - - /** - * Escapes the line feed characters (\n) - * - * @param string $unescapedString - * @return string - */ - private function escapeLineFeed($unescapedString) - { - return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString); - } - - /** - * Unescapes the line feed characters (\n) - * - * @param string $escapedString - * @return string - */ - private function unescapeLineFeed($escapedString) - { - return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString); - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function clearCache() - { - if ($this->tempFolder) { - $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder); - } - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php b/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php deleted file mode 100644 index 41b41be..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsCaching/InMemoryStrategy.php +++ /dev/null @@ -1,82 +0,0 @@ -inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount); - $this->isCacheClosed = false; - } - - /** - * Adds the given string to the cache. - * - * @param string $sharedString The string to be added to the cache - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return void - */ - public function addStringForIndex($sharedString, $sharedStringIndex) - { - if (!$this->isCacheClosed) { - $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString); - } - } - - /** - * Closes the cache after the last shared string was added. - * This prevents any additional string from being added to the cache. - * - * @return void - */ - public function closeCache() - { - $this->isCacheClosed = true; - } - - /** - * Returns the string located at the given index from the cache. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - try { - return $this->inMemoryCache->offsetGet($sharedStringIndex); - } catch (\RuntimeException $e) { - throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex"); - } - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function clearCache() - { - unset($this->inMemoryCache); - $this->isCacheClosed = false; - } -} diff --git a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php b/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php deleted file mode 100644 index 0f6d21d..0000000 --- a/src/Spout/Reader/Helper/XLSX/SharedStringsHelper.php +++ /dev/null @@ -1,280 +0,0 @@ -filePath = $filePath; - $this->tempFolder = $tempFolder; - } - - /** - * Returns whether the XLSX file contains a shared strings XML file - * - * @return bool - */ - public function hasSharedStrings() - { - $hasSharedStrings = false; - $zip = new \ZipArchive(); - - if ($zip->open($this->filePath) === true) { - $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false); - $zip->close(); - } - - return $hasSharedStrings; - } - - /** - * Builds an in-memory array containing all the shared strings of the worksheet. - * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. - * It is then accessed by the worksheet data, via the string index in the built table. - * - * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx - * - * The XML file can be really big with worksheets containing a lot of data. That is why - * we need to use a XML reader that provides streaming like the XMLReader library. - * Please note that SimpleXML does not provide such a functionality but since it is faster - * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. - * - * @return void - * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read - */ - public function extractSharedStrings() - { - $xmlReader = new \XMLReader(); - $sharedStringIndex = 0; - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - - $sharedStringsFilePath = $this->getSharedStringsFilePath(); - if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) { - throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); - } - - $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); - $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); - - while ($xmlReader->read() && $xmlReader->name !== 'si') { - // do nothing until a 'si' tag is reached - } - - while ($xmlReader->name === 'si') { - $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); - $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); - - // removes nodes that should not be read, like the pronunciation of the Kanji characters - $cleanNode = $this->removeSuperfluousTextNodes($node); - - // find all text nodes 't'; there can be multiple if the cell contains formatting - $textNodes = $cleanNode->xpath('//ns:t'); - - $textValue = ''; - foreach ($textNodes as $textNode) { - if ($this->shouldPreserveWhitespace($textNode)) { - $textValue .= $textNode->__toString(); - } else { - $textValue .= trim($textNode->__toString()); - } - } - - $unescapedTextValue = $escaper->unescape($textValue); - $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); - - $sharedStringIndex++; - - // jump to the next 'si' tag - $xmlReader->next('si'); - } - - $this->cachingStrategy->closeCache(); - - $xmlReader->close(); - } - - /** - * @return string The path to the shared strings XML file - */ - protected function getSharedStringsFilePath() - { - return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH; - } - - /** - * Returns the shared strings unique count, as specified in tag. - * - * @param \XMLReader $xmlReader XMLReader instance - * @return int Number of unique shared strings in the sharedStrings.xml file - * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read - */ - protected function getSharedStringsUniqueCount($xmlReader) - { - // Use internal errors to avoid displaying lots of warning messages in case of invalid file - // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks - libxml_clear_errors(); - libxml_use_internal_errors(true); - - $xmlReader->next('sst'); - - // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) - while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) { - $xmlReader->read(); - } - - $readError = libxml_get_last_error(); - if ($readError !== false) { - throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]"); - } - - // reset the setting to display XML warnings/errors - libxml_use_internal_errors(false); - - return intval($xmlReader->getAttribute('uniqueCount')); - } - - /** - * Returns the best shared strings caching strategy. - * - * @param int $sharedStringsUniqueCount - * @return CachingStrategyInterface - */ - protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount) - { - return CachingStrategyFactory::getInstance() - ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder); - } - - /** - * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. - * This is to simplify the parsing of the subtree. - * - * @param \XMLReader $xmlReader - * @return \SimpleXMLElement - * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read - */ - protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) - { - // Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node. - // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks - libxml_clear_errors(); - libxml_use_internal_errors(true); - - $node = null; - try { - $node = new \SimpleXMLElement($xmlReader->readOuterXml()); - } catch (\Exception $exception) { - $error = libxml_get_last_error(); - libxml_use_internal_errors(false); - - throw new IOException('The sharedStrings.xml file contains unreadable data [' . trim($error->message) . '].'); - } - - libxml_use_internal_errors(false); - - return $node; - } - - /** - * Removes nodes that should not be read, like the pronunciation of the Kanji characters. - * By keeping them, their text content would be added to the read string. - * - * @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove - * @return \SimpleXMLElement Cleaned parent node - */ - protected function removeSuperfluousTextNodes($parentNode) - { - $tagsToRemove = [ - 'rPh', // Pronunciation of the text - ]; - - foreach ($tagsToRemove as $tagToRemove) { - $xpath = '//ns:' . $tagToRemove; - $nodesToRemove = $parentNode->xpath($xpath); - - foreach ($nodesToRemove as $nodeToRemove) { - // This is how to remove a node from the XML - unset($nodeToRemove[0]); - } - } - - return $parentNode; - } - - /** - * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. - * - * @param \SimpleXMLElement $textNode The text node element () whitespace may be preserved - * @return bool Whether whitespace should be preserved - */ - protected function shouldPreserveWhitespace($textNode) - { - $shouldPreserveWhitespace = false; - - $attributes = $textNode->attributes('xml', true); - if ($attributes) { - foreach ($attributes as $attributeName => $attributeValue) { - if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') { - $shouldPreserveWhitespace = true; - break; - } - } - } - - return $shouldPreserveWhitespace; - } - - /** - * Returns the shared string at the given index, using the previously chosen caching strategy. - * - * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file - * @return string The shared string at the given index - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index - */ - public function getStringAtIndex($sharedStringIndex) - { - return $this->cachingStrategy->getStringAtIndex($sharedStringIndex); - } - - /** - * Destroys the cache, freeing memory and removing any created artifacts - * - * @return void - */ - public function cleanup() - { - if ($this->cachingStrategy) { - $this->cachingStrategy->clearCache(); - } - } -} diff --git a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php b/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php deleted file mode 100644 index d869dd4..0000000 --- a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php +++ /dev/null @@ -1,209 +0,0 @@ -filePath = $filePath; - $this->globalFunctionsHelper = $globalFunctionsHelper; - } - - /** - * Returns the file paths of the worksheet data XML files within the XLSX file. - * The paths are read from the [Content_Types].xml file. - * - * @return Worksheet[] Worksheets within the XLSX file - */ - public function getWorksheets() - { - $worksheets = []; - - $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::CONTENT_TYPES_XML_FILE_PATH, - self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML - ); - - // find all nodes defining a worksheet - $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); - - for ($i = 0; $i < count($sheetNodes); $i++) { - $sheetNode = $sheetNodes[$i]; - $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; - - $sheet = $this->getSheet($sheetDataXMLFilePath, $i); - $worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath); - } - - return $worksheets; - } - - /** - * Returns an instance of a sheet, given the path of its data XML file. - * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet. - * Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID. - * The entry contains the ID and name of the sheet. - * - * If this piece of data can't be found by parsing the different XML files, the ID will default - * to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will - * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2"). - * - * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml - * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) - * @return \Box\Spout\Reader\Sheet Sheet instance - */ - protected function getSheet($sheetDataXMLFilePath, $sheetIndexZeroBased) - { - $sheetId = $sheetIndexZeroBased + 1; - $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); - - /* - * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" - * In workbook.xml.rels, it is only "worksheets/sheet1.xml" - */ - $sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/'); - - // find the node associated to the given file path - $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement(); - $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]'); - - if (count($relationshipNodes) === 1) { - $relationshipNode = $relationshipNodes[0]; - $sheetId = (string) $relationshipNode->attributes()->Id; - - $workbookXMLElement = $this->getWorkbookXMLAsXMLElement(); - $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $sheetId . '"]'); - - if (count($sheetNodes) === 1) { - $sheetNode = $sheetNodes[0]; - $sheetId = (int) $sheetNode->attributes()->sheetId; - $escapedSheetName = (string) $sheetNode->attributes()->name; - - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - $sheetName = $escaper->unescape($escapedSheetName); - } - } - - return new Sheet($sheetId, $sheetIndexZeroBased, $sheetName); - } - - /** - * Returns the default name of the sheet whose data is located - * at the given path. - * - * @param $sheetDataXMLFilePath - * @return string The default sheet name - */ - protected function getDefaultSheetName($sheetDataXMLFilePath) - { - return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION); - } - - /** - * Returns a representation of the workbook.xml.rels file, ready to be parsed. - * The returned value is cached. - * - * @return \SimpleXMLElement XML element representating the workbook.xml.rels file - */ - protected function getWorkbookXMLRelsAsXMLElement() - { - if (!$this->workbookXMLRelsAsXMLElement) { - $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::WORKBOOK_XML_RELS_FILE_PATH, - self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS - ); - } - - return $this->workbookXMLRelsAsXMLElement; - } - - /** - * Returns a representation of the workbook.xml file, ready to be parsed. - * The returned value is cached. - * - * @return \SimpleXMLElement XML element representating the workbook.xml.rels file - */ - protected function getWorkbookXMLAsXMLElement() - { - if (!$this->workbookXMLAsXMLElement) { - $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace( - self::WORKBOOK_XML_FILE_PATH, - self::MAIN_NAMESPACE_FOR_WORKBOOK_XML - ); - } - - return $this->workbookXMLAsXMLElement; - } - - /** - * Loads the contents of the given file in an XML parser and register the given XPath namespace. - * - * @param string $xmlFilePath The path of the XML file inside the XLSX file - * @param string $mainNamespace The main XPath namespace to register - * @return \SimpleXMLElement The XML element representing the file - */ - protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace) - { - $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath); - - $xmlElement = new \SimpleXMLElement($xmlContents); - $xmlElement->registerXPathNamespace('ns', $mainNamespace); - - return $xmlElement; - } - - /** - * Returns whether another worksheet exists after the current worksheet. - * The order is determined by the order of appearance in the [Content_Types].xml file. - * - * @param Worksheet|null $currentWorksheet The worksheet being currently read or null if reading has not started yet - * @param Worksheet[] $allWorksheets A list of all worksheets in the XLSX file. Must contain at least one worksheet - * @return bool Whether another worksheet exists after the current sheet - */ - public function hasNextWorksheet($currentWorksheet, $allWorksheets) - { - return ($currentWorksheet === null || ($currentWorksheet->getWorksheetIndex() + 1 < count($allWorksheets))); - } -} diff --git a/src/Spout/Reader/Internal/XLSX/Worksheet.php b/src/Spout/Reader/Internal/XLSX/Worksheet.php deleted file mode 100644 index 5145c28..0000000 --- a/src/Spout/Reader/Internal/XLSX/Worksheet.php +++ /dev/null @@ -1,58 +0,0 @@ -externalSheet = $externalSheet; - $this->worksheetIndex = $worksheetIndex; - $this->dataXmlFilePath = $dataXmlFilePath; - } - - /** - * @return string Path of the XML file containing the worksheet data, - * without the leading slash. - */ - public function getDataXmlFilePath() - { - return ltrim($this->dataXmlFilePath, '/'); - } - - /** - * @return \Box\Spout\Reader\Sheet The "external" sheet - */ - public function getExternalSheet() - { - return $this->externalSheet; - } - - /** - * @return int - */ - public function getWorksheetIndex() - { - return $this->worksheetIndex; - } -} diff --git a/src/Spout/Reader/ReaderFactory.php b/src/Spout/Reader/ReaderFactory.php index 9766978..800ac89 100644 --- a/src/Spout/Reader/ReaderFactory.php +++ b/src/Spout/Reader/ReaderFactory.php @@ -19,7 +19,7 @@ class ReaderFactory * This creates an instance of the appropriate reader, given the type of the file to be read * * @param string $readerType Type of the reader to instantiate - * @return \Box\Spout\Reader\CSV|\Box\Spout\Reader\XLSX + * @return \Box\Spout\Reader\CSV\Reader|\Box\Spout\Reader\XLSX\Reader * @throws \Box\Spout\Common\Exception\UnsupportedTypeException */ public static function create($readerType) @@ -28,10 +28,10 @@ class ReaderFactory switch ($readerType) { case Type::CSV: - $reader = new CSV(); + $reader = new CSV\Reader(); break; case Type::XLSX: - $reader = new XLSX(); + $reader = new XLSX\Reader(); break; default: throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType); diff --git a/src/Spout/Reader/ReaderFactory2.php b/src/Spout/Reader/ReaderFactory2.php deleted file mode 100644 index 74f5d19..0000000 --- a/src/Spout/Reader/ReaderFactory2.php +++ /dev/null @@ -1,44 +0,0 @@ -setGlobalFunctionsHelper(new GlobalFunctionsHelper()); - - return $reader; - } -} diff --git a/src/Spout/Reader/ReaderInterface.php b/src/Spout/Reader/ReaderInterface.php index 7253821..8ecde30 100644 --- a/src/Spout/Reader/ReaderInterface.php +++ b/src/Spout/Reader/ReaderInterface.php @@ -20,26 +20,12 @@ interface ReaderInterface public function open($filePath); /** - * Returns whether all rows have been read (i.e. if we are at the end of the file). - * To know if the end of file has been reached, it uses a buffer. If the buffer is - * empty (meaning, nothing has been read or previous read line has been consumed), then - * it reads the next line, store it in the buffer for the next time or flip a variable if - * the end of file has been reached. + * Returns an iterator to iterate over sheets. * - * @return bool - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first + * @return \Iterator To iterate over sheets + * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader */ - public function hasNextRow(); - - /** - * Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by - * actually reading the file. - * - * @return array Array that contains the data for the read row - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first - * @throws \Box\Spout\Reader\Exception\EndOfFileReachedException - */ - public function nextRow(); + public function getSheetIterator(); /** * Closes the reader, preventing any additional reading diff --git a/src/Spout/Reader/ReaderInterface2.php b/src/Spout/Reader/ReaderInterface2.php deleted file mode 100644 index a61c83c..0000000 --- a/src/Spout/Reader/ReaderInterface2.php +++ /dev/null @@ -1,35 +0,0 @@ -id = $sheetId; - $this->index = $sheetIndex; - $this->name = $sheetName; - } - - /** - * @return int ID of the sheet - */ - public function getId() - { - return $this->id; - } - - /** - * @return int Index of the sheet, based on order of creation (zero-based) - */ - public function getIndex() - { - return $this->index; - } - - /** - * @return string Name of the sheet - */ - public function getName() - { - return $this->name; - } -} diff --git a/src/Spout/Reader/XLSX.php b/src/Spout/Reader/XLSX.php deleted file mode 100644 index 7f176fe..0000000 --- a/src/Spout/Reader/XLSX.php +++ /dev/null @@ -1,394 +0,0 @@ -tempFolder = $tempFolder; - return $this; - } - - /** - * Opens the file at the given file path to make it ready to be read. - * It also parses the sharedStrings.xml file to get all the shared strings available in memory - * and fetches all the available worksheets. - * - * @param string $filePath Path of the file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read - * @throws Exception\NoWorksheetsFoundException If there are no worksheets in the file - */ - protected function openReader($filePath) - { - $this->filePath = $filePath; - $this->zip = new \ZipArchive(); - - if ($this->zip->open($filePath) === true) { - $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); - - if ($this->sharedStringsHelper->hasSharedStrings()) { - // Extracts all the strings from the worksheets for easy access in the future - $this->sharedStringsHelper->extractSharedStrings(); - } - - // Fetch all available worksheets - $this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper); - $this->worksheets = $this->worksheetHelper->getWorksheets($filePath); - - if (count($this->worksheets) === 0) { - throw new NoWorksheetsFoundException('The file must contain at least one worksheet.'); - } - } else { - throw new IOException('Could not open ' . $filePath . ' for reading.'); - } - } - - /** - * Returns whether another worksheet exists after the current worksheet. - * - * @return bool Whether another worksheet exists after the current worksheet. - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - */ - public function hasNextSheet() - { - if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); - } - - return $this->worksheetHelper->hasNextWorksheet($this->currentWorksheet, $this->worksheets); - } - - /** - * Moves the pointer to the current worksheet. - * Moving to another worksheet will stop the reading in the current worksheet. - * - * @return \Box\Spout\Reader\Sheet The next sheet - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - * @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read - */ - public function nextSheet() - { - if (!$this->hasNextSheet()) { - throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.'); - } - - if ($this->currentWorksheet === null) { - $nextWorksheet = $this->worksheets[0]; - } else { - $currentWorksheetIndex = $this->currentWorksheet->getWorksheetIndex(); - $nextWorksheet = $this->worksheets[$currentWorksheetIndex + 1]; - } - - $this->initXmlReaderForWorksheetData($nextWorksheet); - $this->currentWorksheet = $nextWorksheet; - - // make sure that we are ready to read more rows - $this->hasReachedEndOfFile = false; - $this->emptyRowDataBuffer(); - - return $this->currentWorksheet->getExternalSheet(); - } - - /** - * Initializes the XMLReader object that reads worksheet data for the given worksheet. - * If another worksheet was being read, it closes the reader before reopening it for the new worksheet. - * The XMLReader is configured to be safe from billion laughs attack. - * - * @param Internal\XLSX\Worksheet $worksheet The worksheet to initialize the XMLReader with - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the worksheet data XML cannot be read - */ - protected function initXmlReaderForWorksheetData($worksheet) - { - // if changing worksheet and the XMLReader was initialized for the current worksheet - if ($worksheet != $this->currentWorksheet && $this->xmlReader) { - $this->xmlReader->close(); - } else if (!$this->xmlReader) { - $this->xmlReader = new \XMLReader(); - } - - $worksheetDataXMLFilePath = $worksheet->getDataXmlFilePath(); - - $worksheetDataFilePath = 'zip://' . $this->filePath . '#' . $worksheetDataXMLFilePath; - if ($this->xmlReader->open($worksheetDataFilePath, null, LIBXML_NONET) === false) { - throw new IOException('Could not open "' . $worksheetDataXMLFilePath . '".'); - } - } - - /** - * Reads and returns data of the line that comes after the last read line, on the current worksheet. - * Empty rows will be skipped. - * - * @return array|null Array that contains the data for the read line or null at the end of the file - * @throws \Box\Spout\Common\Exception\BadUsageException If the pointer to the current worksheet has not been set - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found - */ - protected function read() - { - if (!$this->currentWorksheet) { - throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()'); - } - - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - $isInsideRowTag = false; - $rowData = []; - - while ($this->xmlReader->read()) { - if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'dimension') { - // Read dimensions of the worksheet - $dimensionRef = $this->xmlReader->getAttribute('ref'); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; - } - - } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'row') { - // Start of the row description - $isInsideRowTag = true; - - // Read spans info if present - $numberOfColumnsForRow = $this->numberOfColumns; - $spans = $this->xmlReader->getAttribute('spans'); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'c') { - // Start of a cell description - $currentCellIndex = $this->xmlReader->getAttribute('r'); - $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper); - - } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === 'row') { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numberOfColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); - break; - } - } - - // no data means "end of file" - return ($rowData !== []) ? $rowData : null; - } - - /** - * Returns the cell's string value from a node's nested value node - * - * @param \DOMNode $node - * @return string The value associated with the cell - */ - protected function getVNodeValue($node) - { - // for cell types having a "v" tag containing the value. - // if not, the returned value should be empty string. - $vNode = $node->getElementsByTagName('v')->item(0); - if ($vNode !== null) { - return $vNode->nodeValue; - } - return ""; - } - - /** - * Returns the cell String value where string is inline. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatInlineStringCellValue($node, $escaper) - { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value from shared-strings file using nodeValue index. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatSharedStringCellValue($nodeValue, $escaper) - { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($nodeValue); - $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value, where string is stored in value node. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatStrCellValue($nodeValue, $escaper) - { - $escapedCellValue = trim($nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell Numeric value from string of nodeValue. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return int|float The value associated with the cell - */ - protected function formatNumericCellValue($nodeValue) - { - $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); - return $cellValue; - } - - /** - * Returns the cell Boolean value from a specific node's Value. - * - * @param string $nodeValue - * @return bool The value associated with the cell - */ - protected function formatBooleanCellValue($nodeValue) - { - // !! is similar to boolval() - $cellValue = !!$nodeValue; - return $cellValue; - } - - /** - * Returns a cell's PHP Date value, associated to the given stored nodeValue. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return DateTime|null The value associated with the cell (null when the cell has an error) - */ - protected function formatDateCellValue($nodeValue) - { - try { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) - $cellValue = new \DateTime($nodeValue); - return $cellValue; - } catch (\Exception $e) { - return null; - } - } - - /** - * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string|int|float|bool|null The value associated with the cell (null when the cell has an error) - */ - protected function getCellValue($node, $escaper) - { - // Default cell type is "n" - $cellType = $node->getAttribute('t') ?: 'n'; - $vNodeValue = $this->getVNodeValue($node); - if ( ($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING) ) { - return $vNodeValue; - } - - switch ($cellType) - { - case self::CELL_TYPE_INLINE_STRING: - return $this->formatInlineStringCellValue($node, $escaper); - case self::CELL_TYPE_SHARED_STRING: - return $this->formatSharedStringCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_STR: - return $this->formatStrCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_BOOLEAN: - return $this->formatBooleanCellValue($vNodeValue); - case self::CELL_TYPE_NUMERIC: - return $this->formatNumericCellValue($vNodeValue); - case self::CELL_TYPE_DATE: - return $this->formatDateCellValue($vNodeValue); - default: - return null; - } - } - - /** - * Closes the reader. To be used after reading the file. - * - * @return void - */ - protected function closeReader() - { - if ($this->xmlReader) { - $this->xmlReader->close(); - } - - if ($this->zip) { - $this->zip->close(); - } - - $this->sharedStringsHelper->cleanup(); - } -} diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index 68712cc..f24d185 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; -use Box\Spout\Reader\AbstractReader2; +use Box\Spout\Reader\AbstractReader; use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper; /** @@ -12,7 +12,7 @@ use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper; * * @package Box\Spout\Reader\XLSX */ -class Reader extends AbstractReader2 +class Reader extends AbstractReader { /** @var string Temporary folder where the temporary files will be created */ protected $tempFolder; @@ -70,7 +70,7 @@ class Reader extends AbstractReader2 * * @return SheetIterator To iterate over sheets */ - public function getSheetIterator() + public function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index de55b94..922c61b 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\CSV; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -22,7 +22,17 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testOpenShouldThrowExceptionIfFileDoesNotExist() { - ReaderFactory2::create(Type::CSV)->open('/path/to/fake/file.csv'); + ReaderFactory::create(Type::CSV)->open('/path/to/fake/file.csv'); + } + + /** + * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException + * + * @return void + */ + public function testOpenShouldThrowExceptionIfTryingToReadBeforeOpeningReader() + { + ReaderFactory::create(Type::CSV)->getSheetIterator(); } /** @@ -39,7 +49,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $resourcePath = $this->getResourcePath('csv_standard.csv'); - $reader = ReaderFactory2::create(Type::CSV); + $reader = ReaderFactory::create(Type::CSV); $reader->setGlobalFunctionsHelper($helperStub); $reader->open($resourcePath); } @@ -162,7 +172,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory2::create(Type::CSV); + $reader = ReaderFactory::create(Type::CSV); $reader->setFieldDelimiter($fieldDelimiter); $reader->setFieldEnclosure($fieldEnclosure); diff --git a/tests/Spout/Reader/CSVTest.php b/tests/Spout/Reader/CSVTest.php deleted file mode 100644 index 8d02849..0000000 --- a/tests/Spout/Reader/CSVTest.php +++ /dev/null @@ -1,208 +0,0 @@ -open('/path/to/fake/file.csv'); - } - - /** - * @expectedException \Box\Spout\Common\Exception\IOException - * - * @return void - */ - public function testOpenShouldThrowExceptionIfFileNotReadable() - { - $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') - ->setMethods(['is_readable']) - ->getMock(); - $helperStub->method('is_readable')->willReturn(false); - - $resourcePath = $this->getResourcePath('csv_standard.csv'); - - $reader = ReaderFactory::create(Type::CSV); - $reader->setGlobalFunctionsHelper($helperStub); - $reader->open($resourcePath); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException - * - * @return void - */ - public function testReadShouldThrowExceptionIfReadBeforeReaderOpened() - { - $reader = ReaderFactory::create(Type::CSV); - $reader->hasNextRow(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\EndOfFileReachedException - * - * @return void - */ - public function testReadShouldThrowExceptionIfNextRowCalledAfterReadingDone() - { - $resourcePath = $this->getResourcePath('csv_standard.csv'); - - $reader = ReaderFactory::create(Type::CSV); - $reader->open($resourcePath); - - while ($reader->hasNextRow()) { - $reader->nextRow(); - } - - $reader->nextRow(); - } - - - /** - * @return void - */ - public function testReadStandardCSV() - { - $allRows = $this->getAllRowsForFile('csv_standard.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldNotStopAtCommaIfEnclosed() - { - $allRows = $this->getAllRowsForFile('csv_with_comma_enclosed.csv'); - $this->assertEquals('This is, a comma', $allRows[0][0]); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCells() - { - $allRows = $this->getAllRowsForFile('csv_with_empty_cells.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', '', 'csv--23'], - ['csv--31', 'csv--32', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyLines() - { - $allRows = $this->getAllRowsForFile('csv_with_empty_line.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldHaveTheRightNumberOfCells() - { - $allRows = $this->getAllRowsForFile('csv_with_different_cells_number.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22'], - ['csv--31'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportCustomFieldDelimiter() - { - $allRows = $this->getAllRowsForFile('csv_delimited_with_pipes.csv', '|'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ['csv--31', 'csv--32', 'csv--33'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportCustomFieldEnclosure() - { - $allRows = $this->getAllRowsForFile('csv_text_enclosed_with_pound.csv', ',', '#'); - $this->assertEquals('This is, a comma', $allRows[0][0]); - } - - /** - * @return void - */ - public function testReadShouldSkipUtf8Bom() - { - $allRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv'); - - $expectedRows = [ - ['csv--11', 'csv--12', 'csv--13'], - ['csv--21', 'csv--22', 'csv--23'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @param string $fileName - * @param string|void $fieldDelimiter - * @param string|void $fieldEnclosure - * @return array All the read rows the given file - */ - private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"') - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::CSV); - $reader->setFieldDelimiter($fieldDelimiter); - $reader->setFieldEnclosure($fieldEnclosure); - - $reader->open($resourcePath); - - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); - } - - $reader->close(); - - return $allRows; - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php b/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php deleted file mode 100644 index 8851b33..0000000 --- a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php +++ /dev/null @@ -1,60 +0,0 @@ - 1, 3 => 3]; - $filledArray = CellHelper::fillMissingArrayIndexes($arrayToFill, 'FILL'); - - $expectedFilledArray = ['FILL', 1, 'FILL', 3]; - $this->assertEquals($expectedFilledArray, $filledArray); - } - - /** - * @return array - */ - public function dataProviderForTestGetColumnIndexFromCellIndex() - { - return [ - ['A1', 0], - ['Z3', 25], - ['AA5', 26], - ['AB24', 27], - ['BC5', 54], - ['BCZ99', 1455], - ]; - } - - /** - * @dataProvider dataProviderForTestGetColumnIndexFromCellIndex - * - * @param string $cellIndex - * @param int $expectedColumnIndex - * @return void - */ - public function testGetColumnIndexFromCellIndex($cellIndex, $expectedColumnIndex) - { - $this->assertEquals($expectedColumnIndex, CellHelper::getColumnIndexFromCellIndex($cellIndex)); - } - - /** - * @expectedException \Box\Spout\Common\Exception\InvalidArgumentException - * - * @return void - */ - public function testGetColumnIndexFromCellIndexShouldThrowIfInvalidCellIndex() - { - CellHelper::getColumnIndexFromCellIndex('InvalidCellIndex'); - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php b/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php deleted file mode 100644 index 18b1c74..0000000 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsCaching/CachingStrategyFactoryTest.php +++ /dev/null @@ -1,99 +0,0 @@ -getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') - ->disableOriginalConstructor() - ->setMethods(['getMemoryLimitInKB']) - ->getMock(); - - $factoryStub->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB); - - \ReflectionHelper::setStaticValue('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); - - $strategy = $factoryStub->getBestCachingStrategy($sharedStringsUniqueCount, null); - - $fullExpectedStrategyClassName = 'Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\\' . $expectedStrategyClassName; - $this->assertEquals($fullExpectedStrategyClassName, get_class($strategy)); - - $strategy->clearCache(); - \ReflectionHelper::reset(); - } - - /** - * @return array - */ - public function dataProviderForTestGetMemoryLimitInKB() - { - return [ - ['-1', -1], - ['invalid', -1], - ['1024B', 1], - ['128K', 128], - ['256KB', 256], - ['512M', 512 * 1024], - ['2MB', 2 * 1024], - ['1G', 1 * 1024 * 1024], - ['10GB', 10 * 1024 * 1024], - ['2T', 2 * 1024 * 1024 * 1024], - ['5TB', 5 * 1024 * 1024 * 1024], - ]; - } - - /** - * @dataProvider dataProviderForTestGetMemoryLimitInKB - * - * @param string $memoryLimitFormatted - * @param float $expectedMemoryLimitInKB - * @return void - */ - public function testGetMemoryLimitInKB($memoryLimitFormatted, $expectedMemoryLimitInKB) - { - /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $factoryStub */ - $factoryStub = $this - ->getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') - ->disableOriginalConstructor() - ->setMethods(['getMemoryLimitFromIni']) - ->getMock(); - - $factoryStub->method('getMemoryLimitFromIni')->willReturn($memoryLimitFormatted); - - $memoryLimitInKB = \ReflectionHelper::callMethodOnObject($factoryStub, 'getMemoryLimitInKB'); - - $this->assertEquals($expectedMemoryLimitInKB, $memoryLimitInKB); - } -} diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php b/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php deleted file mode 100644 index 82631bc..0000000 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php +++ /dev/null @@ -1,112 +0,0 @@ -getResourcePath('one_sheet_with_shared_strings.xlsx'); - $this->sharedStringsHelper = new SharedStringsHelper($resourcePath); - } - - /** - * @return void - */ - public function tearDown() - { - $this->sharedStringsHelper->cleanup(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\SharedStringNotFoundException - * @return void - */ - public function testGetStringAtIndexShouldThrowExceptionIfStringNotFound() - { - $this->sharedStringsHelper->extractSharedStrings(); - $this->sharedStringsHelper->getStringAtIndex(PHP_INT_MAX); - } - - /** - * @return void - */ - public function testGetStringAtIndexShouldReturnTheCorrectStringIfFound() - { - $this->sharedStringsHelper->extractSharedStrings(); - - $sharedString = $this->sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals('s1--A1', $sharedString); - - $sharedString = $this->sharedStringsHelper->getStringAtIndex(24); - $this->assertEquals('s1--E5', $sharedString); - - $usedCachingStrategy = \ReflectionHelper::getValueOnObject($this->sharedStringsHelper, 'cachingStrategy'); - $this->assertTrue($usedCachingStrategy instanceof InMemoryStrategy); - } - - /** - * @return void - */ - public function testGetStringAtIndexShouldWorkWithMultilineStrings() - { - $resourcePath = $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx'); - $sharedStringsHelper = new SharedStringsHelper($resourcePath); - - $sharedStringsHelper->extractSharedStrings(); - - $sharedString = $sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals("s1\nA1", $sharedString); - - $sharedString = $sharedStringsHelper->getStringAtIndex(24); - $this->assertEquals("s1\nE5", $sharedString); - - $sharedStringsHelper->cleanup(); - } - - /** - * @return void - */ - public function testGetStringAtIndexWithFileBasedStrategy() - { - // force the file-based strategy by setting no memory limit - $originalMemoryLimit = ini_get('memory_limit'); - ini_set('memory_limit', '-1'); - - $resourcePath = $this->getResourcePath('sheet_with_lots_of_shared_strings.xlsx'); - $sharedStringsHelper = new SharedStringsHelper($resourcePath); - - $sharedStringsHelper->extractSharedStrings(); - - $sharedString = $sharedStringsHelper->getStringAtIndex(0); - $this->assertEquals('str', $sharedString); - - $sharedString = $sharedStringsHelper->getStringAtIndex(CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE + 1); - $this->assertEquals('str', $sharedString); - - $usedCachingStrategy = \ReflectionHelper::getValueOnObject($sharedStringsHelper, 'cachingStrategy'); - $this->assertTrue($usedCachingStrategy instanceof FileBasedStrategy); - - $sharedStringsHelper->cleanup(); - - ini_set('memory_limit', $originalMemoryLimit); - } -} diff --git a/tests/Spout/Reader/SheetTest.php b/tests/Spout/Reader/SheetTest.php deleted file mode 100644 index 5f6e02d..0000000 --- a/tests/Spout/Reader/SheetTest.php +++ /dev/null @@ -1,52 +0,0 @@ -openFileAndReturnSheets('two_sheets_with_custom_names.xlsx'); - - $this->assertEquals('CustomName1', $sheets[0]->getName()); - $this->assertEquals(0, $sheets[0]->getIndex()); - $this->assertEquals(1, $sheets[0]->getId()); - - $this->assertEquals('CustomName2', $sheets[1]->getName()); - $this->assertEquals(1, $sheets[1]->getIndex()); - $this->assertEquals(2, $sheets[1]->getId()); - } - - /** - * @param string $fileName - * @return Sheet[] - */ - private function openFileAndReturnSheets($fileName) - { - $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - $sheets = []; - while ($reader->hasNextSheet()) { - $sheets[] = $reader->nextSheet(); - } - - $reader->close(); - - return $sheets; - } -} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index c5fb583..9643d54 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -4,7 +4,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -284,7 +284,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $allRows = []; $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory2::create(Type::XLSX); + $reader = ReaderFactory::create(Type::XLSX); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/Spout/Reader/XLSX/SheetTest.php b/tests/Spout/Reader/XLSX/SheetTest.php index c9449f4..8f3f9df 100644 --- a/tests/Spout/Reader/XLSX/SheetTest.php +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -3,7 +3,7 @@ namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Type; -use Box\Spout\Reader\ReaderFactory2; +use Box\Spout\Reader\ReaderFactory; use Box\Spout\TestUsingResource; /** @@ -38,7 +38,7 @@ class SheetTest extends \PHPUnit_Framework_TestCase private function openFileAndReturnSheets($fileName) { $resourcePath = $this->getResourcePath($fileName); - $reader = ReaderFactory2::create(Type::XLSX); + $reader = ReaderFactory::create(Type::XLSX); $reader->open($resourcePath); $sheets = []; diff --git a/tests/Spout/Reader/XLSXTest.php b/tests/Spout/Reader/XLSXTest.php deleted file mode 100644 index 531bdb8..0000000 --- a/tests/Spout/Reader/XLSXTest.php +++ /dev/null @@ -1,332 +0,0 @@ -getAllRowsForFile($filePath); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException - * - * @return void - */ - public function testHasNextSheetShouldThrowExceptionIfReaderNotOpened() - { - $reader = ReaderFactory::create(Type::XLSX); - $reader->hasNextSheet(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\EndOfWorksheetsReachedException - * - * @return void - */ - public function testNextSheetShouldThrowExceptionIfNoMoreSheetsToRead() - { - $fileName = 'one_sheet_with_shared_strings.xlsx'; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - } - - $reader->nextSheet(); - } - - /** - * @return array - */ - public function dataProviderForTestReadForAllWorksheets() - { - return [ - ['one_sheet_with_shared_strings.xlsx', 5, 5], - ['one_sheet_with_inline_strings.xlsx', 5, 5], - ['two_sheets_with_shared_strings.xlsx', 10, 5], - ['two_sheets_with_inline_strings.xlsx', 10, 5] - ]; - } - - /** - * @dataProvider dataProviderForTestReadForAllWorksheets - * - * @param string $resourceName - * @param int $expectedNumOfRows - * @param int $expectedNumOfCellsPerRow - * @return void - */ - public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow) - { - $allRows = $this->getAllRowsForFile($resourceName); - - $this->assertEquals($expectedNumOfRows, count($allRows), "There should be $expectedNumOfRows rows"); - foreach ($allRows as $row) { - $this->assertEquals($expectedNumOfCellsPerRow, count($row), "There should be $expectedNumOfCellsPerRow cells for every row"); - } - } - - /** - * @return void - */ - public function testReadShouldSupportFilesWithoutSharedStringsFile() - { - $allRows = $this->getAllRowsForFile('sheet_with_no_shared_strings_file.xlsx'); - - $expectedRows = [ - [10, 11], - [20, 21], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportAllCellTypes() - { - $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); - - $expectedRows = [ - [ - 's1--A1', 's1--A2', - false, true, - \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-03 13:21:58'), - \DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'), - 10, 10.43, - null, - 'weird string', // valid 'str' string - null, // invalid date - ], - ['', '', '', '', '', '', '', '', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCellsAtTheEndIfDimensionsSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_but_spans_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - foreach ($allRows as $row) { - $this->assertEquals(5, count($row), 'There should be 5 cells for every row, because empty rows should be preserved'); - } - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2', '', ''], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldKeepEmptyCellsAtTheEndIfNoDimensionsButSpansSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); - $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be 2 rows'); - $this->assertEquals(5, count($allRows[0]), 'There should be 5 cells in the first row'); - $this->assertEquals(3, count($allRows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A2', 's1--B2', 's1--C2'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipEmptyRows() - { - $allRows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx'); - - $this->assertEquals(2, count($allRows), 'There should be only 2 rows, because the empty row is skipped'); - - $expectedRows = [ - ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'], - ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSupportEmptySharedString() - { - $allRows = $this->getAllRowsForFile('sheet_with_empty_shared_string.xlsx'); - - $expectedRows = [ - ['s1--A1', '', 's1--C1'], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldPreserveSpaceIfSpecified() - { - $allRows = $this->getAllRowsForFile('sheet_with_preserve_space_shared_strings.xlsx'); - - $expectedRows = [ - [' s1--A1', 's1--B1 ', ' s1--C1 '], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @return void - */ - public function testReadShouldSkipPronunciationData() - { - $allRows = $this->getAllRowsForFile('sheet_with_pronunciation.xlsx'); - - $expectedRow = ['名前', '一二三四']; - $this->assertEquals($expectedRow, $allRows[0], 'Pronunciation data should be removed.'); - } - - - /** - * @return array - */ - public function dataProviderForTestReadShouldBeProtectedAgainstAttacks() - { - return [ - ['attack_billion_laughs.xlsx'], - ['attack_quadratic_blowup.xlsx'], - ]; - } - - /** - * @dataProvider dataProviderForTestReadShouldBeProtectedAgainstAttacks - * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used) - * - * @param string $fileName - * @return void - */ - public function testReadShouldBeProtectedAgainstAttacks($fileName) - { - $startTime = microtime(true); - - try { - $this->getAllRowsForFile($fileName); - $this->fail('An exception should have been thrown'); - } catch (IOException $exception) { - $duration = microtime(true) - $startTime; - $this->assertLessThan(10, $duration, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.'); - - $expectedMaxMemoryUsage = 30 * 1024 * 1024; // 30MB - $this->assertLessThan($expectedMaxMemoryUsage, memory_get_peak_usage(true), 'Entities should not be expanded and therefore consume all the memory.'); - } - } - - /** - * @return void - */ - public function testReadShouldBeAbleToProcessEmptySheets() - { - $allRows = $this->getAllRowsForFile('sheet_with_no_cells.xlsx'); - $this->assertEquals([], $allRows, 'Sheet with no cells should be correctly processed.'); - } - - /** - * @return void - */ - public function testReadShouldSkipFormulas() - { - $allRows = $this->getAllRowsForFile('sheet_with_formulas.xlsx'); - - $expectedRows = [ - ['val1', 'val2', 'total1', 'total2'], - [10, 20, 30, 21], - [11, 21, 32, 41], - ]; - $this->assertEquals($expectedRows, $allRows); - } - - /** - * @param string $fileName - * @return array All the read rows the given file - */ - private function getAllRowsForFile($fileName) - { - $allRows = []; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); - } - } - - $reader->close(); - - return $allRows; - } -}