diff --git a/README.md b/README.md index 6686a2b..5bf9ed1 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,10 @@ use Box\Spout\Common\Type; $reader = ReaderFactory::create(Type::CSV); $reader->open($filePath); -while ($reader->hasNextRow()) { - $row = $reader->nextRow(); - // do stuff +foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + // do stuff + } } $reader->close(); @@ -81,11 +82,8 @@ use Box\Spout\Common\Type; $reader = ReaderFactory::create(Type::XLSX); $reader->open($filePath); -while ($reader->hasNextSheet()) { - $reader->nextSheet(); - - while ($reader->hasNextRow()) { - $row = $reader->nextRow(); +foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { // do stuff } } @@ -202,8 +200,7 @@ $sheets = $writer->getSheets(); If you rely on the sheet's name in your application, you can access it and customize it this way: ```php // Accessing the sheet name when reading -while ($reader->hasNextSheet()) { - $sheet = $reader->nextSheet(); +foreach ($reader->getSheetIterator() as $sheet) { $sheetName = $sheet->getName(); } @@ -253,7 +250,7 @@ For information, the performance tests take about one hour to run (processing 2 When writing data, Spout is streaming the data to files, one or few lines at a time. That means that it only keeps in memory the few rows that it needs to write. Once written, the memory is freed. -Same goes with reading. Only one row at a time is stored in memory. A special technique is used to handle shared strings in XLSX, storing them into several small temporary files that allows fast access. +Same goes with reading. Only one row at a time is stored in memory. A special technique is used to handle shared strings in XLSX, storing them - if needed - into several small temporary files that allow fast access. #### How long does it take to generate a file with X rows? 
diff --git a/phpunit.xml b/phpunit.xml index fc6d657..06ddf63 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -5,7 +5,6 @@ colors="true" convertErrorsToExceptions="false" convertWarningsToExceptions="false" - strict="false" verbose="false"> diff --git a/src/Spout/Common/Helper/FileSystemHelper.php b/src/Spout/Common/Helper/FileSystemHelper.php index d7ca64f..6186822 100644 --- a/src/Spout/Common/Helper/FileSystemHelper.php +++ b/src/Spout/Common/Helper/FileSystemHelper.php @@ -63,7 +63,7 @@ class FileSystemHelper $filePath = $parentFolderPath . '/' . $fileName; $wasCreationSuccessful = file_put_contents($filePath, $fileContents); - if (!$wasCreationSuccessful) { + if ($wasCreationSuccessful === false) { throw new IOException('Unable to create file: ' . $filePath); } diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php index feeb782..47ed052 100644 --- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php +++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php @@ -160,7 +160,7 @@ class GlobalFunctionsHelper * @see file_get_contents() * * @param string $filePath - * @return bool + * @return string */ public function file_get_contents($filePath) { diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index 2e2e2de..bfbedf8 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -4,7 +4,6 @@ namespace Box\Spout\Reader; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\ReaderNotOpenedException; -use Box\Spout\Reader\Exception\EndOfFileReachedException; /** * Class AbstractReader @@ -14,18 +13,9 @@ use Box\Spout\Reader\Exception\EndOfFileReachedException; */ abstract class AbstractReader implements ReaderInterface { - /** @var int Used to keep track of the row index */ - protected $currentRowIndex = 0; - /** @var bool Indicates whether the stream is currently open */ protected $isStreamOpened = false; - /** @var bool 
Indicates whether all rows have been read */ - protected $hasReachedEndOfFile = false; - - /** @var array Buffer used to store the row data, while checking if there are more rows to read */ - protected $rowDataBuffer = null; - /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -38,11 +28,11 @@ abstract class AbstractReader implements ReaderInterface abstract protected function openReader($filePath); /** - * Reads and returns next row if available. + * Returns an iterator to iterate over sheets. * - * @return array|null Array that contains the data for the read row or null at the end of the file + * @return \Iterator To iterate over sheets */ - abstract protected function read(); + abstract public function getConcreteSheetIterator(); /** * Closes the reader. To be used after reading the file. @@ -80,9 +70,6 @@ abstract class AbstractReader implements ReaderInterface } } - $this->currentRowIndex = 0; - $this->hasReachedEndOfFile = false; - try { $this->openReader($filePath); $this->isStreamOpened = true; @@ -103,82 +90,18 @@ abstract class AbstractReader implements ReaderInterface } /** - * Returns whether all rows have been read (i.e. if we are at the end of the file). - * To know if the end of file has been reached, it uses a buffer. If the buffer is - * empty (meaning, nothing has been read or previous read line has been consumed), then - * it reads the next line, store it in the buffer for the next time or flip a variable if - * the end of file has been reached. + * Returns an iterator to iterate over sheets. * - * @return bool Whether all rows have been read (i.e. 
if we are at the end of the file) - * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If the stream was not opened first + * @return \Iterator To iterate over sheets + * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader */ - public function hasNextRow() + public function getSheetIterator() { if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); + throw new ReaderNotOpenedException('Reader should be opened first.'); } - if ($this->hasReachedEndOfFile) { - return false; - } - - // if the buffer contains unprocessed row - if (!$this->isRowDataBufferEmpty()) { - return true; - } - - // otherwise, try to read the next line line, and store it in the buffer - $this->rowDataBuffer = $this->read(); - - // if the buffer is still empty after reading a row, it means end of file was reached - $this->hasReachedEndOfFile = $this->isRowDataBufferEmpty(); - - return (!$this->hasReachedEndOfFile); - } - - /** - * Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by - * actually reading the file. - * - * @return array Array that contains the data for the read row - * @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first - * @throws \Box\Spout\Reader\Exception\EndOfFileReachedException - */ - public function nextRow() - { - if (!$this->hasNextRow()) { - throw new EndOfFileReachedException('End of file was reached. 
Cannot read more rows.'); - } - - // Get data from buffer (if the buffer was empty, it was filled by the call to hasNextRow()) - $rowData = $this->rowDataBuffer; - - // empty buffer to mark the row as consumed - $this->emptyRowDataBuffer(); - - $this->currentRowIndex++; - - return $rowData; - } - - /** - * Returns whether the buffer where the row data is stored is empty - * - * @return bool - */ - protected function isRowDataBufferEmpty() - { - return ($this->rowDataBuffer === null); - } - - /** - * Empty the buffer that stores row data - * - * @return void - */ - protected function emptyRowDataBuffer() - { - $this->rowDataBuffer = null; + return $this->getConcreteSheetIterator(); } /** @@ -190,6 +113,12 @@ abstract class AbstractReader implements ReaderInterface { if ($this->isStreamOpened) { $this->closeReader(); + + $sheetIterator = $this->getConcreteSheetIterator(); + if ($sheetIterator) { + $sheetIterator->end(); + } + $this->isStreamOpened = false; } } diff --git a/src/Spout/Reader/CSV.php b/src/Spout/Reader/CSV.php deleted file mode 100644 index 2da160f..0000000 --- a/src/Spout/Reader/CSV.php +++ /dev/null @@ -1,130 +0,0 @@ -fieldDelimiter = $fieldDelimiter; - return $this; - } - - /** - * Sets the field enclosure for the CSV - * - * @param string $fieldEnclosure Character that enclose fields - * @return CSV - */ - public function setFieldEnclosure($fieldEnclosure) - { - $this->fieldEnclosure = $fieldEnclosure; - return $this; - } - - /** - * Opens the file at the given path to make it ready to be read. - * The file must be UTF-8 encoded. - * @TODO add encoding detection/conversion - * - * @param string $filePath Path of the CSV file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException - */ - protected function openReader($filePath) - { - $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); - if (!$this->filePointer) { - throw new IOException('Could not open file ' . $filePath . 
' for reading.'); - } - - $this->skipUtf8Bom(); - } - - /** - * This skips the UTF-8 BOM if inserted at the beginning of the file - * by moving the file pointer after it, so that it is not read. - * - * @return void - */ - protected function skipUtf8Bom() - { - $this->globalFunctionsHelper->rewind($this->filePointer); - - $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); - - if ($hasUtf8Bom) { - // we skip the 2 first bytes (so start from the 3rd byte) - $this->globalFunctionsHelper->fseek($this->filePointer, 3); - } else { - // if no BOM, reset the pointer to read from the beginning - $this->globalFunctionsHelper->fseek($this->filePointer, 0); - } - } - - /** - * Reads and returns next row if available. - * Empty rows are skipped. - * - * @return array|null Array that contains the data for the read row or null at the end of the file - */ - protected function read() - { - $lineData = null; - - if ($this->filePointer) { - do { - $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); - } while ($lineData && $this->isEmptyLine($lineData)); - } - - // When reaching the end of the file, return null instead of false - return ($lineData !== false) ? $lineData : null; - } - - /** - * @param array $lineData Array containing the cells value for the line - * @return bool Whether the given line is empty - */ - protected function isEmptyLine($lineData) - { - return (count($lineData) === 1 && $lineData[0] === null); - } - - /** - * Closes the reader. To be used after reading the file. 
- * - * @return void - */ - protected function closeReader() - { - if ($this->filePointer) { - $this->globalFunctionsHelper->fclose($this->filePointer); - } - } -} diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php new file mode 100644 index 0000000..9f9e56f --- /dev/null +++ b/src/Spout/Reader/CSV/Reader.php @@ -0,0 +1,95 @@ +fieldDelimiter = $fieldDelimiter; + return $this; + } + + /** + * Sets the field enclosure for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldEnclosure Character that enclose fields + * @return Reader + */ + public function setFieldEnclosure($fieldEnclosure) + { + $this->fieldEnclosure = $fieldEnclosure; + return $this; + } + + /** + * Opens the file at the given path to make it ready to be read. + * The file must be UTF-8 encoded. + * @TODO add encoding detection/conversion + * + * @param string $filePath Path of the CSV file to be read + * @return void + * @throws \Box\Spout\Common\Exception\IOException + */ + protected function openReader($filePath) + { + $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); + if (!$this->filePointer) { + throw new IOException('Could not open file ' . $filePath . ' for reading.'); + } + + $this->sheetIterator = new SheetIterator($this->filePointer, $this->fieldDelimiter, $this->fieldEnclosure, $this->globalFunctionsHelper); + } + + /** + * Returns an iterator to iterate over sheets. + * + * @return SheetIterator To iterate over sheets + */ + public function getConcreteSheetIterator() + { + return $this->sheetIterator; + } + + + /** + * Closes the reader. To be used after reading the file. 
+ * + * @return void + */ + protected function closeReader() + { + if ($this->filePointer) { + $this->globalFunctionsHelper->fclose($this->filePointer); + } + } +} diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php new file mode 100644 index 0000000..ffb533f --- /dev/null +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -0,0 +1,163 @@ +filePointer = $filePointer; + $this->fieldDelimiter = $fieldDelimiter; + $this->fieldEnclosure = $fieldEnclosure; + $this->globalFunctionsHelper = $globalFunctionsHelper; + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->rewindAndSkipUtf8Bom(); + + $this->numReadRows = 0; + $this->rowDataBuffer = null; + + $this->next(); + } + + /** + * This rewinds and skips the UTF-8 BOM if inserted at the beginning of the file + * by moving the file pointer after it, so that it is not read. + * + * @return void + */ + protected function rewindAndSkipUtf8Bom() + { + $this->globalFunctionsHelper->rewind($this->filePointer); + + $hasUtf8Bom = ($this->globalFunctionsHelper->fgets($this->filePointer, 4) === self::UTF8_BOM); + + if ($hasUtf8Bom) { + // we skip the 2 first bytes (so start from the 3rd byte) + $this->globalFunctionsHelper->fseek($this->filePointer, 3); + } else { + // if no BOM, reset the pointer to read from the beginning + $this->globalFunctionsHelper->fseek($this->filePointer, 0); + } + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->filePointer && !$this->hasReachedEndOfFile); + } + + /** + * Move forward to next element. Empty rows are skipped. 
+ * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $lineData = null; + $this->hasReachedEndOfFile = feof($this->filePointer); + + if (!$this->hasReachedEndOfFile) { + do { + $lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure); + } while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData))); + + if ($lineData !== false && $lineData !== null) { + $this->rowDataBuffer = $lineData; + $this->numReadRows++; + } + } + } + + /** + * @param array $lineData Array containing the cells value for the line + * @return bool Whether the given line is empty + */ + protected function isEmptyLine($lineData) + { + return (count($lineData) === 1 && $lineData[0] === null); + } + + /** + * Return the current element from the buffer + * @link http://php.net/manual/en/iterator.current.php + * + * @return array|null + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php new file mode 100644 index 0000000..207fcae --- /dev/null +++ b/src/Spout/Reader/CSV/Sheet.php @@ -0,0 +1,35 @@ +rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } +} diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php new file mode 100644 index 0000000..f424cd8 --- /dev/null +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -0,0 +1,96 @@ +sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $globalFunctionsHelper); + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->hasReadUniqueSheet = false; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return (!$this->hasReadUniqueSheet); + } + + /** + * Move forward to next element + * @link http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + $this->hasReadUniqueSheet = true; + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheet; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return 1; + } + + /** + * Cleans up what was created to iterate over the object. 
+ * + * @return void + */ + public function end() + { + // do nothing + } +} diff --git a/src/Spout/Reader/Exception/EndOfWorksheetsReachedException.php b/src/Spout/Reader/Exception/EndOfWorksheetsReachedException.php deleted file mode 100644 index f2000c0..0000000 --- a/src/Spout/Reader/Exception/EndOfWorksheetsReachedException.php +++ /dev/null @@ -1,12 +0,0 @@ -externalSheet = $externalSheet; - $this->worksheetIndex = $worksheetIndex; - $this->dataXmlFilePath = $dataXmlFilePath; - } - - /** - * @return string Path of the XML file containing the worksheet data, - * without the leading slash. - */ - public function getDataXmlFilePath() - { - return ltrim($this->dataXmlFilePath, '/'); - } - - /** - * @return \Box\Spout\Reader\Sheet The "external" sheet - */ - public function getExternalSheet() - { - return $this->externalSheet; - } - - /** - * @return int - */ - public function getWorksheetIndex() - { - return $this->worksheetIndex; - } -} diff --git a/src/Spout/Reader/IteratorInterface.php b/src/Spout/Reader/IteratorInterface.php new file mode 100644 index 0000000..7d58e28 --- /dev/null +++ b/src/Spout/Reader/IteratorInterface.php @@ -0,0 +1,18 @@ +tempFolder = $tempFolder; - return $this; - } - - /** - * Opens the file at the given file path to make it ready to be read. - * It also parses the sharedStrings.xml file to get all the shared strings available in memory - * and fetches all the available worksheets. 
- * - * @param string $filePath Path of the file to be read - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read - * @throws Exception\NoWorksheetsFoundException If there are no worksheets in the file - */ - protected function openReader($filePath) - { - $this->filePath = $filePath; - $this->zip = new \ZipArchive(); - - if ($this->zip->open($filePath) === true) { - $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); - - if ($this->sharedStringsHelper->hasSharedStrings()) { - // Extracts all the strings from the worksheets for easy access in the future - $this->sharedStringsHelper->extractSharedStrings(); - } - - // Fetch all available worksheets - $this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper); - $this->worksheets = $this->worksheetHelper->getWorksheets($filePath); - - if (count($this->worksheets) === 0) { - throw new NoWorksheetsFoundException('The file must contain at least one worksheet.'); - } - } else { - throw new IOException('Could not open ' . $filePath . ' for reading.'); - } - } - - /** - * Returns whether another worksheet exists after the current worksheet. - * - * @return bool Whether another worksheet exists after the current worksheet. - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - */ - public function hasNextSheet() - { - if (!$this->isStreamOpened) { - throw new ReaderNotOpenedException('Stream should be opened first.'); - } - - return $this->worksheetHelper->hasNextWorksheet($this->currentWorksheet, $this->worksheets); - } - - /** - * Moves the pointer to the current worksheet. - * Moving to another worksheet will stop the reading in the current worksheet. 
- * - * @return \Box\Spout\Reader\Sheet The next sheet - * @throws Exception\ReaderNotOpenedException If the stream was not opened first - * @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read - */ - public function nextSheet() - { - if (!$this->hasNextSheet()) { - throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.'); - } - - if ($this->currentWorksheet === null) { - $nextWorksheet = $this->worksheets[0]; - } else { - $currentWorksheetIndex = $this->currentWorksheet->getWorksheetIndex(); - $nextWorksheet = $this->worksheets[$currentWorksheetIndex + 1]; - } - - $this->initXmlReaderForWorksheetData($nextWorksheet); - $this->currentWorksheet = $nextWorksheet; - - // make sure that we are ready to read more rows - $this->hasReachedEndOfFile = false; - $this->emptyRowDataBuffer(); - - return $this->currentWorksheet->getExternalSheet(); - } - - /** - * Initializes the XMLReader object that reads worksheet data for the given worksheet. - * If another worksheet was being read, it closes the reader before reopening it for the new worksheet. - * The XMLReader is configured to be safe from billion laughs attack. - * - * @param Internal\XLSX\Worksheet $worksheet The worksheet to initialize the XMLReader with - * @return void - * @throws \Box\Spout\Common\Exception\IOException If the worksheet data XML cannot be read - */ - protected function initXmlReaderForWorksheetData($worksheet) - { - // if changing worksheet and the XMLReader was initialized for the current worksheet - if ($worksheet != $this->currentWorksheet && $this->xmlReader) { - $this->xmlReader->close(); - } else if (!$this->xmlReader) { - $this->xmlReader = new \XMLReader(); - } - - $worksheetDataXMLFilePath = $worksheet->getDataXmlFilePath(); - - $worksheetDataFilePath = 'zip://' . $this->filePath . '#' . 
$worksheetDataXMLFilePath; - if ($this->xmlReader->open($worksheetDataFilePath, null, LIBXML_NONET) === false) { - throw new IOException('Could not open "' . $worksheetDataXMLFilePath . '".'); - } - } - - /** - * Reads and returns data of the line that comes after the last read line, on the current worksheet. - * Empty rows will be skipped. - * - * @return array|null Array that contains the data for the read line or null at the end of the file - * @throws \Box\Spout\Common\Exception\BadUsageException If the pointer to the current worksheet has not been set - * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found - */ - protected function read() - { - if (!$this->currentWorksheet) { - throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()'); - } - - $escaper = new \Box\Spout\Common\Escaper\XLSX(); - $isInsideRowTag = false; - $rowData = []; - - while ($this->xmlReader->read()) { - if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'dimension') { - // Read dimensions of the worksheet - $dimensionRef = $this->xmlReader->getAttribute('ref'); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; - } - - } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'row') { - // Start of the row description - $isInsideRowTag = true; - - // Read spans info if present - $numberOfColumnsForRow = $this->numberOfColumns; - $spans = $this->xmlReader->getAttribute('spans'); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? 
array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === 'c') { - // Start of a cell description - $currentCellIndex = $this->xmlReader->getAttribute('r'); - $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper); - - } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === 'row') { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numberOfColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); - break; - } - } - - // no data means "end of file" - return ($rowData !== []) ? $rowData : null; - } - - /** - * Returns the cell's string value from a node's nested value node - * - * @param \DOMNode $node - * @return string The value associated with the cell - */ - protected function getVNodeValue($node) - { - // for cell types having a "v" tag containing the value. - // if not, the returned value should be empty string. - $vNode = $node->getElementsByTagName('v')->item(0); - if ($vNode !== null) { - return $vNode->nodeValue; - } - return ""; - } - - /** - * Returns the cell String value where string is inline. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatInlineStringCellValue($node, $escaper) - { - // inline strings are formatted this way: - // [INLINE_STRING] - $tNode = $node->getElementsByTagName('t')->item(0); - $escapedCellValue = trim($tNode->nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value from shared-strings file using nodeValue index. 
- * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatSharedStringCellValue($nodeValue, $escaper) - { - // shared strings are formatted this way: - // [SHARED_STRING_INDEX] - $sharedStringIndex = intval($nodeValue); - $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell String value, where string is stored in value node. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string The value associated with the cell (null when the cell has an error) - */ - protected function formatStrCellValue($nodeValue, $escaper) - { - $escapedCellValue = trim($nodeValue); - $cellValue = $escaper->unescape($escapedCellValue); - return $cellValue; - } - - /** - * Returns the cell Numeric value from string of nodeValue. - * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return int|float The value associated with the cell - */ - protected function formatNumericCellValue($nodeValue) - { - $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); - return $cellValue; - } - - /** - * Returns the cell Boolean value from a specific node's Value. - * - * @param string $nodeValue - * @return bool The value associated with the cell - */ - protected function formatBooleanCellValue($nodeValue) - { - // !! is similar to boolval() - $cellValue = !!$nodeValue; - return $cellValue; - } - - /** - * Returns a cell's PHP Date value, associated to the given stored nodeValue. 
- * - * @param string $nodeValue - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return DateTime|null The value associated with the cell (null when the cell has an error) - */ - protected function formatDateCellValue($nodeValue) - { - try { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) - $cellValue = new \DateTime($nodeValue); - return $cellValue; - } catch (\Exception $e) { - return null; - } - } - - /** - * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. - * - * @param \DOMNode $node - * @param \Box\Spout\Common\Escaper\XLSX $escaper - * @return string|int|float|bool|null The value associated with the cell (null when the cell has an error) - */ - protected function getCellValue($node, $escaper) - { - // Default cell type is "n" - $cellType = $node->getAttribute('t') ?: 'n'; - $vNodeValue = $this->getVNodeValue($node); - if ( ($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING) ) { - return $vNodeValue; - } - - switch ($cellType) - { - case self::CELL_TYPE_INLINE_STRING: - return $this->formatInlineStringCellValue($node, $escaper); - case self::CELL_TYPE_SHARED_STRING: - return $this->formatSharedStringCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_STR: - return $this->formatStrCellValue($vNodeValue, $escaper); - case self::CELL_TYPE_BOOLEAN: - return $this->formatBooleanCellValue($vNodeValue); - case self::CELL_TYPE_NUMERIC: - return $this->formatNumericCellValue($vNodeValue); - case self::CELL_TYPE_DATE: - return $this->formatDateCellValue($vNodeValue); - default: - return null; - } - } - - /** - * Closes the reader. To be used after reading the file. 
- * - * @return void - */ - protected function closeReader() - { - if ($this->xmlReader) { - $this->xmlReader->close(); - } - - if ($this->zip) { - $this->zip->close(); - } - - $this->sharedStringsHelper->cleanup(); - } -} diff --git a/src/Spout/Reader/Helper/XLSX/CellHelper.php b/src/Spout/Reader/XLSX/Helper/CellHelper.php similarity index 97% rename from src/Spout/Reader/Helper/XLSX/CellHelper.php rename to src/Spout/Reader/XLSX/Helper/CellHelper.php index 7125ded..d2d31e8 100644 --- a/src/Spout/Reader/Helper/XLSX/CellHelper.php +++ b/src/Spout/Reader/XLSX/Helper/CellHelper.php @@ -1,6 +1,6 @@ message}]"); + $readErrorMessage = trim($readError->message); + throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readErrorMessage}]"); } // reset the setting to display XML warnings/errors diff --git a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php similarity index 78% rename from src/Spout/Reader/Helper/XLSX/WorksheetHelper.php rename to src/Spout/Reader/XLSX/Helper/SheetHelper.php index d869dd4..a3431ae 100644 --- a/src/Spout/Reader/Helper/XLSX/WorksheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -1,17 +1,16 @@ filePath = $filePath; + $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; } /** - * Returns the file paths of the worksheet data XML files within the XLSX file. - * The paths are read from the [Content_Types].xml file. + * Returns the sheets metadata of the file located at the previously given file path. + * The paths to the sheets' data are read from the [Content_Types].xml file. 
* - * @return Worksheet[] Worksheets within the XLSX file + * @return Sheet[] Sheets within the XLSX file */ - public function getWorksheets() + public function getSheets() { - $worksheets = []; + $sheets = []; $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace( self::CONTENT_TYPES_XML_FILE_PATH, self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML ); - // find all nodes defining a worksheet + // find all nodes defining a sheet $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]'); + $numSheetNodes = count($sheetNodes); - for ($i = 0; $i < count($sheetNodes); $i++) { + for ($i = 0; $i < $numSheetNodes; $i++) { $sheetNode = $sheetNodes[$i]; $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName; - $sheet = $this->getSheet($sheetDataXMLFilePath, $i); - $worksheets[] = new Worksheet($sheet, $i, $sheetDataXMLFilePath); + $sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath, $i); } - return $worksheets; + return $sheets; } /** @@ -92,9 +96,9 @@ class WorksheetHelper * * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based) - * @return \Box\Spout\Reader\Sheet Sheet instance + * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance */ - protected function getSheet($sheetDataXMLFilePath, $sheetIndexZeroBased) + protected function getSheetFromXML($sheetDataXMLFilePath, $sheetIndexZeroBased) { $sheetId = $sheetIndexZeroBased + 1; $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath); @@ -126,14 +130,14 @@ class WorksheetHelper } } - return new Sheet($sheetId, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetId, $sheetIndexZeroBased, $sheetName); } /** * Returns the default name of the sheet whose data is located * at the given path. 
* - * @param $sheetDataXMLFilePath + * @param string $sheetDataXMLFilePath Path of the sheet data XML file * @return string The default sheet name */ protected function getDefaultSheetName($sheetDataXMLFilePath) @@ -193,17 +197,4 @@ class WorksheetHelper return $xmlElement; } - - /** - * Returns whether another worksheet exists after the current worksheet. - * The order is determined by the order of appearance in the [Content_Types].xml file. - * - * @param Worksheet|null $currentWorksheet The worksheet being currently read or null if reading has not started yet - * @param Worksheet[] $allWorksheets A list of all worksheets in the XLSX file. Must contain at least one worksheet - * @return bool Whether another worksheet exists after the current sheet - */ - public function hasNextWorksheet($currentWorksheet, $allWorksheets) - { - return ($currentWorksheet === null || ($currentWorksheet->getWorksheetIndex() + 1 < count($allWorksheets))); - } } diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php new file mode 100644 index 0000000..f24d185 --- /dev/null +++ b/src/Spout/Reader/XLSX/Reader.php @@ -0,0 +1,93 @@ +tempFolder = $tempFolder; + return $this; + } + + /** + * Opens the file at the given file path to make it ready to be read. + * It also parses the sharedStrings.xml file to get all the shared strings available in memory + * and fetches all the available sheets. 
+     *
+     * @param string $filePath Path of the file to be read
+     * @return void
+     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read
+     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
+     */
+    protected function openReader($filePath)
+    {
+        $this->zip = new \ZipArchive();
+
+        // ZipArchive::open() yields true only on success; anything else means the archive is unusable
+        if ($this->zip->open($filePath) !== true) {
+            throw new IOException('Could not open ' . $filePath . ' for reading.');
+        }
+
+        $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder);
+        if ($this->sharedStringsHelper->hasSharedStrings()) {
+            // Extract the shared strings up front, so they are available while the sheets are read
+            $this->sharedStringsHelper->extractSharedStrings();
+        }
+
+        $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
+    }
+
+    /**
+     * Gives access to the sheet iterator that was created when the reader was opened.
+     *
+     * @return SheetIterator Iterator over the sheets of the opened file
+     */
+    public function getConcreteSheetIterator()
+    {
+        return $this->sheetIterator;
+    }
+
+    /**
+     * Closes the reader. To be used after reading the file.
+     *
+     * @return void
+     */
+    protected function closeReader()
+    {
+        if ($this->zip) {
+            $this->zip->close();
+        }
+
+        if ($this->sharedStringsHelper) {
+            $this->sharedStringsHelper->cleanup();
+        }
+    }
+}
diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php
new file mode 100644
index 0000000..6fc1dde
--- /dev/null
+++ b/src/Spout/Reader/XLSX/RowIterator.php
@@ -0,0 +1,369 @@
+filePath = $filePath;
+        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
+        $this->sharedStringsHelper = $sharedStringsHelper;
+
+        $this->xmlReader = new \XMLReader();
+        $this->escaper = new \Box\Spout\Common\Escaper\XLSX();
+    }
+
+    /**
+     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
+     * @return string Path of the XML file containing the sheet data,
+     *                without the leading slash.
+     */
+    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
+    {
+        return ltrim($sheetDataXMLFilePath, '/');
+    }
+
+    /**
+     * Rewind the Iterator to the first element.
+     * Initializes the XMLReader object that reads the associated sheet data.
+     * The XMLReader is configured to be safe from billion laughs attack.
+     * @link http://php.net/manual/en/iterator.rewind.php
+     *
+     * @return void
+     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
+     */
+    public function rewind()
+    {
+        $this->xmlReader->close();
+
+        $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
+        if ($this->xmlReader->open($sheetDataFilePath, null, LIBXML_NONET) === false) {
+            throw new IOException('Could not open "' . $this->sheetDataXMLFilePath . '".');
+        }
+
+        $this->numReadRows = 0;
+        $this->rowDataBuffer = null;
+        $this->hasReachedEndOfFile = false;
+        $this->numColumns = 0;
+
+        $this->next();
+    }
+
+    /**
+     * Checks if current position is valid
+     * @link http://php.net/manual/en/iterator.valid.php
+     *
+     * @return boolean
+     */
+    public function valid()
+    {
+        return (!$this->hasReachedEndOfFile);
+    }
+
+    /**
+     * Move forward to next element. Empty rows will be skipped.
+     * The row that was read is stored in the row data buffer.
+     * @link http://php.net/manual/en/iterator.next.php
+     *
+     * @return void
+     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
+     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
+     */
+    public function next()
+    {
+        $isInsideRowTag = false;
+        $rowData = [];
+
+        // Use internal errors to avoid displaying lots of warning messages in case of invalid file
+        // For instance on HHVM, XMLReader->open() won't fail when trying to read a non-existent file within a zip...
+        // But the XMLReader->read() will fail!
+        // NOTE(review): internal error handling is switched on here and not switched back in this method - confirm this is intended
+        libxml_clear_errors();
+        libxml_use_internal_errors(true);
+
+        while ($this->xmlReader->read()) {
+            if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_DIMENSION) {
+                // Read dimensions of the sheet
+                $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
+                if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
+                    $lastCellIndex = $matches[1];
+                    $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
+                }
+
+            } else if ($this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
+                // Start of the row description
+                $isInsideRowTag = true;
+
+                // Read spans info if present
+                $numberOfColumnsForRow = $this->numColumns;
+                $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
+                if ($spans) {
+                    list(, $numberOfColumnsForRow) = explode(':', $spans);
+                    $numberOfColumnsForRow = intval($numberOfColumnsForRow);
+                }
+                $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
+
+            } else if ($isInsideRowTag && $this->xmlReader->nodeType == \XMLReader::ELEMENT && $this->xmlReader->name === self::XML_NODE_CELL) {
+                // Start of a cell description
+                $currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
+                $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
+
+                $node = $this->xmlReader->expand();
+                $rowData[$currentColumnIndex] = $this->getCellValue($node);
+
+            } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_ROW) {
+                // End of the row description
+                // If needed, we fill the empty cells
+                $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
+                $this->numReadRows++;
+                break;
+
+            } else if ($this->xmlReader->nodeType == \XMLReader::END_ELEMENT && $this->xmlReader->name === self::XML_NODE_WORKSHEET) {
+                // The closing worksheet tag marks the end of the file
+                $this->hasReachedEndOfFile = true;
+            }
+        }
+
+        $readError = libxml_get_last_error();
+        if ($readError !== false) {
+            $readErrorMessage = trim($readError->message);
+            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$readErrorMessage}]");
+        }
+
+        $this->rowDataBuffer = $rowData;
+    }
+
+    /**
+     * Returns the cell's string value from a node's nested value node
+     *
+     * @param \DOMNode $node
+     * @return string The value associated with the cell
+     */
+    protected function getVNodeValue($node)
+    {
+        // for cell types having a "v" tag containing the value.
+        // if not, the returned value should be empty string.
+        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
+        if ($vNode !== null) {
+            return $vNode->nodeValue;
+        }
+        return "";
+    }
+
+    /**
+     * Returns the cell String value where string is inline.
+     * A cell without any inline string node is treated as holding an empty string.
+     *
+     * @param \DOMNode $node
+     * @return string The value associated with the cell
+     */
+    protected function formatInlineStringCellValue($node)
+    {
+        // inline strings are formatted this way:
+        // [INLINE_STRING]
+        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
+        // item(0) is null when no such node exists: fall back to an empty string instead of dereferencing null
+        $escapedCellValue = ($tNode !== null) ? trim($tNode->nodeValue) : '';
+        $cellValue = $this->escaper->unescape($escapedCellValue);
+        return $cellValue;
+    }
+
+    /**
+     * Returns the cell String value from shared-strings file using nodeValue index.
+     *
+     * @param string $nodeValue Index of the shared string, as a string
+     * @return string The value associated with the cell
+     */
+    protected function formatSharedStringCellValue($nodeValue)
+    {
+        // shared strings are formatted this way:
+        // [SHARED_STRING_INDEX]
+        $sharedStringIndex = intval($nodeValue);
+        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
+        $cellValue = $this->escaper->unescape($escapedCellValue);
+        return $cellValue;
+    }
+
+    /**
+     * Returns the cell String value, where string is stored in value node.
+     * The value is trimmed, then unescaped.
+     *
+     * @param string $nodeValue
+     * @return string The value associated with the cell
+     */
+    protected function formatStrCellValue($nodeValue)
+    {
+        $escapedCellValue = trim($nodeValue);
+        $cellValue = $this->escaper->unescape($escapedCellValue);
+        return $cellValue;
+    }
+
+    /**
+     * Returns the cell Numeric value from string of nodeValue.
+     * Values written as plain integers (e.g. "42") are returned as int, everything else as float.
+     *
+     * @param string $nodeValue
+     * @return int|float The value associated with the cell
+     */
+    protected function formatNumericCellValue($nodeValue)
+    {
+        // $nodeValue is a string, for which is_int() is always false.
+        // Instead, check that the integer conversion round-trips to the exact same text:
+        // this also rejects decimals, exponents and integers too large to be represented.
+        $intValue = intval($nodeValue);
+        if ((string) $intValue === (string) $nodeValue) {
+            return $intValue;
+        }
+
+        return floatval($nodeValue);
+    }
+
+    /**
+     * Returns the cell Boolean value from a specific node's Value.
+     *
+     * @param string $nodeValue
+     * @return bool The value associated with the cell
+     */
+    protected function formatBooleanCellValue($nodeValue)
+    {
+        // !! is similar to boolval(): in PHP, both "" and "0" convert to false
+        $cellValue = !!$nodeValue;
+        return $cellValue;
+    }
+
+    /**
+     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
+     *
+     * @param string $nodeValue
+     * @return \DateTime|null The value associated with the cell (null when the cell has an error)
+     */
+    protected function formatDateCellValue($nodeValue)
+    {
+        // The \DateTime constructor throws on an invalid date-time format (http://php.net/manual/en/datetime.construct.php):
+        // such a value is reported as null instead of letting the exception bubble up
+        try {
+            return new \DateTime($nodeValue);
+        } catch (\Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Converts the content of the given cell node into the PHP value matching the cell's type.
+     * An empty value is returned as is, unless the cell holds an inline string.
+     *
+     * @param \DOMNode $node
+     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell type is not handled or a date cannot be parsed)
+     */
+    protected function getCellValue($node)
+    {
+        // A cell without an explicit type is treated as numeric ("n")
+        $typeAttribute = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
+        $cellType = $typeAttribute ? $typeAttribute : self::CELL_TYPE_NUMERIC;
+        $rawValue = $this->getVNodeValue($node);
+
+        // Inline strings are formatted from the node itself, so an empty raw value does not short-circuit them
+        $isInlineString = ($cellType === self::CELL_TYPE_INLINE_STRING);
+        if (!$isInlineString && $rawValue === '') {
+            return $rawValue;
+        }
+
+        switch ($cellType) {
+            case self::CELL_TYPE_INLINE_STRING:
+                return $this->formatInlineStringCellValue($node);
+            case self::CELL_TYPE_SHARED_STRING:
+                return $this->formatSharedStringCellValue($rawValue);
+            case self::CELL_TYPE_STR:
+                return $this->formatStrCellValue($rawValue);
+            case self::CELL_TYPE_BOOLEAN:
+                return $this->formatBooleanCellValue($rawValue);
+            case self::CELL_TYPE_NUMERIC:
+                return $this->formatNumericCellValue($rawValue);
+            case self::CELL_TYPE_DATE:
+                return $this->formatDateCellValue($rawValue);
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Return the current element, from the buffer.
+ * @link http://php.net/manual/en/iterator.current.php + * + * @return array|null + */ + public function current() + { + return $this->rowDataBuffer; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->numReadRows; + } + + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + $this->xmlReader->close(); + } +} diff --git a/src/Spout/Reader/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php similarity index 53% rename from src/Spout/Reader/Sheet.php rename to src/Spout/Reader/XLSX/Sheet.php index c8603ad..9510ecd 100644 --- a/src/Spout/Reader/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -1,15 +1,20 @@ rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); $this->id = $sheetId; $this->index = $sheetIndex; $this->name = $sheetName; } + /** + * @return RowIterator + */ + public function getRowIterator() + { + return $this->rowIterator; + } + /** * @return int ID of the sheet */ diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php new file mode 100644 index 0000000..aae58c2 --- /dev/null +++ b/src/Spout/Reader/XLSX/SheetIterator.php @@ -0,0 +1,112 @@ +sheets = $sheetHelper->getSheets(); + + if (count($this->sheets) === 0) { + throw new NoSheetsFoundException('The file must contain at least one sheet.'); + } + } + + /** + * Rewind the Iterator to the first element + * @link http://php.net/manual/en/iterator.rewind.php + * + * @return void + */ + public function rewind() + { + $this->currentSheetIndex = 0; + } + + /** + * Checks if current position is valid + * @link http://php.net/manual/en/iterator.valid.php + * + * @return boolean + */ + public function valid() + { + return ($this->currentSheetIndex < count($this->sheets)); + } + + /** + * Move forward to next element + * @link 
http://php.net/manual/en/iterator.next.php + * + * @return void + */ + public function next() + { + if (array_key_exists($this->currentSheetIndex, $this->sheets)) { + $currentSheet = $this->sheets[$this->currentSheetIndex]; + $currentSheet->getRowIterator()->end(); + + $this->currentSheetIndex++; + } + } + + /** + * Return the current element + * @link http://php.net/manual/en/iterator.current.php + * + * @return Sheet + */ + public function current() + { + return $this->sheets[$this->currentSheetIndex]; + } + + /** + * Return the key of the current element + * @link http://php.net/manual/en/iterator.key.php + * + * @return int + */ + public function key() + { + return $this->currentSheetIndex + 1; + } + + /** + * Cleans up what was created to iterate over the object. + * + * @return void + */ + public function end() + { + // make sure we are not leaking memory in case the iteration stopped before the end + foreach ($this->sheets as $sheet) { + $sheet->getRowIterator()->end(); + } + } +} diff --git a/src/Spout/Writer/AbstractWriter.php b/src/Spout/Writer/AbstractWriter.php index 68d452f..e17e16a 100644 --- a/src/Spout/Writer/AbstractWriter.php +++ b/src/Spout/Writer/AbstractWriter.php @@ -69,7 +69,7 @@ abstract class AbstractWriter implements WriterInterface * By using this method, the data will be written to a file. * * @param string $outputFilePath Path of the output file that will contain the data - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\IOException If the writer cannot be opened or if the given path is not writable */ public function openToFile($outputFilePath) @@ -92,7 +92,7 @@ abstract class AbstractWriter implements WriterInterface * @codeCoverageIgnore * * @param string $outputFileName Name of the output file that will contain the data. 
If a path is passed in, only the file name will be kept - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\IOException If the writer cannot be opened */ public function openToBrowser($outputFileName) @@ -144,7 +144,7 @@ abstract class AbstractWriter implements WriterInterface * If empty, no data is added (i.e. not even as a blank row) * Example: $dataRow = ['data1', 1234, null, '', 'data5', false]; * - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer * @throws \Box\Spout\Common\Exception\IOException If unable to write data */ @@ -173,7 +173,7 @@ abstract class AbstractWriter implements WriterInterface * ['data21', 'data22', null, false], * ]; * - * @return \Box\Spout\Writer\AbstractWriter + * @return AbstractWriter * @throws \Box\Spout\Common\Exception\InvalidArgumentException If the input param is not valid * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer * @throws \Box\Spout\Common\Exception\IOException If unable to write data diff --git a/src/Spout/Writer/CSV.php b/src/Spout/Writer/CSV/Writer.php similarity index 93% rename from src/Spout/Writer/CSV.php rename to src/Spout/Writer/CSV/Writer.php index 59e37a7..d37ad5b 100644 --- a/src/Spout/Writer/CSV.php +++ b/src/Spout/Writer/CSV/Writer.php @@ -1,16 +1,17 @@ EOD; - /** @var \Box\Spout\Writer\Sheet The "external" sheet */ + /** @var \Box\Spout\Writer\XLSX\Sheet The "external" sheet */ protected $externalSheet; /** @var string Path to the XML file that will contain the sheet data */ protected $worksheetFilePath; - /** @var \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper Helper to write shared strings */ + /** @var \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper Helper to write shared strings */ protected $sharedStringsHelper; /** @var bool Whether 
inline or shared strings should be used */ @@ -42,9 +42,9 @@ EOD; protected $lastWrittenRowIndex = 0; /** - * @param \Box\Spout\Writer\Sheet $externalSheet The associated "external" sheet + * @param \Box\Spout\Writer\XLSX\Sheet $externalSheet The associated "external" sheet * @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored - * @param \Box\Spout\Writer\Helper\XLSX\SharedStringsHelper $sharedStringsHelper Helper for shared strings + * @param \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper for shared strings * @param bool $shouldUseInlineStrings Whether inline or shared strings should be used * @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing */ @@ -76,7 +76,7 @@ EOD; } /** - * @return \Box\Spout\Writer\Sheet The "external" sheet + * @return \Box\Spout\Writer\XLSX\Sheet The "external" sheet */ public function getExternalSheet() { diff --git a/src/Spout/Writer/Sheet.php b/src/Spout/Writer/XLSX/Sheet.php similarity index 88% rename from src/Spout/Writer/Sheet.php rename to src/Spout/Writer/XLSX/Sheet.php index 7f8a874..858adcd 100644 --- a/src/Spout/Writer/Sheet.php +++ b/src/Spout/Writer/XLSX/Sheet.php @@ -1,12 +1,12 @@ index = $sheetIndex; $this->name = self::DEFAULT_SHEET_NAME_PREFIX . 
($sheetIndex + 1); @@ -45,7 +45,7 @@ class Sheet /** * @param string $name Name of the sheet - * @return \Box\Spout\Writer\Sheet + * @return Sheet */ public function setName($name) { diff --git a/src/Spout/Writer/XLSX.php b/src/Spout/Writer/XLSX/Writer.php similarity index 93% rename from src/Spout/Writer/XLSX.php rename to src/Spout/Writer/XLSX/Writer.php index 62e98f1..40f9ae4 100644 --- a/src/Spout/Writer/XLSX.php +++ b/src/Spout/Writer/XLSX/Writer.php @@ -1,17 +1,18 @@ book->getWorksheets(); - /** @var Internal\XLSX\Worksheet $worksheet */ + /** @var Internal\Worksheet $worksheet */ foreach ($worksheets as $worksheet) { $externalSheets[] = $worksheet->getExternalSheet(); } diff --git a/tests/Spout/Reader/CSVTest.php b/tests/Spout/Reader/CSV/ReaderTest.php similarity index 70% rename from tests/Spout/Reader/CSVTest.php rename to tests/Spout/Reader/CSV/ReaderTest.php index 8d02849..932633f 100644 --- a/tests/Spout/Reader/CSVTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -1,16 +1,17 @@ open('/path/to/fake/file.csv'); } + /** + * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException + * + * @return void + */ + public function testOpenShouldThrowExceptionIfTryingToReadBeforeOpeningReader() + { + ReaderFactory::create(Type::CSV)->getSheetIterator(); + } + /** * @expectedException \Box\Spout\Common\Exception\IOException * @@ -44,33 +55,22 @@ class CSVTest extends \PHPUnit_Framework_TestCase } /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException + * @expectedException \Box\Spout\Common\Exception\IOException * * @return void */ - public function testReadShouldThrowExceptionIfReadBeforeReaderOpened() + public function testOpenShouldThrowExceptionIfCannotOpenFile() { - $reader = ReaderFactory::create(Type::CSV); - $reader->hasNextRow(); - } + $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper') + ->setMethods(['fopen']) + ->getMock(); + $helperStub->method('fopen')->willReturn(false); - 
/** - * @expectedException \Box\Spout\Reader\Exception\EndOfFileReachedException - * - * @return void - */ - public function testReadShouldThrowExceptionIfNextRowCalledAfterReadingDone() - { $resourcePath = $this->getResourcePath('csv_standard.csv'); $reader = ReaderFactory::create(Type::CSV); + $reader->setGlobalFunctionsHelper($helperStub); $reader->open($resourcePath); - - while ($reader->hasNextRow()) { - $reader->nextRow(); - } - - $reader->nextRow(); } @@ -180,6 +180,50 @@ class CSVTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadMultipleTimesShouldRewindReader() + { + $allRows = []; + $resourcePath = $this->getResourcePath('csv_standard.csv'); + + $reader = ReaderFactory::create(Type::CSV); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // do nothing + } + + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + } + + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + } + + $reader->close(); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--11', 'csv--12', 'csv--13'], + ['csv--11', 'csv--12', 'csv--13'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @param string $fileName * @param string|void $fieldDelimiter @@ -197,8 +241,10 @@ class CSVTest extends \PHPUnit_Framework_TestCase $reader->open($resourcePath); - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; + } } $reader->close(); diff --git a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php 
b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php similarity index 94% rename from tests/Spout/Reader/Helper/XLSX/CellHelperTest.php rename to tests/Spout/Reader/XLSX/Helper/CellHelperTest.php index 8851b33..ff417b9 100644 --- a/tests/Spout/Reader/Helper/XLSX/CellHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/CellHelperTest.php @@ -1,11 +1,11 @@ getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') + ->getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') ->disableOriginalConstructor() ->setMethods(['getMemoryLimitInKB']) ->getMock(); $factoryStub->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB); - \ReflectionHelper::setStaticValue('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); + \ReflectionHelper::setStaticValue('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory', 'instance', $factoryStub); $strategy = $factoryStub->getBestCachingStrategy($sharedStringsUniqueCount, null); - $fullExpectedStrategyClassName = 'Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\\' . $expectedStrategyClassName; + $fullExpectedStrategyClassName = 'Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\\' . 
$expectedStrategyClassName; $this->assertEquals($fullExpectedStrategyClassName, get_class($strategy)); $strategy->clearCache(); @@ -85,7 +85,7 @@ class CachingStrategyFactoryTest extends \PHPUnit_Framework_TestCase { /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $factoryStub */ $factoryStub = $this - ->getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory') + ->getMockBuilder('\Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory') ->disableOriginalConstructor() ->setMethods(['getMemoryLimitFromIni']) ->getMock(); diff --git a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php similarity index 92% rename from tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php rename to tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php index 82631bc..a72d19a 100644 --- a/tests/Spout/Reader/Helper/XLSX/SharedStringsHelperTest.php +++ b/tests/Spout/Reader/XLSX/Helper/SharedStringsHelperTest.php @@ -1,16 +1,16 @@ getAllRowsForFile($filePath); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException - * - * @return void - */ - public function testHasNextSheetShouldThrowExceptionIfReaderNotOpened() - { - $reader = ReaderFactory::create(Type::XLSX); - $reader->hasNextSheet(); - } - - /** - * @expectedException \Box\Spout\Reader\Exception\EndOfWorksheetsReachedException - * - * @return void - */ - public function testNextSheetShouldThrowExceptionIfNoMoreSheetsToRead() - { - $fileName = 'one_sheet_with_shared_strings.xlsx'; - $resourcePath = $this->getResourcePath($fileName); - - $reader = ReaderFactory::create(Type::XLSX); - $reader->open($resourcePath); - - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - } - - $reader->nextSheet(); + // using @ to prevent warnings/errors from being displayed + @$this->getAllRowsForFile($filePath); } /** @@ -120,6 +93,9 @@ class XLSXTest extends 
\PHPUnit_Framework_TestCase */ public function testReadShouldSupportAllCellTypes() { + // make sure dates are always created with the same timezone + date_default_timezone_set('UTC'); + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx'); $expectedRows = [ @@ -270,7 +246,8 @@ class XLSXTest extends \PHPUnit_Framework_TestCase $startTime = microtime(true); try { - $this->getAllRowsForFile($fileName); + // using @ to prevent warnings/errors from being displayed + @$this->getAllRowsForFile($fileName); $this->fail('An exception should have been thrown'); } catch (IOException $exception) { $duration = microtime(true) - $startTime; @@ -305,6 +282,60 @@ class XLSXTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadMultipleTimesShouldRewindReader() + { + $allRows = []; + $resourcePath = $this->getResourcePath('two_sheets_with_inline_strings.xlsx'); + + $reader = ReaderFactory::create(Type::XLSX); + $reader->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + // do nothing + } + + foreach ($reader->getSheetIterator() as $sheet) { + // this loop should only add the first row of the first sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // this loop should rewind the iterator and restart reading from the 1st row again + // therefore, it should only add the first row of the first sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // not reading any more sheets + break; + } + + foreach ($reader->getSheetIterator() as $sheet) { + // this loop should only add the first row of the current sheet + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + break; + } + + // not breaking, so we keep reading the next sheets + } + + $reader->close(); + + $expectedRows = [ + ['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'], + ['s1 - A1', 's1 - B1', 's1 - C1', 
's1 - D1', 's1 - E1'], + ['s1 - A1', 's1 - B1', 's1 - C1', 's1 - D1', 's1 - E1'], + ['s2 - A1', 's2 - B1', 's2 - C1', 's2 - D1', 's2 - E1'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @param string $fileName * @return array All the read rows the given file @@ -317,11 +348,9 @@ class XLSXTest extends \PHPUnit_Framework_TestCase $reader = ReaderFactory::create(Type::XLSX); $reader->open($resourcePath); - while ($reader->hasNextSheet()) { - $reader->nextSheet(); - - while ($reader->hasNextRow()) { - $allRows[] = $reader->nextRow(); + foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { + foreach ($sheet->getRowIterator() as $rowIndex => $row) { + $allRows[] = $row; } } diff --git a/tests/Spout/Reader/SheetTest.php b/tests/Spout/Reader/XLSX/SheetTest.php similarity index 85% rename from tests/Spout/Reader/SheetTest.php rename to tests/Spout/Reader/XLSX/SheetTest.php index 5f6e02d..8f3f9df 100644 --- a/tests/Spout/Reader/SheetTest.php +++ b/tests/Spout/Reader/XLSX/SheetTest.php @@ -1,14 +1,15 @@ open($resourcePath); $sheets = []; - while ($reader->hasNextSheet()) { - $sheets[] = $reader->nextSheet(); + foreach ($reader->getSheetIterator() as $sheet) { + $sheets[] = $sheet; } $reader->close(); diff --git a/tests/Spout/Writer/CSVTest.php b/tests/Spout/Writer/CSV/WriterTest.php similarity index 93% rename from tests/Spout/Writer/CSVTest.php rename to tests/Spout/Writer/CSV/WriterTest.php index ef71b87..83e2e03 100644 --- a/tests/Spout/Writer/CSVTest.php +++ b/tests/Spout/Writer/CSV/WriterTest.php @@ -1,16 +1,17 @@ writeToCsvFileAndReturnWrittenContent($allRows, 'csv_with_utf8_bom.csv'); - $this->assertContains(CSV::UTF8_BOM, $writtenContent, 'The CSV file should contain a UTF-8 BOM'); + $this->assertContains(Writer::UTF8_BOM, $writtenContent, 'The CSV file should contain a UTF-8 BOM'); } /** @@ -161,6 +162,6 @@ class CSVTest extends \PHPUnit_Framework_TestCase private function trimWrittenContent($writtenContent) { // remove line feeds and 
UTF-8 BOM - return trim($writtenContent, PHP_EOL . CSV::UTF8_BOM); + return trim($writtenContent, PHP_EOL . Writer::UTF8_BOM); } } diff --git a/tests/Spout/Writer/Helper/XLSX/CellHelperTest.php b/tests/Spout/Writer/XLSX/Helper/CellHelperTest.php similarity index 97% rename from tests/Spout/Writer/Helper/XLSX/CellHelperTest.php rename to tests/Spout/Writer/XLSX/Helper/CellHelperTest.php index a5045aa..f46b1c6 100644 --- a/tests/Spout/Writer/Helper/XLSX/CellHelperTest.php +++ b/tests/Spout/Writer/XLSX/Helper/CellHelperTest.php @@ -1,11 +1,11 @@ createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->openToFile($resourcePath); diff --git a/tests/Spout/Writer/XLSXTest.php b/tests/Spout/Writer/XLSX/WriterTest.php similarity index 96% rename from tests/Spout/Writer/XLSXTest.php rename to tests/Spout/Writer/XLSX/WriterTest.php index 0abc252..7c6f5ea 100644 --- a/tests/Spout/Writer/XLSXTest.php +++ b/tests/Spout/Writer/XLSX/WriterTest.php @@ -1,16 +1,17 @@ createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings(true); @@ -278,7 +279,7 @@ class XLSXTest extends \PHPUnit_Framework_TestCase ]; // set the maxRowsPerSheet limit to 2 - \ReflectionHelper::setStaticValue('\Box\Spout\Writer\Internal\XLSX\Workbook', 'maxRowsPerWorksheet', 2); + \ReflectionHelper::setStaticValue('\Box\Spout\Writer\XLSX\Internal\Workbook', 'maxRowsPerWorksheet', 2); $writer = $this->writeToXLSXFile($dataRows, $fileName, true, $shouldCreateSheetsAutomatically = true); $this->assertEquals(2, count($writer->getSheets()), '2 sheets should have been created.'); @@ -302,7 +303,7 @@ class 
XLSXTest extends \PHPUnit_Framework_TestCase ]; // set the maxRowsPerSheet limit to 2 - \ReflectionHelper::setStaticValue('\Box\Spout\Writer\Internal\XLSX\Workbook', 'maxRowsPerWorksheet', 2); + \ReflectionHelper::setStaticValue('\Box\Spout\Writer\XLSX\Internal\Workbook', 'maxRowsPerWorksheet', 2); $writer = $this->writeToXLSXFile($dataRows, $fileName, true, $shouldCreateSheetsAutomatically = false); $this->assertEquals(1, count($writer->getSheets()), 'Only 1 sheet should have been created.'); @@ -348,14 +349,14 @@ class XLSXTest extends \PHPUnit_Framework_TestCase * @param string $fileName * @param bool $shouldUseInlineStrings * @param bool $shouldCreateSheetsAutomatically - * @return XLSX + * @return Writer */ private function writeToXLSXFile($allRows, $fileName, $shouldUseInlineStrings = true, $shouldCreateSheetsAutomatically = true) { $this->createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings($shouldUseInlineStrings); $writer->setShouldCreateNewSheetsAutomatically($shouldCreateSheetsAutomatically); @@ -373,14 +374,14 @@ class XLSXTest extends \PHPUnit_Framework_TestCase * @param string $fileName * @param bool $shouldUseInlineStrings * @param bool $shouldCreateSheetsAutomatically - * @return XLSX + * @return Writer */ private function writeToMultipleSheetsInXLSXFile($allRows, $numSheets, $fileName, $shouldUseInlineStrings = true, $shouldCreateSheetsAutomatically = true) { $this->createGeneratedFolderIfNeeded($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName); - /** @var \Box\Spout\Writer\XLSX $writer */ + /** @var \Box\Spout\Writer\XLSX\Writer $writer */ $writer = WriterFactory::create(Type::XLSX); $writer->setShouldUseInlineStrings($shouldUseInlineStrings); 
$writer->setShouldCreateNewSheetsAutomatically($shouldCreateSheetsAutomatically); diff --git a/tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx b/tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx new file mode 100644 index 0000000..f33c9d7 Binary files /dev/null and b/tests/resources/xlsx/file_with_sheet_xml_not_matching_content_types.xlsx differ