Remove old reader files
This commit is contained in:
parent
ae3ee357ff
commit
c52dd7bde8
@ -4,7 +4,6 @@ namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\ReaderNotOpenedException;
|
||||
use Box\Spout\Reader\Exception\EndOfFileReachedException;
|
||||
|
||||
/**
|
||||
* Class AbstractReader
|
||||
@ -14,18 +13,9 @@ use Box\Spout\Reader\Exception\EndOfFileReachedException;
|
||||
*/
|
||||
abstract class AbstractReader implements ReaderInterface
|
||||
{
|
||||
/** @var int Used to keep track of the row index */
|
||||
protected $currentRowIndex = 0;
|
||||
|
||||
/** @var bool Indicates whether the stream is currently open */
|
||||
protected $isStreamOpened = false;
|
||||
|
||||
/** @var bool Indicates whether all rows have been read */
|
||||
protected $hasReachedEndOfFile = false;
|
||||
|
||||
/** @var array Buffer used to store the row data, while checking if there are more rows to read */
|
||||
protected $rowDataBuffer = null;
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
@ -38,11 +28,11 @@ abstract class AbstractReader implements ReaderInterface
|
||||
abstract protected function openReader($filePath);
|
||||
|
||||
/**
|
||||
* Reads and returns next row if available.
|
||||
* Returns an iterator to iterate over sheets.
|
||||
*
|
||||
* @return array|null Array that contains the data for the read row or null at the end of the file
|
||||
* @return \Iterator To iterate over sheets
|
||||
*/
|
||||
abstract protected function read();
|
||||
abstract public function getConcreteSheetIterator();
|
||||
|
||||
/**
|
||||
* Closes the reader. To be used after reading the file.
|
||||
@ -80,9 +70,6 @@ abstract class AbstractReader implements ReaderInterface
|
||||
}
|
||||
}
|
||||
|
||||
$this->currentRowIndex = 0;
|
||||
$this->hasReachedEndOfFile = false;
|
||||
|
||||
try {
|
||||
$this->openReader($filePath);
|
||||
$this->isStreamOpened = true;
|
||||
@ -103,82 +90,18 @@ abstract class AbstractReader implements ReaderInterface
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether all rows have been read (i.e. if we are at the end of the file).
|
||||
* To know if the end of file has been reached, it uses a buffer. If the buffer is
|
||||
* empty (meaning, nothing has been read or previous read line has been consumed), then
|
||||
* it reads the next line, store it in the buffer for the next time or flip a variable if
|
||||
* the end of file has been reached.
|
||||
* Returns an iterator to iterate over sheets.
|
||||
*
|
||||
* @return bool Whether all rows have been read (i.e. if we are at the end of the file)
|
||||
* @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If the stream was not opened first
|
||||
* @return \Iterator To iterate over sheets
|
||||
* @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
|
||||
*/
|
||||
public function hasNextRow()
|
||||
public function getSheetIterator()
|
||||
{
|
||||
if (!$this->isStreamOpened) {
|
||||
throw new ReaderNotOpenedException('Stream should be opened first.');
|
||||
throw new ReaderNotOpenedException('Reader should be opened first.');
|
||||
}
|
||||
|
||||
if ($this->hasReachedEndOfFile) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// if the buffer contains unprocessed row
|
||||
if (!$this->isRowDataBufferEmpty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// otherwise, try to read the next line, and store it in the buffer
|
||||
$this->rowDataBuffer = $this->read();
|
||||
|
||||
// if the buffer is still empty after reading a row, it means end of file was reached
|
||||
$this->hasReachedEndOfFile = $this->isRowDataBufferEmpty();
|
||||
|
||||
return (!$this->hasReachedEndOfFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by
|
||||
* actually reading the file.
|
||||
*
|
||||
* @return array Array that contains the data for the read row
|
||||
* @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first
|
||||
* @throws \Box\Spout\Reader\Exception\EndOfFileReachedException
|
||||
*/
|
||||
public function nextRow()
|
||||
{
|
||||
if (!$this->hasNextRow()) {
|
||||
throw new EndOfFileReachedException('End of file was reached. Cannot read more rows.');
|
||||
}
|
||||
|
||||
// Get data from buffer (if the buffer was empty, it was filled by the call to hasNextRow())
|
||||
$rowData = $this->rowDataBuffer;
|
||||
|
||||
// empty buffer to mark the row as consumed
|
||||
$this->emptyRowDataBuffer();
|
||||
|
||||
$this->currentRowIndex++;
|
||||
|
||||
return $rowData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the buffer where the row data is stored is empty
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
protected function isRowDataBufferEmpty()
|
||||
{
|
||||
return ($this->rowDataBuffer === null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Empty the buffer that stores row data
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
protected function emptyRowDataBuffer()
|
||||
{
|
||||
$this->rowDataBuffer = null;
|
||||
return $this->getConcreteSheetIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -190,6 +113,12 @@ abstract class AbstractReader implements ReaderInterface
|
||||
{
|
||||
if ($this->isStreamOpened) {
|
||||
$this->closeReader();
|
||||
|
||||
$sheetIterator = $this->getConcreteSheetIterator();
|
||||
if ($sheetIterator) {
|
||||
$sheetIterator->end();
|
||||
}
|
||||
|
||||
$this->isStreamOpened = false;
|
||||
}
|
||||
}
|
||||
|
@ -1,111 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\ReaderNotOpenedException;
|
||||
use Box\Spout\Reader\Exception\EndOfFileReachedException;
|
||||
|
||||
/**
 * Class AbstractReader2
 * Base class for readers. It checks that the file can be read, keeps track of
 * whether the reader is currently opened and delegates the format-specific
 * work (opening, closing, sheet iteration) to its subclasses.
 *
 * @package Box\Spout\Reader
 * @abstract
 */
abstract class AbstractReader2 implements ReaderInterface2
{
    /** @var bool Set to true once openReader() succeeded, back to false after close() */
    protected $isStreamOpened = false;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper wrapping PHP global functions */
    protected $globalFunctionsHelper;

    /**
     * Format-specific logic to open the file located at the given path.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     */
    abstract protected function openReader($filePath);

    /**
     * Gives access to the iterator used to go through the sheets.
     *
     * @return \Iterator To iterate over sheets
     */
    abstract public function getSheetIterator();

    /**
     * Format-specific logic to close the reader once reading is done.
     *
     * @return void
     */
    abstract protected function closeReader();

    /**
     * Injects the helper used to call PHP global functions.
     *
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     * @return AbstractReader2 The current instance, to allow chaining
     */
    public function setGlobalFunctionsHelper($globalFunctionsHelper)
    {
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        return $this;
    }

    /**
     * Gets the reader ready to read the given file.
     * Unless the path designates a PHP stream, the file must exist and be readable.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the file does not exist, is not readable or could not be opened
     */
    public function open($filePath)
    {
        // existence/readability checks make no sense for PHP streams, so they are skipped
        $mustCheckFile = !$this->isPhpStream($filePath);

        if ($mustCheckFile && !$this->globalFunctionsHelper->file_exists($filePath)) {
            throw new IOException('Could not open ' . $filePath . ' for reading! File does not exist.');
        }

        if ($mustCheckFile && !$this->globalFunctionsHelper->is_readable($filePath)) {
            throw new IOException('Could not open ' . $filePath . ' for reading! File is not readable.');
        }

        try {
            $this->openReader($filePath);
            $this->isStreamOpened = true;
        } catch (\Exception $openException) {
            // any failure of the concrete reader is reported as an IOException
            $reason = $openException->getMessage();
            throw new IOException('Could not open ' . $filePath . ' for reading! (' . $reason . ')');
        }
    }

    /**
     * Tells whether the given path starts with the "php://" scheme
     * (php://output, php://memory, ...).
     *
     * @param string $filePath Path of the file to be read
     * @return bool Whether the given path maps to a PHP stream
     */
    protected function isPhpStream($filePath)
    {
        return (strncmp($filePath, 'php://', 6) === 0);
    }

    /**
     * Closes the reader so that nothing more can be read.
     * Does nothing when the reader is not opened.
     *
     * @return void
     */
    public function close()
    {
        if (!$this->isStreamOpened) {
            return;
        }

        $this->closeReader();

        // let the sheet iterator (if any) terminate as well
        $iterator = $this->getSheetIterator();
        if ($iterator) {
            $iterator->end();
        }

        $this->isStreamOpened = false;
    }
}
|
@ -1,130 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
|
||||
/**
 * Class CSV
 * Reader for CSV files: rows are read one at a time from a file pointer,
 * using a configurable field delimiter and field enclosure.
 *
 * @package Box\Spout\Reader
 */
class CSV extends AbstractReader
{
    const UTF8_BOM = "\xEF\xBB\xBF";

    /** @var resource Pointer to the file being read */
    protected $filePointer;

    /** @var string Character separating two fields (one character only) */
    protected $fieldDelimiter = ',';

    /** @var string Character wrapping a field (one character only) */
    protected $fieldEnclosure = '"';

    /**
     * Changes the character used to delimit fields.
     *
     * @param string $fieldDelimiter Character that delimits fields
     * @return CSV The current instance, to allow chaining
     */
    public function setFieldDelimiter($fieldDelimiter)
    {
        $this->fieldDelimiter = $fieldDelimiter;

        return $this;
    }

    /**
     * Changes the character used to enclose fields.
     *
     * @param string $fieldEnclosure Character that encloses fields
     * @return CSV The current instance, to allow chaining
     */
    public function setFieldEnclosure($fieldEnclosure)
    {
        $this->fieldEnclosure = $fieldEnclosure;

        return $this;
    }

    /**
     * Opens the CSV file in read mode and positions the pointer after a potential UTF-8 BOM.
     * The file must be UTF-8 encoded.
     * @TODO add encoding detection/conversion
     *
     * @param string $filePath Path of the CSV file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the file cannot be opened
     */
    protected function openReader($filePath)
    {
        $handle = $this->globalFunctionsHelper->fopen($filePath, 'r');
        $this->filePointer = $handle;

        if (!$handle) {
            throw new IOException('Could not open file ' . $filePath . ' for reading.');
        }

        $this->skipUtf8Bom();
    }

    /**
     * Moves the file pointer past the UTF-8 BOM when the file starts with one,
     * or back to the very beginning of the file otherwise.
     *
     * @return void
     */
    protected function skipUtf8Bom()
    {
        $helper = $this->globalFunctionsHelper;

        $helper->rewind($this->filePointer);

        // a length of 4 makes fgets read at most 3 bytes, which is the size of the BOM
        $firstBytes = $helper->fgets($this->filePointer, 4);

        // resume right after the 3 BOM bytes, or from the start when there is no BOM
        $startOffset = ($firstBytes === self::UTF8_BOM) ? 3 : 0;
        $helper->fseek($this->filePointer, $startOffset);
    }

    /**
     * Reads the file until a non-empty row is found and returns it.
     *
     * @return array|null Array that contains the data for the read row or null at the end of the file
     */
    protected function read()
    {
        if (!$this->filePointer) {
            return null;
        }

        do {
            $row = $this->globalFunctionsHelper->fgetcsv(
                $this->filePointer,
                0,
                $this->fieldDelimiter,
                $this->fieldEnclosure
            );
            $mustSkipRow = ($row && $this->isEmptyLine($row));
        } while ($mustSkipRow);

        // fgetcsv reports the end of the file with false; callers expect null
        if ($row === false) {
            return null;
        }

        return $row;
    }

    /**
     * An empty line is read as an array holding a single null value.
     *
     * @param array $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        $hasSingleCell = (count($lineData) === 1);

        return ($hasSingleCell && $lineData[0] === null);
    }

    /**
     * Releases the file pointer, if any. To be used after reading the file.
     *
     * @return void
     */
    protected function closeReader()
    {
        if (!$this->filePointer) {
            return;
        }

        $this->globalFunctionsHelper->fclose($this->filePointer);
    }
}
|
@ -2,7 +2,7 @@
|
||||
|
||||
namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Reader\AbstractReader2;
|
||||
use Box\Spout\Reader\AbstractReader;
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
|
||||
/**
|
||||
@ -11,7 +11,7 @@ use Box\Spout\Common\Exception\IOException;
|
||||
*
|
||||
* @package Box\Spout\Reader\CSV
|
||||
*/
|
||||
class Reader extends AbstractReader2
|
||||
class Reader extends AbstractReader
|
||||
{
|
||||
/** @var resource Pointer to the file to be written */
|
||||
protected $filePointer;
|
||||
@ -75,7 +75,7 @@ class Reader extends AbstractReader2
|
||||
*
|
||||
* @return SheetIterator To iterate over sheets
|
||||
*/
|
||||
public function getSheetIterator()
|
||||
public function getConcreteSheetIterator()
|
||||
{
|
||||
return $this->sheetIterator;
|
||||
}
|
||||
|
@ -1,12 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Exception;
|
||||
|
||||
/**
 * Class EndOfFileReachedException
 * Reader exception signalling that no more rows can be read because the end
 * of the file was reached. It adds nothing to ReaderException but its type.
 *
 * @package Box\Spout\Reader\Exception
 */
class EndOfFileReachedException extends ReaderException
{
}
|
@ -1,12 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Exception;
|
||||
|
||||
/**
 * Class EndOfWorksheetsReachedException
 * Marker exception: it adds nothing to ReaderException but its type.
 * NOTE(review): judging by its name, it signals that there is no worksheet
 * left to read - confirm against the code throwing it.
 *
 * @package Box\Spout\Reader\Exception
 */
class EndOfWorksheetsReachedException extends ReaderException
{
}
|
@ -1,12 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Exception;
|
||||
|
||||
/**
 * Class NoWorksheetsFoundException
 * Marker exception: it adds nothing to ReaderException but its type.
 * NOTE(review): judging by its name, it signals that the file contains no
 * worksheet at all - confirm against the code throwing it.
 *
 * @package Box\Spout\Reader\Exception
 */
class NoWorksheetsFoundException extends ReaderException
{
}
|
@ -1,97 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * Class CellHelper
 * This class provides helper functions when working with cells
 *
 * @package Box\Spout\Reader\Helper\XLSX
 */
class CellHelper
{
    /**
     * Fills the missing indexes of an array with a given value.
     * For instance, $dataArray = []; $dataArray[1] = 1; $dataArray[3] = 3;
     * Calling fillMissingArrayIndexes($dataArray, 'FILL') will return this array: ['FILL', 1, 'FILL', 3]
     *
     * An empty array has no highest index to fill up to, so an empty array is returned.
     *
     * @param array $dataArray The array to fill
     * @param string|void $fillValue optional
     * @return array The array, with every index from 0 to the highest existing one set, sorted by index
     */
    public static function fillMissingArrayIndexes($dataArray, $fillValue = '')
    {
        // max() cannot be called on an empty list of keys
        if (empty($dataArray)) {
            return [];
        }

        $highestIndex = max(array_keys($dataArray));

        // the "+" operator keeps the existing values and only adds the missing keys
        $dataArray += array_fill_keys(range(0, $highestIndex), $fillValue);

        ksort($dataArray);

        return $dataArray;
    }

    /**
     * Returns the base 10 column index associated to the cell index (base 26).
     * Excel uses A to Z letters for column indexing, where A is the 1st column,
     * Z is the 26th and AA is the 27th.
     * The mapping is zero based, so that A1 maps to 0, B2 maps to 1, Z13 to 25 and AA4 to 26.
     *
     * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
     * @return int
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    public static function getColumnIndexFromCellIndex($cellIndex)
    {
        if (!self::isValidCellIndex($cellIndex)) {
            throw new InvalidArgumentException('Cannot get column index from an invalid cell index.');
        }

        $capitalAAsciiValue = ord('A');
        $numLetters = ord('Z') - $capitalAAsciiValue + 1;

        // Remove row information
        $column = preg_replace('/\d/', '', $cellIndex);

        /*
         * Each letter is a "digit" ranging from 1 (A) to 26 (Z). The letters are
         * accumulated from left to right, which gives a one-based column number:
         *   A   => 1
         *   Z   => 26
         *   AA  => (1 * 26) + 1 = 27
         *   BC  => (2 * 26) + 3 = 55
         *   BCZ => (((2 * 26) + 3) * 26) + 26 = 1456
         * The result is then shifted by one to be zero-based (A => 0, BCZ => 1455).
         */
        $oneBasedColumnIndex = 0;
        foreach (str_split($column) as $letter) {
            $letterValue = ord($letter) - $capitalAAsciiValue + 1;
            $oneBasedColumnIndex = ($oneBasedColumnIndex * $numLetters) + $letterValue;
        }

        return $oneBasedColumnIndex - 1;
    }

    /**
     * Returns whether a cell index is valid, in an Excel world.
     * To be valid, the cell index should start with capital letters and be followed by numbers.
     *
     * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
     * @return bool
     */
    protected static function isValidCellIndex($cellIndex)
    {
        // the "D" modifier makes "$" match at the very end only (not before a trailing line feed)
        return (preg_match('/^[A-Z]+\d+$/D', $cellIndex) === 1);
    }
}
|
@ -1,154 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching;
|
||||
|
||||
/**
 * Class CachingStrategyFactory
 * Singleton factory choosing how shared strings should be cached (in memory or in
 * temporary files), based on the number of strings and the PHP "memory_limit".
 *
 * @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching
 */
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor for singleton
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new CachingStrategyFactory();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     * Values with a unit ("128M", "1G", "512kb", ...) as well as plain byte
     * counts ("134217728") are supported.
     *
     * @return float|int The limit in Kilobytes, or -1 if there is no limit or if it could not be parsed
     */
    protected function getMemoryLimitInKB()
    {
        $memoryLimitFormatted = strtolower(trim($this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) {
            $amount = intval($matches[1]);
            $unit = $matches[2];

            switch ($unit) {
                case 'b': return ($amount / 1024);
                case 'k': return $amount;
                case 'm': return ($amount * 1024);
                case 'g': return ($amount * 1024 * 1024);
                case 't': return ($amount * 1024 * 1024 * 1024);
            }
        }

        // PHP interprets a "memory_limit" without any unit as a number of bytes
        if (preg_match('/^\d+$/D', $memoryLimitFormatted)) {
            return (intval($memoryLimitFormatted) / 1024);
        }

        return -1;
    }

    /**
     * Returns the formatted "memory_limit" value
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
|
@ -1,44 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching;
|
||||
|
||||
/**
 * Interface CachingStrategyInterface
 * Contract for the caches holding shared strings: strings are added by index,
 * the cache is then closed, strings are fetched by index and the cache is
 * finally cleared.
 *
 * @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching
 */
interface CachingStrategyInterface
{
    /**
     * Stores a shared string in the cache, under the given index.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex);

    /**
     * Marks the cache as complete. To be called once the last shared string was added;
     * no more strings can be added afterwards.
     *
     * @return void
     */
    public function closeCache();

    /**
     * Fetches from the cache the shared string stored under the given index.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex);

    /**
     * Gets rid of the cache: memory is freed and created artifacts are removed.
     *
     * @return void
     */
    public function clearCache();
}
|
@ -1,188 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching;
|
||||
|
||||
use Box\Spout\Common\Helper\FileSystemHelper;
|
||||
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
|
||||
use Box\Spout\Reader\Exception\SharedStringNotFoundException;
|
||||
|
||||
/**
|
||||
* Class FileBasedStrategy
|
||||
*
|
||||
* This class implements the file-based caching strategy for shared strings.
|
||||
* Shared strings are stored in small files (with a max number of strings per file).
|
||||
* This strategy is slower than an in-memory strategy but is used to avoid out of memory crashes.
|
||||
*
|
||||
* @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching
|
||||
*/
|
||||
class FileBasedStrategy implements CachingStrategyInterface
|
||||
{
|
||||
/** Value to use to escape the line feed character ("\n") */
|
||||
const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\FileSystemHelper Helper to perform file system operations */
|
||||
protected $fileSystemHelper;
|
||||
|
||||
/**
|
||||
* @var int Maximum number of strings that can be stored in one temp file
|
||||
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
|
||||
*/
|
||||
protected $maxNumStringsPerTempFile;
|
||||
|
||||
/** @var resource Pointer to the last temp file a shared string was written to */
|
||||
protected $tempFilePointer;
|
||||
|
||||
/**
|
||||
* @var string Path of the temporary file whose contents is currently stored in memory
|
||||
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
|
||||
*/
|
||||
protected $inMemoryTempFilePath;
|
||||
|
||||
/**
|
||||
* @var string Contents of the temporary file that was last read
|
||||
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
|
||||
*/
|
||||
protected $inMemoryTempFileContents;
|
||||
|
||||
/**
|
||||
* @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored
|
||||
* @param int $maxNumStringsPerTempFile Maximum number of strings that can be stored in one temp file
|
||||
*/
|
||||
public function __construct($tempFolder, $maxNumStringsPerTempFile)
|
||||
{
|
||||
$rootTempFolder = ($tempFolder) ?: sys_get_temp_dir();
|
||||
$this->fileSystemHelper = new FileSystemHelper($rootTempFolder);
|
||||
$this->tempFolder = $this->fileSystemHelper->createFolder($rootTempFolder, uniqid('sharedstrings'));
|
||||
|
||||
$this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile;
|
||||
|
||||
$this->globalFunctionsHelper = new GlobalFunctionsHelper();
|
||||
$this->tempFilePointer = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given string to the cache.
|
||||
*
|
||||
* @param string $sharedString The string to be added to the cache
|
||||
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
|
||||
* @return void
|
||||
*/
|
||||
public function addStringForIndex($sharedString, $sharedStringIndex)
|
||||
{
|
||||
$tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
|
||||
|
||||
if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
|
||||
if ($this->tempFilePointer) {
|
||||
$this->globalFunctionsHelper->fclose($this->tempFilePointer);
|
||||
}
|
||||
$this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w');
|
||||
}
|
||||
|
||||
// The shared string retrieval logic expects each cell data to be on one line only
|
||||
// Encoding the line feed character allows to preserve this assumption
|
||||
$lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString);
|
||||
|
||||
$this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the path for the temp file that should contain the string for the given index
|
||||
*
|
||||
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
|
||||
* @return string The temp file path for the given index
|
||||
*/
|
||||
protected function getSharedStringTempFilePath($sharedStringIndex)
|
||||
{
|
||||
$numTempFile = intval($sharedStringIndex / $this->maxNumStringsPerTempFile);
|
||||
return $this->tempFolder . '/sharedstrings' . $numTempFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the cache after the last shared string was added.
|
||||
* This prevents any additional string from being added to the cache.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function closeCache()
|
||||
{
|
||||
// close pointer to the last temp file that was written
|
||||
if ($this->tempFilePointer) {
|
||||
$this->globalFunctionsHelper->fclose($this->tempFilePointer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the string located at the given index from the cache.
|
||||
*
|
||||
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
|
||||
* @return string The shared string at the given index
|
||||
* @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
|
||||
*/
|
||||
public function getStringAtIndex($sharedStringIndex)
|
||||
{
|
||||
$tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
|
||||
$indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile;
|
||||
|
||||
if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
|
||||
throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex");
|
||||
}
|
||||
|
||||
if ($this->inMemoryTempFilePath !== $tempFilePath) {
|
||||
// free memory
|
||||
unset($this->inMemoryTempFileContents);
|
||||
|
||||
$this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath));
|
||||
$this->inMemoryTempFilePath = $tempFilePath;
|
||||
}
|
||||
|
||||
$sharedString = null;
|
||||
if (array_key_exists($indexInFile, $this->inMemoryTempFileContents)) {
|
||||
$escapedSharedString = $this->inMemoryTempFileContents[$indexInFile];
|
||||
$sharedString = $this->unescapeLineFeed($escapedSharedString);
|
||||
}
|
||||
|
||||
if ($sharedString === null) {
|
||||
throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
|
||||
}
|
||||
|
||||
return rtrim($sharedString, PHP_EOL);
|
||||
}
|
||||
|
||||
/**
 * Replaces every line feed character (\n) of the given string
 * with the escaped line feed marker.
 *
 * @param string $unescapedString String that may contain line feeds
 * @return string The same string, with line feeds replaced by the marker
 */
private function escapeLineFeed($unescapedString)
{
    $lineFeed = "\n";
    $escapedLineFeed = self::ESCAPED_LINE_FEED_CHARACTER;

    return str_replace($lineFeed, $escapedLineFeed, $unescapedString);
}
|
||||
|
||||
/**
 * Replaces every escaped line feed marker of the given string
 * with an actual line feed character (\n).
 *
 * @param string $escapedString String that may contain escaped line feed markers
 * @return string The same string, with markers replaced by line feeds
 */
private function unescapeLineFeed($escapedString)
{
    $escapedLineFeed = self::ESCAPED_LINE_FEED_CHARACTER;
    $lineFeed = "\n";

    return str_replace($escapedLineFeed, $lineFeed, $escapedString);
}
|
||||
|
||||
/**
 * Destroys the cache by recursively deleting the temp folder, if one is set.
 *
 * @return void
 */
public function clearCache()
{
    // nothing to delete when no temp folder is set
    if (!$this->tempFolder) {
        return;
    }

    $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder);
}
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching;
|
||||
use Box\Spout\Reader\Exception\SharedStringNotFoundException;
|
||||
|
||||
/**
 * Class InMemoryStrategy
 *
 * This class implements the in-memory caching strategy for shared strings.
 * This strategy is used when the number of unique strings is low, compared to the memory available.
 *
 * @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching
 */
class InMemoryStrategy implements CachingStrategyInterface
{
    /** @var \SplFixedArray Array used to cache the shared strings */
    protected $inMemoryCache;

    /** @var bool Whether the cache has been closed */
    protected $isCacheClosed;

    /**
     * @param int $sharedStringsUniqueCount Number of unique shared strings (used as the initial cache size)
     */
    public function __construct($sharedStringsUniqueCount)
    {
        // SplFixedArray rejects negative sizes; the cache grows on demand anyway
        $initialSize = max(0, (int) $sharedStringsUniqueCount);

        $this->inMemoryCache = new \SplFixedArray($initialSize);
        $this->isCacheClosed = false;
    }

    /**
     * Adds the given string to the cache.
     * Nothing is added once the cache has been closed.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex)
    {
        if ($this->isCacheClosed) {
            return;
        }

        // The size given at construction may be too small (e.g. a missing or wrong unique count).
        // Grow the array instead of letting offsetSet() throw an out-of-range RuntimeException.
        // Doubling keeps repeated appends cheap.
        $currentSize = $this->inMemoryCache->getSize();
        if ($sharedStringIndex >= $currentSize) {
            $this->inMemoryCache->setSize(max($sharedStringIndex + 1, $currentSize * 2));
        }

        $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString);
    }

    /**
     * Closes the cache after the last shared string was added.
     * This prevents any additional string from being added to the cache.
     *
     * @return void
     */
    public function closeCache()
    {
        $this->isCacheClosed = true;
    }

    /**
     * Returns the string located at the given index from the cache.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        try {
            $sharedString = $this->inMemoryCache->offsetGet($sharedStringIndex);
        } catch (\RuntimeException $e) {
            // index outside of the array bounds
            $sharedString = null;
        }

        // A slot that is in range but was never filled holds null: report it as "not found"
        // instead of handing null back to the caller.
        if ($sharedString === null) {
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        return $sharedString;
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function clearCache()
    {
        // Replace the array with an empty one rather than unsetting the property:
        // the old contents are released, and later calls keep working on a valid object.
        $this->inMemoryCache = new \SplFixedArray(0);
        $this->isCacheClosed = false;
    }
}
|
@ -1,280 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory;
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyInterface;
|
||||
|
||||
/**
 * Class SharedStringsHelper
 * This class provides helper functions for reading sharedStrings XML file
 *
 * @package Box\Spout\Reader\Helper\XLSX
 */
class SharedStringsHelper
{
    /** Path of sharedStrings XML file inside the XLSX file */
    const SHARED_STRINGS_XML_FILE_PATH = 'xl/sharedStrings.xml';

    /** Main namespace for the sharedStrings.xml file */
    const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string Temporary folder where the temporary files to store shared strings will be stored */
    protected $tempFolder;

    /** @var CachingStrategyInterface The best caching strategy for storing shared strings */
    protected $cachingStrategy;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     */
    public function __construct($filePath, $tempFolder = null)
    {
        $this->filePath = $filePath;
        $this->tempFolder = $tempFolder;
    }

    /**
     * Returns whether the XLSX file contains a shared strings XML file
     *
     * @return bool
     */
    public function hasSharedStrings()
    {
        $hasSharedStrings = false;
        $zip = new \ZipArchive();

        if ($zip->open($this->filePath) === true) {
            $hasSharedStrings = ($zip->locateName(self::SHARED_STRINGS_XML_FILE_PATH) !== false);
            $zip->close();
        }

        return $hasSharedStrings;
    }

    /**
     * Reads all the shared strings of the workbook and stores them using the best caching strategy.
     * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'.
     * It is then accessed by the worksheet data, via the string index in the built table.
     *
     * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
     *
     * The XML file can be really big with worksheets containing a lot of data. That is why
     * we need to use a XML reader that provides streaming like the XMLReader library.
     * Please note that SimpleXML does not provide such a functionality but since it is faster
     * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose.
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
     */
    public function extractSharedStrings()
    {
        $xmlReader = new \XMLReader();
        $sharedStringIndex = 0;
        $escaper = new \Box\Spout\Common\Escaper\XLSX();

        $sharedStringsFilePath = $this->getSharedStringsFilePath();
        if ($xmlReader->open($sharedStringsFilePath, null, LIBXML_NONET) === false) {
            throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
        }

        try {
            $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
            $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);

            while ($xmlReader->read() && $xmlReader->name !== 'si') {
                // do nothing until a 'si' tag is reached
            }

            while ($xmlReader->name === 'si') {
                $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
                $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);

                // removes nodes that should not be read, like the pronunciation of the Kanji characters
                $cleanNode = $this->removeSuperfluousTextNodes($node);

                // find all text nodes 't'; there can be multiple if the cell contains formatting
                $textNodes = $cleanNode->xpath('//ns:t');
                if (!is_array($textNodes)) {
                    // xpath() does not return an array on failure: treat it as "no text"
                    $textNodes = [];
                }

                $textValue = '';
                foreach ($textNodes as $textNode) {
                    if ($this->shouldPreserveWhitespace($textNode)) {
                        $textValue .= $textNode->__toString();
                    } else {
                        $textValue .= trim($textNode->__toString());
                    }
                }

                $unescapedTextValue = $escaper->unescape($textValue);
                $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);

                $sharedStringIndex++;

                // jump to the next 'si' tag
                $xmlReader->next('si');
            }

            $this->cachingStrategy->closeCache();
        } catch (\Exception $exception) {
            // Make sure the reader is released when parsing fails.
            // try/catch + rethrow is used instead of "finally" to avoid requiring a newer PHP version.
            $xmlReader->close();
            throw $exception;
        }

        $xmlReader->close();
    }

    /**
     * @return string The path to the shared strings XML file
     */
    protected function getSharedStringsFilePath()
    {
        return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH;
    }

    /**
     * Returns the shared strings unique count, as specified in <sst> tag.
     * The value is 0 when the "uniqueCount" attribute is missing.
     *
     * @param \XMLReader $xmlReader XMLReader instance
     * @return int Number of unique shared strings in the sharedStrings.xml file
     * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
     */
    protected function getSharedStringsUniqueCount($xmlReader)
    {
        // Use internal errors to avoid displaying lots of warning messages in case of invalid file
        // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
        libxml_clear_errors();
        $previousUseInternalErrors = libxml_use_internal_errors(true);

        $xmlReader->next('sst');

        // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
        while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== \XMLReader::ELEMENT) {
            $xmlReader->read();
        }

        $readError = libxml_get_last_error();

        // Restore the caller's setting BEFORE possibly throwing, so that the global
        // libxml state is never left altered by this method.
        libxml_use_internal_errors($previousUseInternalErrors);

        if ($readError !== false) {
            throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$readError->message}]");
        }

        return intval($xmlReader->getAttribute('uniqueCount'));
    }

    /**
     * Returns the best shared strings caching strategy.
     *
     * @param int $sharedStringsUniqueCount
     * @return CachingStrategyInterface
     */
    protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount)
    {
        return CachingStrategyFactory::getInstance()
            ->getBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder);
    }

    /**
     * Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
     * This is to simplify the parsing of the subtree.
     *
     * @param \XMLReader $xmlReader
     * @return \SimpleXMLElement
     * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
     */
    protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
    {
        // Use internal errors to avoid displaying lots of warning messages in case of error found in the XML node.
        // For instance, if the file is used to perform a "Billion Laughs" or "Quadratic Blowup" attacks
        libxml_clear_errors();
        $previousUseInternalErrors = libxml_use_internal_errors(true);

        $node = null;
        try {
            $node = new \SimpleXMLElement($xmlReader->readOuterXml());
        } catch (\Exception $exception) {
            $error = libxml_get_last_error();
            libxml_use_internal_errors($previousUseInternalErrors);

            // libxml may not have recorded any error: fall back on the exception message
            $errorMessage = ($error !== false) ? trim($error->message) : $exception->getMessage();

            throw new IOException('The sharedStrings.xml file contains unreadable data [' . $errorMessage . '].');
        }

        libxml_use_internal_errors($previousUseInternalErrors);

        return $node;
    }

    /**
     * Removes nodes that should not be read, like the pronunciation of the Kanji characters.
     * By keeping them, their text content would be added to the read string.
     *
     * @param \SimpleXMLElement $parentNode Parent node that may contain nodes to remove
     * @return \SimpleXMLElement Cleaned parent node
     */
    protected function removeSuperfluousTextNodes($parentNode)
    {
        $tagsToRemove = [
            'rPh', // Pronunciation of the text
        ];

        foreach ($tagsToRemove as $tagToRemove) {
            $xpath = '//ns:' . $tagToRemove;
            $nodesToRemove = $parentNode->xpath($xpath);
            if (!is_array($nodesToRemove)) {
                continue;
            }

            foreach ($nodesToRemove as $nodeToRemove) {
                // This is how to remove a node from the XML
                unset($nodeToRemove[0]);
            }
        }

        return $parentNode;
    }

    /**
     * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
     *
     * @param \SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
     * @return bool Whether whitespace should be preserved
     */
    protected function shouldPreserveWhitespace($textNode)
    {
        $shouldPreserveWhitespace = false;

        $attributes = $textNode->attributes('xml', true);
        if ($attributes) {
            foreach ($attributes as $attributeName => $attributeValue) {
                if ($attributeName === 'space' && $attributeValue->__toString() === 'preserve') {
                    $shouldPreserveWhitespace = true;
                    break;
                }
            }
        }

        return $shouldPreserveWhitespace;
    }

    /**
     * Returns the shared string at the given index, using the previously chosen caching strategy.
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The shared string at the given index
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        return $this->cachingStrategy->getStringAtIndex($sharedStringIndex);
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function cleanup()
    {
        if ($this->cachingStrategy) {
            $this->cachingStrategy->clearCache();
        }
    }
}
|
@ -1,209 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
use Box\Spout\Reader\Internal\XLSX\Worksheet;
|
||||
use Box\Spout\Reader\Sheet;
|
||||
|
||||
/**
 * Class WorksheetHelper
 * This class provides helper functions related to XLSX worksheets
 *
 * @package Box\Spout\Reader\Helper\XLSX
 */
class WorksheetHelper
{
    /** Extension for XML files */
    const XML_EXTENSION = '.xml';

    /** Paths of XML files relative to the XLSX file root */
    const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
    const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
    const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';

    /** Namespaces for the XML files */
    const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
    const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
    const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

    /** Value of the Override attribute used in [Content_Types].xml to define worksheets */
    const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \SimpleXMLElement XML element representing the workbook.xml.rels file */
    protected $workbookXMLRelsAsXMLElement;

    /** @var \SimpleXMLElement XML element representing the workbook.xml file */
    protected $workbookXMLAsXMLElement;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($filePath, $globalFunctionsHelper)
    {
        $this->filePath = $filePath;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
    }

    /**
     * Returns the worksheets found within the XLSX file.
     * The paths of the worksheet data XML files are read from the [Content_Types].xml file.
     *
     * @return Worksheet[] Worksheets within the XLSX file
     */
    public function getWorksheets()
    {
        $worksheets = [];

        $contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
            self::CONTENT_TYPES_XML_FILE_PATH,
            self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
        );

        // find all nodes defining a worksheet
        $sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
        if (!is_array($sheetNodes)) {
            // xpath() does not return an array on failure: no worksheet can be listed
            return $worksheets;
        }

        // the position of the node in [Content_Types].xml is the zero-based worksheet index
        foreach (array_values($sheetNodes) as $sheetIndex => $sheetNode) {
            $sheetDataXMLFilePath = (string) $sheetNode->attributes()->PartName;

            $sheet = $this->getSheet($sheetDataXMLFilePath, $sheetIndex);
            $worksheets[] = new Worksheet($sheet, $sheetIndex, $sheetDataXMLFilePath);
        }

        return $worksheets;
    }

    /**
     * Returns an instance of a sheet, given the path of its data XML file.
     * We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
     * Then we look at "xl/workbook.xml" to find the sheet entry associated to the found ID.
     * The entry contains the ID and name of the sheet.
     *
     * If this piece of data can't be found by parsing the different XML files, the ID will default
     * to the sheet index, based on order in [Content_Types].xml. Similarly, the sheet's name will
     * default to the data sheet XML file name ("xl/worksheets/sheet2.xml" => "sheet2").
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param int $sheetIndexZeroBased Index of the sheet, based on order in [Content_Types].xml (zero-based)
     * @return \Box\Spout\Reader\Sheet Sheet instance
     */
    protected function getSheet($sheetDataXMLFilePath, $sheetIndexZeroBased)
    {
        // defaults, used when the lookups below do not find exactly one match
        $sheetId = $sheetIndexZeroBased + 1;
        $sheetName = $this->getDefaultSheetName($sheetDataXMLFilePath);

        /*
         * In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
         * In workbook.xml.rels, it is only "worksheets/sheet1.xml"
         */
        $sheetDataXMLFilePathInWorkbookXMLRels = $this->getPathRelativeToXlFolder($sheetDataXMLFilePath);

        // find the node associated to the given file path
        $workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
        $relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');

        if (is_array($relationshipNodes) && count($relationshipNodes) === 1) {
            // Kept in its own variable: the relationship ID ("rId1"...) must not end up
            // being used as the sheet ID when the workbook.xml lookup below fails.
            $relationshipId = (string) $relationshipNodes[0]->attributes()->Id;

            $workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
            $sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipId . '"]');

            if (is_array($sheetNodes) && count($sheetNodes) === 1) {
                $sheetNode = $sheetNodes[0];
                $sheetId = (int) $sheetNode->attributes()->sheetId;
                $escapedSheetName = (string) $sheetNode->attributes()->name;

                $escaper = new \Box\Spout\Common\Escaper\XLSX();
                $sheetName = $escaper->unescape($escapedSheetName);
            }
        }

        return new Sheet($sheetId, $sheetIndexZeroBased, $sheetName);
    }

    /**
     * Converts a path as found in [Content_Types].xml ("/xl/worksheets/sheet1.xml")
     * into a path relative to the "xl" folder ("worksheets/sheet1.xml").
     *
     * ltrim($path, '/xl/') must not be used for this: its second argument is a list of
     * characters, so a folder starting with "x", "l" or "/" would lose its first letters.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string The path, without leading slashes nor the "xl/" prefix
     */
    protected function getPathRelativeToXlFolder($sheetDataXMLFilePath)
    {
        $xlFolderPrefix = 'xl/';
        $relativePath = ltrim($sheetDataXMLFilePath, '/');

        if (strpos($relativePath, $xlFolderPrefix) === 0) {
            $relativePath = substr($relativePath, strlen($xlFolderPrefix));
        }

        return $relativePath;
    }

    /**
     * Returns the default name of the sheet whose data is located
     * at the given path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file
     * @return string The default sheet name
     */
    protected function getDefaultSheetName($sheetDataXMLFilePath)
    {
        return $this->globalFunctionsHelper->basename($sheetDataXMLFilePath, self::XML_EXTENSION);
    }

    /**
     * Returns a representation of the workbook.xml.rels file, ready to be parsed.
     * The returned value is cached.
     *
     * @return \SimpleXMLElement XML element representing the workbook.xml.rels file
     */
    protected function getWorkbookXMLRelsAsXMLElement()
    {
        if (!$this->workbookXMLRelsAsXMLElement) {
            $this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace(
                self::WORKBOOK_XML_RELS_FILE_PATH,
                self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
            );
        }

        return $this->workbookXMLRelsAsXMLElement;
    }

    /**
     * Returns a representation of the workbook.xml file, ready to be parsed.
     * The returned value is cached.
     *
     * @return \SimpleXMLElement XML element representing the workbook.xml file
     */
    protected function getWorkbookXMLAsXMLElement()
    {
        if (!$this->workbookXMLAsXMLElement) {
            $this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace(
                self::WORKBOOK_XML_FILE_PATH,
                self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
            );
        }

        return $this->workbookXMLAsXMLElement;
    }

    /**
     * Loads the contents of the given file in an XML parser and register the given XPath namespace.
     *
     * @param string $xmlFilePath The path of the XML file inside the XLSX file
     * @param string $mainNamespace The main XPath namespace to register
     * @return \SimpleXMLElement The XML element representing the file
     */
    protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
    {
        $xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);

        $xmlElement = new \SimpleXMLElement($xmlContents);
        $xmlElement->registerXPathNamespace('ns', $mainNamespace);

        return $xmlElement;
    }

    /**
     * Returns whether another worksheet exists after the current worksheet.
     * The order is determined by the order of appearance in the [Content_Types].xml file.
     *
     * @param Worksheet|null $currentWorksheet The worksheet being currently read or null if reading has not started yet
     * @param Worksheet[] $allWorksheets A list of all worksheets in the XLSX file. Must contain at least one worksheet
     * @return bool Whether another worksheet exists after the current sheet
     */
    public function hasNextWorksheet($currentWorksheet, $allWorksheets)
    {
        return ($currentWorksheet === null || ($currentWorksheet->getWorksheetIndex() + 1 < count($allWorksheets)));
    }
}
|
@ -1,58 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Internal\XLSX;
|
||||
|
||||
/**
 * Class Worksheet
 * Internal representation of a worksheet of a XLSX file: it ties the "external" sheet
 * to the worksheet index and to the path of the XML file holding the worksheet data.
 *
 * @package Box\Spout\Reader\Internal\XLSX
 */
class Worksheet
{
    /** @var string Path of the XML file containing the worksheet data */
    protected $dataXmlFilePath;

    /** @var int Worksheet index, based on the order of appearance in [Content_Types].xml (zero-based) */
    protected $worksheetIndex;

    /** @var \Box\Spout\Reader\Sheet The "external" sheet */
    protected $externalSheet;

    /**
     * @param \Box\Spout\Reader\Sheet $externalSheet The associated "external" sheet
     * @param int $worksheetIndex Worksheet index, based on the order of appearance in [Content_Types].xml (zero-based)
     * @param string $dataXmlFilePath Path of the XML file containing the worksheet data
     */
    public function __construct($externalSheet, $worksheetIndex, $dataXmlFilePath)
    {
        $this->dataXmlFilePath = $dataXmlFilePath;
        $this->worksheetIndex = $worksheetIndex;
        $this->externalSheet = $externalSheet;
    }

    /**
     * @return int Worksheet index (zero-based)
     */
    public function getWorksheetIndex()
    {
        return $this->worksheetIndex;
    }

    /**
     * @return \Box\Spout\Reader\Sheet The "external" sheet
     */
    public function getExternalSheet()
    {
        return $this->externalSheet;
    }

    /**
     * @return string Path of the XML file containing the worksheet data,
     *                with any leading slash removed.
     */
    public function getDataXmlFilePath()
    {
        $leadingCharactersToStrip = '/';

        return ltrim($this->dataXmlFilePath, $leadingCharactersToStrip);
    }
}
|
@ -19,7 +19,7 @@ class ReaderFactory
|
||||
* This creates an instance of the appropriate reader, given the type of the file to be read
|
||||
*
|
||||
* @param string $readerType Type of the reader to instantiate
|
||||
* @return \Box\Spout\Reader\CSV|\Box\Spout\Reader\XLSX
|
||||
* @return \Box\Spout\Reader\CSV\Reader|\Box\Spout\Reader\XLSX\Reader
|
||||
* @throws \Box\Spout\Common\Exception\UnsupportedTypeException
|
||||
*/
|
||||
public static function create($readerType)
|
||||
@ -28,10 +28,10 @@ class ReaderFactory
|
||||
|
||||
switch ($readerType) {
|
||||
case Type::CSV:
|
||||
$reader = new CSV();
|
||||
$reader = new CSV\Reader();
|
||||
break;
|
||||
case Type::XLSX:
|
||||
$reader = new XLSX();
|
||||
$reader = new XLSX\Reader();
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType);
|
||||
|
@ -1,44 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\UnsupportedTypeException;
|
||||
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
|
||||
use Box\Spout\Common\Type;
|
||||
|
||||
/**
 * Class ReaderFactory2
 * Factory building the reader matching the type of the file to be read.
 * CSV and XLSX are the two supported types.
 *
 * @package Box\Spout\Reader
 */
class ReaderFactory2
{
    /**
     * Instantiates the reader matching the given type and provides it
     * with a helper to work with global functions.
     *
     * @param string $readerType Type of the reader to instantiate
     * @return \Box\Spout\Reader\CSV\Reader|\Box\Spout\Reader\XLSX\Reader
     * @throws \Box\Spout\Common\Exception\UnsupportedTypeException If no reader supports the given type
     */
    public static function create($readerType)
    {
        // loose comparisons are intentional here
        if ($readerType == Type::CSV) {
            $reader = new CSV\Reader();
        } elseif ($readerType == Type::XLSX) {
            $reader = new XLSX\Reader();
        } else {
            throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType);
        }

        $globalFunctionsHelper = new GlobalFunctionsHelper();
        $reader->setGlobalFunctionsHelper($globalFunctionsHelper);

        return $reader;
    }
}
|
@ -20,26 +20,12 @@ interface ReaderInterface
|
||||
public function open($filePath);
|
||||
|
||||
/**
|
||||
* Returns whether all rows have been read (i.e. if we are at the end of the file).
|
||||
* To know if the end of file has been reached, it uses a buffer. If the buffer is
|
||||
* empty (meaning, nothing has been read or previous read line has been consumed), then
|
||||
* it reads the next line, store it in the buffer for the next time or flip a variable if
|
||||
* the end of file has been reached.
|
||||
* Returns an iterator to iterate over sheets.
|
||||
*
|
||||
* @return bool
|
||||
* @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first
|
||||
* @return \Iterator To iterate over sheets
|
||||
* @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
|
||||
*/
|
||||
public function hasNextRow();
|
||||
|
||||
/**
|
||||
* Returns next row if available. The row is either retrieved from the buffer if it is not empty or fetched by
|
||||
* actually reading the file.
|
||||
*
|
||||
* @return array Array that contains the data for the read row
|
||||
* @throws \Box\Spout\Common\Exception\IOException If the stream was not opened first
|
||||
* @throws \Box\Spout\Reader\Exception\EndOfFileReachedException
|
||||
*/
|
||||
public function nextRow();
|
||||
public function getSheetIterator();
|
||||
|
||||
/**
|
||||
* Closes the reader, preventing any additional reading
|
||||
|
@ -1,35 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
/**
 * Interface ReaderInterface2
 * Contract for readers: open a file, iterate over its sheets, close the reader.
 *
 * @package Box\Spout\Reader
 */
interface ReaderInterface2
{
    /**
     * Gets the reader ready to read the file located at the given path,
     * after making sure that the file exists and is readable.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException
     */
    public function open($filePath);

    /**
     * Gives access to the sheets, through an iterator.
     *
     * @return \Iterator To iterate over sheets
     */
    public function getSheetIterator();

    /**
     * Closes the reader. No additional reading is possible afterwards.
     *
     * @return void
     */
    public function close();
}
|
@ -1,57 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
/**
 * Class Sheet
 * Represents a worksheet within a XLSX file
 *
 * @package Box\Spout\Reader
 */
class Sheet
{
    /** @var int ID of the sheet */
    protected $id;

    /** @var int Index of the sheet, based on order of creation (zero-based) */
    protected $index;

    /** @var string Name of the sheet */
    protected $name;

    /**
     * Visibility is declared explicitly, like for every other method of the class
     * (a method without visibility keyword is public, so callers are not affected).
     *
     * @param int $sheetId ID of the sheet
     * @param int $sheetIndex Index of the sheet, based on order of creation (zero-based)
     * @param string $sheetName Name of the sheet
     */
    public function __construct($sheetId, $sheetIndex, $sheetName)
    {
        $this->id = $sheetId;
        $this->index = $sheetIndex;
        $this->name = $sheetName;
    }

    /**
     * @return int ID of the sheet
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return int Index of the sheet, based on order of creation (zero-based)
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }
}
|
@ -1,394 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\BadUsageException;
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\Exception\EndOfWorksheetsReachedException;
|
||||
use Box\Spout\Reader\Exception\NoWorksheetsFoundException;
|
||||
use Box\Spout\Reader\Exception\ReaderNotOpenedException;
|
||||
use Box\Spout\Reader\Helper\XLSX\CellHelper;
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsHelper;
|
||||
use Box\Spout\Reader\Helper\XLSX\WorksheetHelper;
|
||||
|
||||
/**
 * Class XLSX
 * This class provides support to read data from a XLSX file
 *
 * @package Box\Spout\Reader
 */
class XLSX extends AbstractReader
{
    // Values of the "t" (type) attribute of a <c> (cell) element
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
    const CELL_TYPE_STR = 'str';
    const CELL_TYPE_SHARED_STRING = 's';
    const CELL_TYPE_BOOLEAN = 'b';
    const CELL_TYPE_NUMERIC = 'n';
    const CELL_TYPE_DATE = 'd';
    const CELL_TYPE_ERROR = 'e';

    /** @var string Real path of the file to read */
    protected $filePath;

    /** @var string Temporary folder where the temporary files will be created */
    protected $tempFolder;

    /** @var \ZipArchive */
    protected $zip;

    /** @var Helper\XLSX\SharedStringsHelper Helper to work with shared strings */
    protected $sharedStringsHelper;

    /** @var Helper\XLSX\WorksheetHelper Helper to work with worksheets */
    protected $worksheetHelper;

    /** @var Internal\XLSX\Worksheet[] The list of worksheets present in the file */
    protected $worksheets;

    /** @var Internal\XLSX\Worksheet The worksheet being read */
    protected $currentWorksheet;

    /** @var \XMLReader The XMLReader object that will help read sheets XML data */
    protected $xmlReader;

    /** @var int The number of columns the worksheet has (0 meaning undefined) */
    protected $numberOfColumns = 0;

    /**
     * @param string $tempFolder Temporary folder where the temporary files will be created
     * @return XLSX
     */
    public function setTempFolder($tempFolder)
    {
        $this->tempFolder = $tempFolder;
        return $this;
    }

    /**
     * Opens the file at the given file path to make it ready to be read.
     * It also extracts the shared strings (when the file has any)
     * and fetches all the available worksheets.
     *
     * @param string $filePath Path of the file to be read
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read
     * @throws Exception\NoWorksheetsFoundException If there are no worksheets in the file
     */
    protected function openReader($filePath)
    {
        $this->filePath = $filePath;
        $this->zip = new \ZipArchive();

        if ($this->zip->open($filePath) !== true) {
            throw new IOException('Could not open ' . $filePath . ' for reading.');
        }

        $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder);

        if ($this->sharedStringsHelper->hasSharedStrings()) {
            // Extracts all the strings from the worksheets for easy access in the future
            $this->sharedStringsHelper->extractSharedStrings();
        }

        // Fetch all available worksheets
        $this->worksheetHelper = new WorksheetHelper($filePath, $this->globalFunctionsHelper);
        $this->worksheets = $this->worksheetHelper->getWorksheets($filePath);

        if (count($this->worksheets) === 0) {
            throw new NoWorksheetsFoundException('The file must contain at least one worksheet.');
        }
    }

    /**
     * Returns whether another worksheet exists after the current worksheet.
     *
     * @return bool Whether another worksheet exists after the current worksheet.
     * @throws Exception\ReaderNotOpenedException If the stream was not opened first
     */
    public function hasNextSheet()
    {
        if (!$this->isStreamOpened) {
            throw new ReaderNotOpenedException('Stream should be opened first.');
        }

        return $this->worksheetHelper->hasNextWorksheet($this->currentWorksheet, $this->worksheets);
    }

    /**
     * Moves the pointer to the next worksheet (the first one if none was selected yet).
     * Moving to another worksheet will stop the reading in the current worksheet.
     *
     * @return \Box\Spout\Reader\Sheet The next sheet
     * @throws Exception\ReaderNotOpenedException If the stream was not opened first
     * @throws Exception\EndOfWorksheetsReachedException If there is no more worksheets to read
     */
    public function nextSheet()
    {
        if (!$this->hasNextSheet()) {
            throw new EndOfWorksheetsReachedException('End of worksheets was reached. Cannot read more worksheets.');
        }

        if ($this->currentWorksheet === null) {
            $nextWorksheet = $this->worksheets[0];
        } else {
            $currentWorksheetIndex = $this->currentWorksheet->getWorksheetIndex();
            $nextWorksheet = $this->worksheets[$currentWorksheetIndex + 1];
        }

        $this->initXmlReaderForWorksheetData($nextWorksheet);
        $this->currentWorksheet = $nextWorksheet;

        // The column count is per worksheet: forget the one computed for the previous
        // worksheet so it cannot leak into a worksheet that has no usable dimension range.
        $this->numberOfColumns = 0;

        // make sure that we are ready to read more rows
        $this->hasReachedEndOfFile = false;
        $this->emptyRowDataBuffer();

        return $this->currentWorksheet->getExternalSheet();
    }

    /**
     * Initializes the XMLReader object that reads worksheet data for the given worksheet.
     * If another worksheet was being read, it closes the reader before reopening it for the new worksheet.
     * The worksheet XML is opened with LIBXML_NONET.
     *
     * @param Internal\XLSX\Worksheet $worksheet The worksheet to initialize the XMLReader with
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the worksheet data XML cannot be read
     */
    protected function initXmlReaderForWorksheetData($worksheet)
    {
        // if changing worksheet and the XMLReader was initialized for the current worksheet
        if ($worksheet != $this->currentWorksheet && $this->xmlReader) {
            $this->xmlReader->close();
        } elseif (!$this->xmlReader) {
            $this->xmlReader = new \XMLReader();
        }

        $worksheetDataXMLFilePath = $worksheet->getDataXmlFilePath();

        // The worksheet XML is read straight out of the archive through the zip:// stream wrapper
        $worksheetDataFilePath = 'zip://' . $this->filePath . '#' . $worksheetDataXMLFilePath;
        if ($this->xmlReader->open($worksheetDataFilePath, null, LIBXML_NONET) === false) {
            throw new IOException('Could not open "' . $worksheetDataXMLFilePath . '".');
        }
    }

    /**
     * Reads and returns data of the line that comes after the last read line, on the current worksheet.
     * Empty rows will be skipped.
     *
     * @return array|null Array that contains the data for the read line or null at the end of the file
     * @throws \Box\Spout\Common\Exception\BadUsageException If the pointer to the current worksheet has not been set
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     */
    protected function read()
    {
        if (!$this->currentWorksheet) {
            throw new BadUsageException('You must call nextSheet() before calling hasNextRow() or nextRow()');
        }

        $escaper = new \Box\Spout\Common\Escaper\XLSX();
        $isInsideRowTag = false;
        $rowData = [];

        while ($this->xmlReader->read()) {
            $isElementStart = ($this->xmlReader->nodeType === \XMLReader::ELEMENT);
            $isElementEnd = ($this->xmlReader->nodeType === \XMLReader::END_ELEMENT);
            $nodeName = $this->xmlReader->name;

            if ($isElementStart && $nodeName === 'dimension') {
                // Read dimensions of the worksheet
                $dimensionRef = $this->xmlReader->getAttribute('ref'); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
                if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
                    $lastCellIndex = $matches[1];
                    $this->numberOfColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
                }
            } elseif ($isElementStart && $nodeName === 'row') {
                // Start of the row description
                $isInsideRowTag = true;

                // Read spans info if present: it takes precedence over the worksheet dimension
                $numberOfColumnsForRow = $this->numberOfColumns;
                $spans = $this->xmlReader->getAttribute('spans'); // returns '1:5' for instance
                if ($spans) {
                    list(, $numberOfColumnsForRow) = explode(':', $spans);
                    $numberOfColumnsForRow = intval($numberOfColumnsForRow);
                }
                $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
            } elseif ($isInsideRowTag && $isElementStart && $nodeName === 'c') {
                // Start of a cell description
                $currentCellIndex = $this->xmlReader->getAttribute('r');
                $currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);

                $node = $this->xmlReader->expand();
                $rowData[$currentColumnIndex] = $this->getCellValue($node, $escaper);
            } elseif ($isElementEnd && $nodeName === 'row') {
                // End of the row description
                // If needed, we fill the empty cells
                $rowData = ($this->numberOfColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
                break;
            }
        }

        // no data means "end of file"
        return ($rowData !== []) ? $rowData : null;
    }

    /**
     * Returns the cell's string value from a node's nested value node
     *
     * @param \DOMNode $node
     * @return string The content of the first "v" descendant, or an empty string when there is none
     */
    protected function getVNodeValue($node)
    {
        // for cell types having a "v" tag containing the value.
        // if not, the returned value should be empty string.
        $vNode = $node->getElementsByTagName('v')->item(0);
        if ($vNode !== null) {
            return $vNode->nodeValue;
        }
        return "";
    }

    /**
     * Returns the cell String value where string is inline.
     *
     * @param \DOMNode $node
     * @param \Box\Spout\Common\Escaper\XLSX $escaper
     * @return string The trimmed and unescaped value associated with the cell
     */
    protected function formatInlineStringCellValue($node, $escaper)
    {
        // inline strings are formatted this way:
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
        $tNode = $node->getElementsByTagName('t')->item(0);

        // A cell without any <t> node is handled as an empty string instead of dereferencing null
        $escapedCellValue = ($tNode !== null) ? trim($tNode->nodeValue) : '';

        return $escaper->unescape($escapedCellValue);
    }

    /**
     * Returns the cell String value from shared-strings file using nodeValue index.
     *
     * @param string $nodeValue
     * @param \Box\Spout\Common\Escaper\XLSX $escaper
     * @return string The unescaped shared string associated with the cell
     */
    protected function formatSharedStringCellValue($nodeValue, $escaper)
    {
        // shared strings are formatted this way:
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
        $sharedStringIndex = intval($nodeValue);
        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);

        return $escaper->unescape($escapedCellValue);
    }

    /**
     * Returns the cell String value, where string is stored in value node.
     *
     * @param string $nodeValue
     * @param \Box\Spout\Common\Escaper\XLSX $escaper
     * @return string The trimmed and unescaped value associated with the cell
     */
    protected function formatStrCellValue($nodeValue, $escaper)
    {
        $escapedCellValue = trim($nodeValue);

        return $escaper->unescape($escapedCellValue);
    }

    /**
     * Returns the cell Numeric value from string of nodeValue.
     *
     * @param string $nodeValue
     * @return int|float The value associated with the cell
     */
    protected function formatNumericCellValue($nodeValue)
    {
        // $nodeValue is always a string here, so is_int() can never tell integers apart.
        // Instead, the value is an integer when converting it to int loses no information.
        $nodeIntValue = intval($nodeValue);
        $nodeFloatValue = floatval($nodeValue);

        return (floatval($nodeIntValue) === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
    }

    /**
     * Returns the cell Boolean value from a specific node's Value.
     *
     * @param string $nodeValue
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($nodeValue)
    {
        // !! is similar to boolval()
        $cellValue = !!$nodeValue;
        return $cellValue;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
     *
     * @param string $nodeValue
     * @return \DateTime|null The value associated with the cell (null when the date cannot be parsed)
     */
    protected function formatDateCellValue($nodeValue)
    {
        try { // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
            $cellValue = new \DateTime($nodeValue);
            return $cellValue;
        } catch (\Exception $e) {
            return null;
        }
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @param \Box\Spout\Common\Escaper\XLSX $escaper
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null for any other cell type, errors included)
     */
    protected function getCellValue($node, $escaper)
    {
        // Default cell type is "n"
        $cellType = $node->getAttribute('t') ?: 'n';
        $vNodeValue = $this->getVNodeValue($node);

        // Inline strings keep their value in a <t> node, so an empty <v> means nothing for them
        if (($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
            return $vNodeValue;
        }

        switch ($cellType) {
            case self::CELL_TYPE_INLINE_STRING:
                return $this->formatInlineStringCellValue($node, $escaper);
            case self::CELL_TYPE_SHARED_STRING:
                return $this->formatSharedStringCellValue($vNodeValue, $escaper);
            case self::CELL_TYPE_STR:
                return $this->formatStrCellValue($vNodeValue, $escaper);
            case self::CELL_TYPE_BOOLEAN:
                return $this->formatBooleanCellValue($vNodeValue);
            case self::CELL_TYPE_NUMERIC:
                return $this->formatNumericCellValue($vNodeValue);
            case self::CELL_TYPE_DATE:
                return $this->formatDateCellValue($vNodeValue);
            default:
                return null;
        }
    }

    /**
     * Closes the reader. To be used after reading the file.
     *
     * @return void
     */
    protected function closeReader()
    {
        if ($this->xmlReader) {
            $this->xmlReader->close();
        }

        if ($this->zip) {
            $this->zip->close();
        }

        // Guarded like the other handles: the helper only exists once the archive was opened
        if ($this->sharedStringsHelper) {
            $this->sharedStringsHelper->cleanup();
        }
    }
}
|
@ -3,7 +3,7 @@
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Reader\AbstractReader2;
|
||||
use Box\Spout\Reader\AbstractReader;
|
||||
use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper;
|
||||
|
||||
/**
|
||||
@ -12,7 +12,7 @@ use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper;
|
||||
*
|
||||
* @package Box\Spout\Reader\XLSX
|
||||
*/
|
||||
class Reader extends AbstractReader2
|
||||
class Reader extends AbstractReader
|
||||
{
|
||||
/** @var string Temporary folder where the temporary files will be created */
|
||||
protected $tempFolder;
|
||||
@ -70,7 +70,7 @@ class Reader extends AbstractReader2
|
||||
*
|
||||
* @return SheetIterator To iterate over sheets
|
||||
*/
|
||||
public function getSheetIterator()
|
||||
public function getConcreteSheetIterator()
|
||||
{
|
||||
return $this->sheetIterator;
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Reader\ReaderFactory2;
|
||||
use Box\Spout\Reader\ReaderFactory;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
|
||||
@ -22,7 +22,17 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
public function testOpenShouldThrowExceptionIfFileDoesNotExist()
|
||||
{
|
||||
ReaderFactory2::create(Type::CSV)->open('/path/to/fake/file.csv');
|
||||
ReaderFactory::create(Type::CSV)->open('/path/to/fake/file.csv');
|
||||
}
|
||||
|
||||
/**
 * Asking for the sheet iterator of a reader that was never opened must fail.
 *
 * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException
 *
 * @return void
 */
public function testOpenShouldThrowExceptionIfTryingToReadBeforeOpeningReader()
{
    $unopenedReader = ReaderFactory::create(Type::CSV);
    $unopenedReader->getSheetIterator();
}
|
||||
|
||||
/**
|
||||
@ -39,7 +49,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
$resourcePath = $this->getResourcePath('csv_standard.csv');
|
||||
|
||||
$reader = ReaderFactory2::create(Type::CSV);
|
||||
$reader = ReaderFactory::create(Type::CSV);
|
||||
$reader->setGlobalFunctionsHelper($helperStub);
|
||||
$reader->open($resourcePath);
|
||||
}
|
||||
@ -162,7 +172,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
$reader = ReaderFactory2::create(Type::CSV);
|
||||
$reader = ReaderFactory::create(Type::CSV);
|
||||
$reader->setFieldDelimiter($fieldDelimiter);
|
||||
$reader->setFieldEnclosure($fieldEnclosure);
|
||||
|
||||
|
@ -1,208 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class CSVTest
 * Exercises the CSV reader obtained from the ReaderFactory against resource files.
 *
 * @package Box\Spout\Reader
 */
class CSVTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /**
     * Opening a path that does not exist must fail.
     *
     * @expectedException \Box\Spout\Common\Exception\IOException
     *
     * @return void
     */
    public function testOpenShouldThrowExceptionIfFileDoesNotExist()
    {
        $csvReader = ReaderFactory::create(Type::CSV);
        $csvReader->open('/path/to/fake/file.csv');
    }

    /**
     * Opening an existing file must fail when is_readable() reports false.
     *
     * @expectedException \Box\Spout\Common\Exception\IOException
     *
     * @return void
     */
    public function testOpenShouldThrowExceptionIfFileNotReadable()
    {
        $unreadableHelper = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
            ->setMethods(['is_readable'])
            ->getMock();
        $unreadableHelper->method('is_readable')->willReturn(false);

        $csvPath = $this->getResourcePath('csv_standard.csv');

        $csvReader = ReaderFactory::create(Type::CSV);
        $csvReader->setGlobalFunctionsHelper($unreadableHelper);
        $csvReader->open($csvPath);
    }

    /**
     * Checking for rows on a reader that was never opened must fail.
     *
     * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException
     *
     * @return void
     */
    public function testReadShouldThrowExceptionIfReadBeforeReaderOpened()
    {
        ReaderFactory::create(Type::CSV)->hasNextRow();
    }

    /**
     * Asking for one more row after all rows were consumed must fail.
     *
     * @expectedException \Box\Spout\Reader\Exception\EndOfFileReachedException
     *
     * @return void
     */
    public function testReadShouldThrowExceptionIfNextRowCalledAfterReadingDone()
    {
        $csvReader = ReaderFactory::create(Type::CSV);
        $csvReader->open($this->getResourcePath('csv_standard.csv'));

        // consume every available row
        while ($csvReader->hasNextRow()) {
            $csvReader->nextRow();
        }

        // one row too many
        $csvReader->nextRow();
    }

    /**
     * @return void
     */
    public function testReadStandardCSV()
    {
        $readRows = $this->getAllRowsForFile('csv_standard.csv');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--21', 'csv--22', 'csv--23'],
                ['csv--31', 'csv--32', 'csv--33'],
            ],
            $readRows
        );
    }

    /**
     * @return void
     */
    public function testReadShouldNotStopAtCommaIfEnclosed()
    {
        $readRows = $this->getAllRowsForFile('csv_with_comma_enclosed.csv');
        $firstCell = $readRows[0][0];

        $this->assertEquals('This is, a comma', $firstCell);
    }

    /**
     * @return void
     */
    public function testReadShouldKeepEmptyCells()
    {
        $readRows = $this->getAllRowsForFile('csv_with_empty_cells.csv');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--21', '', 'csv--23'],
                ['csv--31', 'csv--32', ''],
            ],
            $readRows
        );
    }

    /**
     * @return void
     */
    public function testReadShouldSkipEmptyLines()
    {
        $readRows = $this->getAllRowsForFile('csv_with_empty_line.csv');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--31', 'csv--32', 'csv--33'],
            ],
            $readRows
        );
    }

    /**
     * @return void
     */
    public function testReadShouldHaveTheRightNumberOfCells()
    {
        $readRows = $this->getAllRowsForFile('csv_with_different_cells_number.csv');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--21', 'csv--22'],
                ['csv--31'],
            ],
            $readRows
        );
    }

    /**
     * @return void
     */
    public function testReadShouldSupportCustomFieldDelimiter()
    {
        $readRows = $this->getAllRowsForFile('csv_delimited_with_pipes.csv', '|');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--21', 'csv--22', 'csv--23'],
                ['csv--31', 'csv--32', 'csv--33'],
            ],
            $readRows
        );
    }

    /**
     * @return void
     */
    public function testReadShouldSupportCustomFieldEnclosure()
    {
        $readRows = $this->getAllRowsForFile('csv_text_enclosed_with_pound.csv', ',', '#');
        $firstCell = $readRows[0][0];

        $this->assertEquals('This is, a comma', $firstCell);
    }

    /**
     * @return void
     */
    public function testReadShouldSkipUtf8Bom()
    {
        $readRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv');

        $this->assertEquals(
            [
                ['csv--11', 'csv--12', 'csv--13'],
                ['csv--21', 'csv--22', 'csv--23'],
            ],
            $readRows
        );
    }

    /**
     * Reads the given resource file row by row with a CSV reader
     * configured with the given delimiter and enclosure.
     *
     * @param string $fileName Name of the resource file to read
     * @param string $fieldDelimiter Field delimiter to configure on the reader
     * @param string $fieldEnclosure Field enclosure to configure on the reader
     * @return array All the rows read from the given file
     */
    private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"')
    {
        $csvReader = ReaderFactory::create(Type::CSV);
        $csvReader->setFieldDelimiter($fieldDelimiter);
        $csvReader->setFieldEnclosure($fieldEnclosure);

        $csvReader->open($this->getResourcePath($fileName));

        $collectedRows = [];
        while ($csvReader->hasNextRow()) {
            $collectedRows[] = $csvReader->nextRow();
        }

        $csvReader->close();

        return $collectedRows;
    }
}
|
@ -1,60 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
/**
 * Class CellHelperTest
 * Exercises the static helpers of CellHelper.
 *
 * @package Box\Spout\Reader\Helper\XLSX
 */
class CellHelperTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Indexes missing from the input array must be filled with the given value.
     *
     * @return void
     */
    public function testFillMissingArrayIndexes()
    {
        $sparseArray = [1 => 1, 3 => 3];

        $this->assertEquals(
            ['FILL', 1, 'FILL', 3],
            CellHelper::fillMissingArrayIndexes($sparseArray, 'FILL')
        );
    }

    /**
     * Pairs of [cell index, expected zero-based column index].
     *
     * @return array
     */
    public function dataProviderForTestGetColumnIndexFromCellIndex()
    {
        $cases = [];
        $cases[] = ['A1', 0];
        $cases[] = ['Z3', 25];
        $cases[] = ['AA5', 26];
        $cases[] = ['AB24', 27];
        $cases[] = ['BC5', 54];
        $cases[] = ['BCZ99', 1455];

        return $cases;
    }

    /**
     * @dataProvider dataProviderForTestGetColumnIndexFromCellIndex
     *
     * @param string $cellIndex
     * @param int $expectedColumnIndex
     * @return void
     */
    public function testGetColumnIndexFromCellIndex($cellIndex, $expectedColumnIndex)
    {
        $actualColumnIndex = CellHelper::getColumnIndexFromCellIndex($cellIndex);

        $this->assertEquals($expectedColumnIndex, $actualColumnIndex);
    }

    /**
     * A string that is not a cell index must be rejected.
     *
     * @expectedException \Box\Spout\Common\Exception\InvalidArgumentException
     *
     * @return void
     */
    public function testGetColumnIndexFromCellIndexShouldThrowIfInvalidCellIndex()
    {
        $notACellIndex = 'InvalidCellIndex';
        CellHelper::getColumnIndexFromCellIndex($notACellIndex);
    }
}
|
@ -1,99 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching;
|
||||
|
||||
/**
 * Class CachingStrategyFactoryTest
 * Exercises the strategy selection and the memory limit parsing of CachingStrategyFactory.
 *
 * @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching
 */
class CachingStrategyFactoryTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Triples of [number of unique shared strings, memory limit in KB, expected strategy class name].
     *
     * @return array
     */
    public function dataProviderForTestGetBestCachingStrategy()
    {
        $maxStringsPerFile = CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE;
        $memoryForTenStrings = CachingStrategyFactory::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB * 10;

        return [
            [$maxStringsPerFile, -1, 'FileBasedStrategy'],
            [$maxStringsPerFile + 10, -1, 'FileBasedStrategy'],
            [$maxStringsPerFile - 10, -1, 'InMemoryStrategy'],
            [10, $memoryForTenStrings, 'FileBasedStrategy'],
            [15, $memoryForTenStrings, 'FileBasedStrategy'],
            [5, $memoryForTenStrings, 'InMemoryStrategy'],
        ];
    }

    /**
     * @dataProvider dataProviderForTestGetBestCachingStrategy
     *
     * @param int $sharedStringsUniqueCount
     * @param int $memoryLimitInKB
     * @param string $expectedStrategyClassName
     * @return void
     */
    public function testGetBestCachingStrategy($sharedStringsUniqueCount, $memoryLimitInKB, $expectedStrategyClassName)
    {
        /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $stubbedFactory */
        $stubbedFactory = $this
            ->getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory')
            ->disableOriginalConstructor()
            ->setMethods(['getMemoryLimitInKB'])
            ->getMock();

        // the stub reports the memory limit under test
        $stubbedFactory->method('getMemoryLimitInKB')->willReturn($memoryLimitInKB);

        // install the stub as the factory's static "instance" for the duration of the test
        \ReflectionHelper::setStaticValue('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory', 'instance', $stubbedFactory);

        $chosenStrategy = $stubbedFactory->getBestCachingStrategy($sharedStringsUniqueCount, null);

        $this->assertEquals(
            'Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\\' . $expectedStrategyClassName,
            get_class($chosenStrategy)
        );

        $chosenStrategy->clearCache();
        \ReflectionHelper::reset();
    }

    /**
     * Pairs of [formatted memory limit, expected memory limit in KB].
     *
     * @return array
     */
    public function dataProviderForTestGetMemoryLimitInKB()
    {
        $cases = [];
        $cases[] = ['-1', -1];
        $cases[] = ['invalid', -1];
        $cases[] = ['1024B', 1];
        $cases[] = ['128K', 128];
        $cases[] = ['256KB', 256];
        $cases[] = ['512M', 512 * 1024];
        $cases[] = ['2MB', 2 * 1024];
        $cases[] = ['1G', 1 * 1024 * 1024];
        $cases[] = ['10GB', 10 * 1024 * 1024];
        $cases[] = ['2T', 2 * 1024 * 1024 * 1024];
        $cases[] = ['5TB', 5 * 1024 * 1024 * 1024];

        return $cases;
    }

    /**
     * @dataProvider dataProviderForTestGetMemoryLimitInKB
     *
     * @param string $memoryLimitFormatted
     * @param float $expectedMemoryLimitInKB
     * @return void
     */
    public function testGetMemoryLimitInKB($memoryLimitFormatted, $expectedMemoryLimitInKB)
    {
        /** @var CachingStrategyFactory|\PHPUnit_Framework_MockObject_MockObject $stubbedFactory */
        $stubbedFactory = $this
            ->getMockBuilder('\Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory')
            ->disableOriginalConstructor()
            ->setMethods(['getMemoryLimitFromIni'])
            ->getMock();

        // the stub reports the formatted limit under test
        $stubbedFactory->method('getMemoryLimitFromIni')->willReturn($memoryLimitFormatted);

        $actualMemoryLimitInKB = \ReflectionHelper::callMethodOnObject($stubbedFactory, 'getMemoryLimitInKB');

        $this->assertEquals($expectedMemoryLimitInKB, $actualMemoryLimitInKB);
    }
}
|
@ -1,112 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\Helper\XLSX;
|
||||
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\CachingStrategyFactory;
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\FileBasedStrategy;
|
||||
use Box\Spout\Reader\Helper\XLSX\SharedStringsCaching\InMemoryStrategy;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class SharedStringsHelperTest
 * Exercises the extraction and the lookup of shared strings from XLSX resource files.
 *
 * @package Box\Spout\Reader\Helper\XLSX
 */
class SharedStringsHelperTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /** @var SharedStringsHelper */
    private $sharedStringsHelper;

    /**
     * Creates a helper for the "one_sheet_with_shared_strings.xlsx" resource before each test.
     *
     * @return void
     */
    public function setUp()
    {
        $xlsxPath = $this->getResourcePath('one_sheet_with_shared_strings.xlsx');
        $this->sharedStringsHelper = new SharedStringsHelper($xlsxPath);
    }

    /**
     * Cleans up the helper created in setUp() after each test.
     *
     * @return void
     */
    public function tearDown()
    {
        $this->sharedStringsHelper->cleanup();
    }

    /**
     * @expectedException \Box\Spout\Reader\Exception\SharedStringNotFoundException
     * @return void
     */
    public function testGetStringAtIndexShouldThrowExceptionIfStringNotFound()
    {
        $helper = $this->sharedStringsHelper;

        $helper->extractSharedStrings();
        $helper->getStringAtIndex(PHP_INT_MAX);
    }

    /**
     * @return void
     */
    public function testGetStringAtIndexShouldReturnTheCorrectStringIfFound()
    {
        $helper = $this->sharedStringsHelper;
        $helper->extractSharedStrings();

        $this->assertEquals('s1--A1', $helper->getStringAtIndex(0));
        $this->assertEquals('s1--E5', $helper->getStringAtIndex(24));

        $strategyInUse = \ReflectionHelper::getValueOnObject($helper, 'cachingStrategy');
        $this->assertTrue($strategyInUse instanceof InMemoryStrategy);
    }

    /**
     * @return void
     */
    public function testGetStringAtIndexShouldWorkWithMultilineStrings()
    {
        $multilineHelper = new SharedStringsHelper(
            $this->getResourcePath('one_sheet_with_shared_multiline_strings.xlsx')
        );

        $multilineHelper->extractSharedStrings();

        $this->assertEquals("s1\nA1", $multilineHelper->getStringAtIndex(0));
        $this->assertEquals("s1\nE5", $multilineHelper->getStringAtIndex(24));

        $multilineHelper->cleanup();
    }

    /**
     * @return void
     */
    public function testGetStringAtIndexWithFileBasedStrategy()
    {
        // force the file-based strategy by setting no memory limit
        $memoryLimitBackup = ini_get('memory_limit');
        ini_set('memory_limit', '-1');

        $bigFileHelper = new SharedStringsHelper(
            $this->getResourcePath('sheet_with_lots_of_shared_strings.xlsx')
        );

        $bigFileHelper->extractSharedStrings();

        $this->assertEquals('str', $bigFileHelper->getStringAtIndex(0));

        $indexPastFirstTempFile = CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE + 1;
        $this->assertEquals('str', $bigFileHelper->getStringAtIndex($indexPastFirstTempFile));

        $strategyInUse = \ReflectionHelper::getValueOnObject($bigFileHelper, 'cachingStrategy');
        $this->assertTrue($strategyInUse instanceof FileBasedStrategy);

        $bigFileHelper->cleanup();

        // put the original memory limit back
        ini_set('memory_limit', $memoryLimitBackup);
    }
}
|
@ -1,52 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class SheetTest
 * Checks the information (name, index, id) exposed by the sheets returned by the XLSX reader.
 *
 * @package Box\Spout\Reader
 */
class SheetTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /**
     * Reads a workbook made of two custom-named sheets and verifies the name,
     * index and id reported by each of them.
     *
     * @return void
     */
    public function testNextSheetShouldReturnCorrectSheetInfos()
    {
        $sheets = $this->openFileAndReturnSheets('two_sheets_with_custom_names.xlsx');

        $firstSheet = $sheets[0];
        $this->assertEquals('CustomName1', $firstSheet->getName());
        $this->assertEquals(0, $firstSheet->getIndex());
        $this->assertEquals(1, $firstSheet->getId());

        $secondSheet = $sheets[1];
        $this->assertEquals('CustomName2', $secondSheet->getName());
        $this->assertEquals(1, $secondSheet->getIndex());
        $this->assertEquals(2, $secondSheet->getId());
    }

    /**
     * Opens the given resource with an XLSX reader, collects every sheet the
     * reader offers, closes the reader and returns the collected sheets.
     *
     * @param string $fileName Name of the resource file to open
     * @return Sheet[] Sheets in the order the reader returned them
     */
    private function openFileAndReturnSheets($fileName)
    {
        $path = $this->getResourcePath($fileName);

        $xlsxReader = ReaderFactory::create(Type::XLSX);
        $xlsxReader->open($path);

        // Keep pulling sheets for as long as the reader announces another one.
        for ($collectedSheets = []; $xlsxReader->hasNextSheet();) {
            $collectedSheets[] = $xlsxReader->nextSheet();
        }

        $xlsxReader->close();

        return $collectedSheets;
    }
}
|
@ -4,7 +4,7 @@ namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Reader\ReaderFactory2;
|
||||
use Box\Spout\Reader\ReaderFactory;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
|
||||
@ -284,7 +284,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
$reader = ReaderFactory2::create(Type::XLSX);
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
|
@ -3,7 +3,7 @@
|
||||
namespace Box\Spout\Reader\XLSX;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Reader\ReaderFactory2;
|
||||
use Box\Spout\Reader\ReaderFactory;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
|
||||
@ -38,7 +38,7 @@ class SheetTest extends \PHPUnit_Framework_TestCase
|
||||
private function openFileAndReturnSheets($fileName)
|
||||
{
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
$reader = ReaderFactory2::create(Type::XLSX);
|
||||
$reader = ReaderFactory::create(Type::XLSX);
|
||||
$reader->open($resourcePath);
|
||||
|
||||
$sheets = [];
|
||||
|
@ -1,332 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader;
|
||||
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class XLSXTest
 * Functional tests for the XLSX reader: failure cases, sheet navigation and
 * the rows/cells read from the fixture workbooks.
 *
 * @package Box\Spout\Reader
 */
class XLSXTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /**
     * Files for which reading is expected to fail.
     *
     * @return array
     */
    public function dataProviderForTestReadShouldThrowException()
    {
        $failingFiles = [
            ['/path/to/fake/file.xlsx'],
            ['file_with_no_sheets_in_content_types.xlsx'],
            ['file_corrupted.xlsx'],
        ];

        return $failingFiles;
    }

    /**
     * Reading any of the provided files must raise an IOException.
     *
     * @dataProvider dataProviderForTestReadShouldThrowException
     * @expectedException \Box\Spout\Common\Exception\IOException
     *
     * @param string $filePath
     * @return void
     */
    public function testReadShouldThrowException($filePath)
    {
        // The expected exception is declared through the annotation above.
        $this->getAllRowsForFile($filePath);
    }

    /**
     * Asking for the next sheet before opening the reader must be rejected.
     *
     * @expectedException \Box\Spout\Reader\Exception\ReaderNotOpenedException
     *
     * @return void
     */
    public function testHasNextSheetShouldThrowExceptionIfReaderNotOpened()
    {
        $unopenedReader = ReaderFactory::create(Type::XLSX);
        $unopenedReader->hasNextSheet();
    }

    /**
     * Requesting one more sheet after all sheets were consumed must be rejected.
     *
     * @expectedException \Box\Spout\Reader\Exception\EndOfWorksheetsReachedException
     *
     * @return void
     */
    public function testNextSheetShouldThrowExceptionIfNoMoreSheetsToRead()
    {
        $path = $this->getResourcePath('one_sheet_with_shared_strings.xlsx');

        $xlsxReader = ReaderFactory::create(Type::XLSX);
        $xlsxReader->open($path);

        // Exhaust every available sheet...
        for (; $xlsxReader->hasNextSheet();) {
            $xlsxReader->nextSheet();
        }

        // ...so that this extra call is the one expected to throw.
        $xlsxReader->nextSheet();
    }

    /**
     * Each entry is: resource name, expected total number of rows, expected number of cells per row.
     *
     * @return array
     */
    public function dataProviderForTestReadForAllWorksheets()
    {
        $cases = [
            ['one_sheet_with_shared_strings.xlsx', 5, 5],
            ['one_sheet_with_inline_strings.xlsx', 5, 5],
            ['two_sheets_with_shared_strings.xlsx', 10, 5],
            ['two_sheets_with_inline_strings.xlsx', 10, 5]
        ];

        return $cases;
    }

    /**
     * Checks the number of rows read across all sheets and the number of cells in each row.
     *
     * @dataProvider dataProviderForTestReadForAllWorksheets
     *
     * @param string $resourceName
     * @param int $expectedNumOfRows
     * @param int $expectedNumOfCellsPerRow
     * @return void
     */
    public function testReadForAllWorksheets($resourceName, $expectedNumOfRows, $expectedNumOfCellsPerRow)
    {
        $rows = $this->getAllRowsForFile($resourceName);
        $numOfRows = count($rows);

        $this->assertEquals($expectedNumOfRows, $numOfRows, "There should be $expectedNumOfRows rows");

        foreach ($rows as $cells) {
            $numOfCells = count($cells);
            $this->assertEquals($expectedNumOfCellsPerRow, $numOfCells, "There should be $expectedNumOfCellsPerRow cells for every row");
        }
    }

    /**
     * A workbook that ships without a shared strings file must still be readable.
     *
     * @return void
     */
    public function testReadShouldSupportFilesWithoutSharedStringsFile()
    {
        $rows = $this->getAllRowsForFile('sheet_with_no_shared_strings_file.xlsx');

        $this->assertEquals(
            [
                [10, 11],
                [20, 21],
            ],
            $rows
        );
    }

    /**
     * Compares the rows read from the "all cell types" fixture with the expected values.
     *
     * @return void
     */
    public function testReadShouldSupportAllCellTypes()
    {
        $rows = $this->getAllRowsForFile('sheet_with_all_cell_types.xlsx');

        $dateFormat = 'Y-m-d H:i:s';
        $firstRow = [
            's1--A1', 's1--A2',
            false, true,
            \DateTime::createFromFormat($dateFormat, '2015-06-03 13:21:58'),
            \DateTime::createFromFormat($dateFormat, '2015-06-01 00:00:00'),
            10, 10.43,
            null,
            'weird string', // valid 'str' string
            null, // invalid date
        ];
        // Second row: nine empty strings.
        $secondRow = array_fill(0, 9, '');

        $this->assertEquals([$firstRow, $secondRow], $rows);
    }

    /**
     * Expects both rows of the fixture to expose 5 cells, trailing empty cells included.
     *
     * NOTE(review): the test name says dimensions are specified, while the fixture
     * name says "without_dimensions_but_spans" - confirm the intended fixture.
     *
     * @return void
     */
    public function testReadShouldKeepEmptyCellsAtTheEndIfDimensionsSpecified()
    {
        $rows = $this->getAllRowsForFile('sheet_without_dimensions_but_spans_and_empty_cells.xlsx');

        $this->assertEquals(2, count($rows), 'There should be 2 rows');
        foreach ($rows as $cells) {
            $this->assertEquals(5, count($cells), 'There should be 5 cells for every row, because empty rows should be preserved');
        }

        $this->assertEquals(
            [
                ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
                ['s1--A2', 's1--B2', 's1--C2', '', ''],
            ],
            $rows
        );
    }

    /**
     * Expects 5 cells in the first row and 3 cells in the second row of the fixture.
     *
     * NOTE(review): the fixture and the expectations are the same as in
     * testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified, although the
     * test name suggests empty cells should be kept - confirm the intended fixture.
     *
     * @return void
     */
    public function testReadShouldKeepEmptyCellsAtTheEndIfNoDimensionsButSpansSpecified()
    {
        $rows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx');

        $this->assertEquals(2, count($rows), 'There should be 2 rows');
        $this->assertEquals(5, count($rows[0]), 'There should be 5 cells in the first row');
        $this->assertEquals(3, count($rows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip');

        $this->assertEquals(
            [
                ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
                ['s1--A2', 's1--B2', 's1--C2'],
            ],
            $rows
        );
    }

    /**
     * Expects trailing empty cells to be dropped: 5 cells in the first row, 3 in the second.
     *
     * @return void
     */
    public function testReadShouldSkipEmptyCellsAtTheEndIfDimensionsNotSpecified()
    {
        $rows = $this->getAllRowsForFile('sheet_without_dimensions_and_empty_cells.xlsx');

        $this->assertEquals(2, count($rows), 'There should be 2 rows');
        $this->assertEquals(5, count($rows[0]), 'There should be 5 cells in the first row');
        $this->assertEquals(3, count($rows[1]), 'There should be only 3 cells in the second row, because empty rows at the end should be skip');

        $this->assertEquals(
            [
                ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
                ['s1--A2', 's1--B2', 's1--C2'],
            ],
            $rows
        );
    }

    /**
     * Expects only the two non-empty rows (rows 1 and 3 of the fixture) to be returned.
     *
     * @return void
     */
    public function testReadShouldSkipEmptyRows()
    {
        $rows = $this->getAllRowsForFile('sheet_with_empty_rows.xlsx');

        $this->assertEquals(2, count($rows), 'There should be only 2 rows, because the empty row is skipped');

        $this->assertEquals(
            [
                ['s1--A1', 's1--B1', 's1--C1', 's1--D1', 's1--E1'],
                ['s1--A3', 's1--B3', 's1--C3', 's1--D3', 's1--E3'],
            ],
            $rows
        );
    }

    /**
     * An empty shared string must be read as an empty cell value.
     *
     * @return void
     */
    public function testReadShouldSupportEmptySharedString()
    {
        $rows = $this->getAllRowsForFile('sheet_with_empty_shared_string.xlsx');

        $this->assertEquals(
            [
                ['s1--A1', '', 's1--C1'],
            ],
            $rows
        );
    }

    /**
     * Leading and trailing spaces of the fixture's shared strings must be kept.
     *
     * @return void
     */
    public function testReadShouldPreserveSpaceIfSpecified()
    {
        $rows = $this->getAllRowsForFile('sheet_with_preserve_space_shared_strings.xlsx');

        $this->assertEquals(
            [
                ['  s1--A1', 's1--B1  ', '  s1--C1  '],
            ],
            $rows
        );
    }

    /**
     * The first row must only contain the cell texts, without pronunciation data.
     *
     * @return void
     */
    public function testReadShouldSkipPronunciationData()
    {
        $rows = $this->getAllRowsForFile('sheet_with_pronunciation.xlsx');
        $firstRow = $rows[0];

        $this->assertEquals(['名前', '一二三四'], $firstRow, 'Pronunciation data should be removed.');
    }

    /**
     * Fixtures used to check the protection against XML entity attacks.
     *
     * @return array
     */
    public function dataProviderForTestReadShouldBeProtectedAgainstAttacks()
    {
        $attackFiles = [
            ['attack_billion_laughs.xlsx'],
            ['attack_quadratic_blowup.xlsx'],
        ];

        return $attackFiles;
    }

    /**
     * Reading an attack fixture must fail with an IOException, within 10 seconds
     * and with a peak memory usage below 30MB.
     *
     * @dataProvider dataProviderForTestReadShouldBeProtectedAgainstAttacks
     * @NOTE: The LIBXML_NOENT is used to ACTUALLY substitute entities (and should therefore not be used)
     *
     * @param string $fileName
     * @return void
     */
    public function testReadShouldBeProtectedAgainstAttacks($fileName)
    {
        $startedAt = microtime(true);

        try {
            $this->getAllRowsForFile($fileName);
            // Reaching this line means the reader accepted the malicious file.
            $this->fail('An exception should have been thrown');
        } catch (IOException $ioException) {
            $elapsedSeconds = microtime(true) - $startedAt;
            $this->assertLessThan(10, $elapsedSeconds, 'Entities should not be expanded and therefore take more than 10 seconds to be parsed.');

            $memoryCeilingInBytes = 30 * 1024 * 1024; // 30MB
            $peakMemoryUsage = memory_get_peak_usage(true);
            $this->assertLessThan($memoryCeilingInBytes, $peakMemoryUsage, 'Entities should not be expanded and therefore consume all the memory.');
        }
    }

    /**
     * A sheet without any cell must be read as an empty list of rows.
     *
     * @return void
     */
    public function testReadShouldBeAbleToProcessEmptySheets()
    {
        $rows = $this->getAllRowsForFile('sheet_with_no_cells.xlsx');

        $this->assertEquals([], $rows, 'Sheet with no cells should be correctly processed.');
    }

    /**
     * Compares the rows read from the formulas fixture with the expected plain values.
     *
     * @return void
     */
    public function testReadShouldSkipFormulas()
    {
        $rows = $this->getAllRowsForFile('sheet_with_formulas.xlsx');

        $this->assertEquals(
            [
                ['val1', 'val2', 'total1', 'total2'],
                [10, 20, 30, 21],
                [11, 21, 32, 41],
            ],
            $rows
        );
    }

    /**
     * Opens the given resource with an XLSX reader, walks through every sheet
     * and gathers all their rows into a single list.
     *
     * @param string $fileName
     * @return array All the rows read from the given file
     */
    private function getAllRowsForFile($fileName)
    {
        $path = $this->getResourcePath($fileName);

        $xlsxReader = ReaderFactory::create(Type::XLSX);
        $xlsxReader->open($path);

        $collectedRows = [];
        while (true) {
            if (!$xlsxReader->hasNextSheet()) {
                break;
            }
            $xlsxReader->nextSheet();

            // Drain the rows of the sheet that was just selected.
            while (true) {
                if (!$xlsxReader->hasNextRow()) {
                    break;
                }
                $collectedRows[] = $xlsxReader->nextRow();
            }
        }

        $xlsxReader->close();

        return $collectedRows;
    }
}
|
Loading…
x
Reference in New Issue
Block a user