Merge branch 'master' into feature/num_format

This commit is contained in:
Rushleader 2016-05-23 08:53:19 +02:00
commit def93360c5
29 changed files with 604 additions and 845 deletions

View File

@ -16,7 +16,8 @@ install:
script:
- mkdir -p build/logs
- php vendor/bin/phpunit --coverage-clover build/logs/clover.xml
- php vendor/bin/phpunit --coverage-clover=build/logs/coverage.clover
after_script:
- if [[ $TRAVIS_PHP_VERSION != 'hhvm' && $TRAVIS_PHP_VERSION != '7.0' ]]; then php vendor/bin/ocular code-coverage:upload --format=php-clover build/logs/clover.xml; fi
- if [[ $TRAVIS_PHP_VERSION != 'hhvm' && $TRAVIS_PHP_VERSION != '7.0' ]]; then wget https://scrutinizer-ci.com/ocular.phar; fi
- if [[ $TRAVIS_PHP_VERSION != 'hhvm' && $TRAVIS_PHP_VERSION != '7.0' ]]; then php ocular.phar code-coverage:upload --format=php-clover build/logs/coverage.clover; fi

View File

@ -121,7 +121,7 @@ $reader->setEncoding('UTF-16LE');
The writer always generate CSV files encoded in UTF-8, with a BOM.
### Configuring the XLSX and ODS writers
### Configuring the XLSX and ODS readers and writers
#### Row styling
@ -163,7 +163,6 @@ Font | Bold | `StyleBuilder::setFontBold()`
| Font color | `StyleBuilder::setFontColor(Color::BLUE)`<br>`StyleBuilder::setFontColor(Color::rgb(0, 128, 255))`
Alignment | Wrap text | `StyleBuilder::setShouldWrapText()`
#### New sheet creation
It is also possible to change the behavior of the writer when the maximum number of rows (1,048,576) have been written in the current sheet:
@ -208,6 +207,20 @@ $writer->setShouldUseInlineStrings(false); // will use shared strings
> Apple's products (Numbers and the iOS previewer) don't support inline strings and display empty cells instead. Therefore, if these platforms need to be supported, make sure to use shared strings!
#### Date/Time formatting
When reading a spreadsheet containing dates or times, Spout returns the values by default as DateTime objects.
It is possible to change this behavior and have a formatted date returned instead (e.g. "2016-11-29 1:22 AM"). The format of the date corresponds to what is specified in the spreadsheet.
```php
use Box\Spout\Reader\ReaderFactory;
use Box\Spout\Common\Type;
$reader = ReaderFactory::create(Type::XLSX);
$reader->setShouldFormatDates(false); // default value
$reader->setShouldFormatDates(true); // will return formatted dates
```
### Playing with sheets
When creating a XLSX or ODS file, it is possible to control which sheet the data will be written into. At any time, you can retrieve or set the current sheet:

View File

@ -18,8 +18,7 @@
"ext-simplexml": "*"
},
"require-dev": {
"phpunit/phpunit": ">=3.7",
"scrutinizer/ocular": "~1.1"
"phpunit/phpunit": "^4.8.0"
},
"suggest": {
"ext-iconv": "To handle non UTF-8 CSV files (if \"php-intl\" is not already installed or is too limited)",

815
composer.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,9 @@ abstract class AbstractReader implements ReaderInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates = false;
/**
* Returns whether stream wrappers are supported
*
@ -49,7 +52,7 @@ abstract class AbstractReader implements ReaderInterface
abstract protected function closeReader();
/**
* @param $globalFunctionsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @return AbstractReader
*/
public function setGlobalFunctionsHelper($globalFunctionsHelper)
@ -58,6 +61,18 @@ abstract class AbstractReader implements ReaderInterface
return $this;
}
/**
* Sets whether date/time values should be returned as PHP objects or be formatted as strings.
*
* @param bool $shouldFormatDates
* @return AbstractReader
*/
public function setShouldFormatDates($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
return $this;
}
/**
* Prepares the reader to read the given file. It also makes sure
* that the file exists and is readable.

View File

@ -23,6 +23,7 @@ class CellValueFormatter
/** Definition of XML nodes names used to parse data */
const XML_NODE_P = 'p';
const XML_NODE_S = 'text:s';
const XML_NODE_A = 'text:a';
/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_TYPE = 'office:value-type';
@ -33,14 +34,19 @@ class CellValueFormatter
const XML_ATTRIBUTE_CURRENCY = 'office:currency';
const XML_ATTRIBUTE_C = 'text:c';
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */
protected $escaper;
/**
*
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct()
public function __construct($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\ODS();
}
@ -98,6 +104,8 @@ class CellValueFormatter
$spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
$numSpaces = (!empty($spaceAttribute)) ? intval($spaceAttribute) : 1;
$currentPValue .= str_repeat(' ', $numSpaces);
} else if ($childNode->nodeName === self::XML_NODE_A) {
$currentPValue .= $childNode->nodeValue;
}
}
@ -119,6 +127,7 @@ class CellValueFormatter
{
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
$nodeIntValue = intval($nodeValue);
// The "==" is intentionally not a "===" because only the value matters, not the type
$cellValue = ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
return $cellValue;
}
@ -141,15 +150,27 @@ class CellValueFormatter
* Returns the cell Date value from the given node.
*
* @param \DOMNode $node
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatDateCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
// <text:p>05/19/16 04:39 PM</text:p>
// </table:table-cell>
if ($this->shouldFormatDates) {
// The date is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "date-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}
@ -157,15 +178,27 @@ class CellValueFormatter
* Returns the cell Time value from the given node.
*
* @param \DOMNode $node
* @return \DateInterval|null The value associated with the cell or NULL if invalid time value
* @return \DateInterval|string|null The value associated with the cell or NULL if invalid time value
*/
protected function formatTimeCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
// <text:p>01:24:00 PM</text:p>
// </table:table-cell>
if ($this->shouldFormatDates) {
// The date is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "time-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}

View File

@ -42,7 +42,7 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
$this->sheetIterator = new SheetIterator($filePath);
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -45,11 +45,12 @@ class RowIterator implements IteratorInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($xmlReader)
public function __construct($xmlReader, $shouldFormatDates)
{
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = new CellValueFormatter();
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
}
/**
@ -186,7 +187,7 @@ class RowIterator implements IteratorInterface
/**
* empty() replacement that honours 0 as a valid value
*
* @param $value The cell value
* @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
* @return bool
*/
protected function isEmptyCellValue($value)

View File

@ -27,12 +27,13 @@ class Sheet implements SheetInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($xmlReader, $sheetIndex, $sheetName)
public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($xmlReader);
$this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -22,6 +22,9 @@ class SheetIterator implements IteratorInterface
/** @var string $filePath Path of the file to be read */
protected $filePath;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@ -36,11 +39,13 @@ class SheetIterator implements IteratorInterface
/**
* @param string $filePath Path of the file to be read
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath)
public function __construct($filePath, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->shouldFormatDates = $shouldFormatDates;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
@ -109,7 +114,7 @@ class SheetIterator implements IteratorInterface
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
$sheetName = $this->escaper->unescape($escapedSheetName);
return new Sheet($this->xmlReader, $sheetName, $this->currentSheetIndex);
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
}
/**

View File

@ -29,6 +29,8 @@ class CellValueFormatter
/** Constants used for date formatting */
const NUM_SECONDS_IN_ONE_DAY = 86400;
const NUM_SECONDS_IN_ONE_HOUR = 3600;
const NUM_SECONDS_IN_ONE_MINUTE = 60;
/**
* February 29th, 1900 is NOT a leap year but Excel thinks it is...
@ -42,17 +44,22 @@ class CellValueFormatter
/** @var StyleHelper Helper to work with styles */
protected $styleHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
protected $escaper;
/**
* @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param StyleHelper $styleHelper Helper to work with styles
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($sharedStringsHelper, $styleHelper)
public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
{
$this->sharedStringsHelper = $sharedStringsHelper;
$this->styleHelper = $styleHelper;
$this->shouldFormatDates = $shouldFormatDates;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
@ -166,7 +173,7 @@ class CellValueFormatter
$shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);
if ($shouldFormatAsDate) {
return $this->formatExcelTimestampValue(floatval($nodeValue));
return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId);
} else {
$nodeIntValue = intval($nodeValue);
return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
@ -176,33 +183,86 @@ class CellValueFormatter
/**
* Returns a cell's PHP Date value, associated to the given timestamp.
* NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
* NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
*/
protected function formatExcelTimestampValue($nodeValue)
protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
{
// Fix for the erroneous leap year in Excel
if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
--$nodeValue;
}
// The value 1.0 represents 1900-01-01. Numbers below 1.0 are not valid Excel dates.
if ($nodeValue < 1.0) {
if ($nodeValue >= 1) {
// Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
} else if ($nodeValue >= 0) {
// Values between 0 and 1 represent "times".
return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
} else {
// invalid date
return null;
}
}
/**
* Returns a cell's PHP DateTime value, associated to the given timestamp.
* Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|string The value associated with the cell
*/
protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
{
$time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
$hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR);
$minutes = floor($time / self::NUM_SECONDS_IN_ONE_MINUTE) - ($hours * self::NUM_SECONDS_IN_ONE_MINUTE);
$seconds = $time - ($hours * self::NUM_SECONDS_IN_ONE_HOUR) - ($minutes * self::NUM_SECONDS_IN_ONE_MINUTE);
// using the base Excel date (Jan 1st, 1900) - not relevant here
$dateObj = new \DateTime('1900-01-01');
$dateObj->setTime($hours, $minutes, $seconds);
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
}
}
/**
* Returns a cell's PHP Date value, associated to the given timestamp.
* NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
{
// Do not use any unix timestamps for calculation to prevent
// issues with numbers exceeding 2^31.
$secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
$secondsRemainder = round($secondsRemainder, 0);
try {
$cellValue = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
$cellValue->modify('+' . intval($nodeValue) . 'days');
$cellValue->modify('+' . $secondsRemainder . 'seconds');
$dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
$dateObj->modify('+' . intval($nodeValue) . 'days');
$dateObj->modify('+' . $secondsRemainder . 'seconds');
return $cellValue;
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
}
} catch (\Exception $e) {
return null;
}

View File

@ -0,0 +1,122 @@
<?php
namespace Box\Spout\Reader\XLSX\Helper;
/**
* Class DateFormatHelper
* This class provides helper functions to format Excel dates
*
* @package Box\Spout\Reader\XLSX\Helper
*/
class DateFormatHelper
{
const KEY_GENERAL = 'general';
const KEY_HOUR_12 = '12h';
const KEY_HOUR_24 = '24h';
/**
* This map is used to replace Excel format characters by their PHP equivalent.
* Keys should be ordered from longest to smallest.
*
* @var array Mapping between Excel format characters and PHP format characters
*/
private static $excelDateFormatToPHPDateFormatMapping = [
self::KEY_GENERAL => [
// Time
'am/pm' => 'A', // Uppercase Ante meridiem and Post meridiem
':mm' => ':i', // Minutes with leading zeros - if preceded by a ":" (otherwise month)
'mm:' => 'i:', // Minutes with leading zeros - if followed by a ":" (otherwise month)
'ss' => 's', // Seconds, with leading zeros
'.s' => '', // Ignore (fractional seconds format does not exist in PHP)
// Date
'e' => 'Y', // Full numeric representation of a year, 4 digits
'yyyy' => 'Y', // Full numeric representation of a year, 4 digits
'yy' => 'y', // Two digit representation of a year
'mmmmm' => 'M', // Short textual representation of a month, three letters ("mmmmm" should only contain the 1st letter...)
'mmmm' => 'F', // Full textual representation of a month
'mmm' => 'M', // Short textual representation of a month, three letters
'mm' => 'm', // Numeric representation of a month, with leading zeros
'm' => 'n', // Numeric representation of a month, without leading zeros
'dddd' => 'l', // Full textual representation of the day of the week
'ddd' => 'D', // Textual representation of a day, three letters
'dd' => 'd', // Day of the month, 2 digits with leading zeros
'd' => 'j', // Day of the month without leading zeros
],
self::KEY_HOUR_12 => [
'hh' => 'h', // 12-hour format of an hour without leading zeros
'h' => 'g', // 12-hour format of an hour without leading zeros
],
self::KEY_HOUR_24 => [
'hh' => 'H', // 24-hour hours with leading zero
'h' => 'G', // 24-hour format of an hour without leading zeros
],
];
/**
* Converts the given Excel date format to a format understandable by the PHP date function.
*
* @param string $excelDateFormat Excel date format
* @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php)
*/
public static function toPHPDateFormat($excelDateFormat)
{
// Remove brackets potentially present at the beginning of the format string
$dateFormat = preg_replace('/^(\[\$[^\]]+?\])/i', '', $excelDateFormat);
// Double quotes are used to escape characters that must not be interpreted.
// For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y"
// By exploding the format string using double quote as a delimiter, we can get all parts
// that must be transformed (even indexes) and all parts that must not be (odd indexes).
$dateFormatParts = explode('"', $dateFormat);
foreach ($dateFormatParts as $partIndex => $dateFormatPart) {
// do not look at odd indexes
if ($partIndex % 2 === 1) {
continue;
}
// Make sure all characters are lowercase, as the mapping table is using lowercase characters
$transformedPart = strtolower($dateFormatPart);
// Remove escapes related to non-format characters
$transformedPart = str_replace('\\', '', $transformedPart);
// Apply general transformation first...
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]);
// ... then apply hour transformation, for 12-hour or 24-hour format
if (self::has12HourFormatMarker($dateFormatPart)) {
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]);
} else {
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]);
}
// overwrite the parts array with the new transformed part
$dateFormatParts[$partIndex] = $transformedPart;
}
// Merge all transformed parts back together
$phpDateFormat = implode('"', $dateFormatParts);
// Finally, to have the date format compatible with the DateTime::format() function, we need to escape
// all characters that are inside double quotes (and double quotes must be removed).
// For instance, ["Day " dd] should become [\D\a\y\ dd]
$phpDateFormat = preg_replace_callback('/"(.+?)"/', function($matches) {
$stringToEscape = $matches[1];
$letters = preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY);
return '\\' . implode('\\', $letters);
}, $phpDateFormat);
return $phpDateFormat;
}
/**
* @param string $excelDateFormat Date format as defined by Excel
* @return bool Whether the given date format has the 12-hour format marker
*/
private static function has12HourFormatMarker($excelDateFormat)
{
return (stripos($excelDateFormat, 'am/pm') !== false);
}
}

View File

@ -14,18 +14,13 @@ use Box\Spout\Reader\XLSX\Sheet;
class SheetHelper
{
/** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/** Namespaces for the XML files */
const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Value of the Override attribute used in [Content_Types].xml to define sheets */
const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -35,6 +30,9 @@ class SheetHelper
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file */
protected $workbookXMLRelsAsXMLElement;
@ -45,12 +43,14 @@ class SheetHelper
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->sharedStringsHelper = $sharedStringsHelper;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->shouldFormatDates = $shouldFormatDates;
}
/**
@ -63,66 +63,52 @@ class SheetHelper
{
$sheets = [];
$contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::CONTENT_TYPES_XML_FILE_PATH,
self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
);
// Starting from "workbook.xml" as this file is the source of truth for the sheets order
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet');
// find all nodes defining a sheet
$sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
$numSheetNodes = count($sheetNodes);
for ($i = 0; $i < $numSheetNodes; $i++) {
$sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath);
foreach ($sheetNodes as $sheetIndex => $sheetNode) {
$sheets[] = $this->getSheetFromSheetXMLNode($sheetNode, $sheetIndex);
}
// make sure the sheets are sorted by index
// (as the sheets are not necessarily in this order in the XML file)
usort($sheets, function ($sheet1, $sheet2) {
return ($sheet1->getIndex() - $sheet2->getIndex());
});
return $sheets;
}
/**
* Returns an instance of a sheet, given the path of its data XML file.
* We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
* Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID.
* The entry contains the ID and name of the sheet.
* Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
* We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
* ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $sheetNode XML Node describing the sheet, as defined in "workbook.xml"
* @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
*/
protected function getSheetFromXML($sheetDataXMLFilePath)
protected function getSheetFromSheetXMLNode($sheetNode, $sheetIndexZeroBased)
{
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
// find the node associated to the given file path
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
$relationshipNode = $relationshipNodes[0];
$relationshipSheetId = $relationshipNode->getAttribute('Id');
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]');
$sheetNode = $sheetNodes[0];
// To retrieve namespaced attributes, some versions of LibXML will accept prefixing the attribute
// with the namespace directly (tested on LibXML 2.9.3). For older versions (tested on LibXML 2.7.8),
// attributes need to be retrieved without the namespace hint.
$sheetId = $sheetNode->getAttribute('r:id');
if ($sheetId === null) {
$sheetId = $sheetNode->getAttribute('id');
}
$escapedSheetName = $sheetNode->getAttribute('name');
$sheetIdOneBased = $sheetNode->getAttribute('sheetId');
$sheetIndexZeroBased = $sheetIdOneBased - 1;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$sheetName = $escaper->unescape($escapedSheetName);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
// find the file path of the sheet, by looking at the "workbook.xml.res" file
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Id="' . $sheetId . '"]');
$relationshipNode = $relationshipNodes[0];
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
$sheetDataXMLFilePath = '/xl/' . $relationshipNode->getAttribute('Target');
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
}
/**

View File

@ -30,6 +30,25 @@ class StyleHelper
/** By convention, default style ID is 0 */
const DEFAULT_STYLE_ID = 0;
/**
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
* @var array Mapping between built-in numFmtId and the associated format - for dates only
*/
protected static $builtinNumFmtIdToNumFormatMapping = [
14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
15 => 'd-mmm-yy',
16 => 'd-mmm',
17 => 'mmm-yy',
18 => 'h:mm AM/PM',
19 => 'h:mm:ss AM/PM',
20 => 'h:mm',
21 => 'h:mm:ss',
22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
45 => 'mm:ss',
46 => '[h]:mm:ss',
47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0',
];
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -171,18 +190,30 @@ class StyleHelper
protected function doesNumFmtIdIndicateDate($numFmtId)
{
return (
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
!$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) &&
(
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
)
);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates the "General" format (0 by convention)
*/
protected function doesNumFmtIdIndicateGeneralFormat($numFmtId)
{
return ($numFmtId === 0);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
$builtInDateFormatIds = [14, 15, 16, 17, 18, 19, 20, 21, 22, 45, 46, 47];
$builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
return in_array($numFmtId, $builtInDateFormatIds);
}
@ -223,4 +254,27 @@ class StyleHelper
return $hasFoundDateFormatCharacter;
}
/**
* Returns the format as defined in "styles.xml" of the given style.
* NOTE: It is assumed that the style DOES have a number format associated to it.
*
* @param int $styleId Zero-based style ID
* @return string The number format associated with the given style
*/
public function getNumberFormat($styleId)
{
$stylesAttributes = $this->getStylesAttributes();
$styleAttributes = $stylesAttributes[$styleId];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
$numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
} else {
$customNumberFormats = $this->getCustomNumberFormats();
$numberFormat = $customNumberFormats[$numFmtId];
}
return $numberFormat;
}
}

View File

@ -69,7 +69,7 @@ class Reader extends AbstractReader
$this->sharedStringsHelper->extractSharedStrings();
}
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -59,8 +59,9 @@ class RowIterator implements IteratorInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
@ -68,7 +69,7 @@ class RowIterator implements IteratorInterface
$this->xmlReader = new XMLReader();
$this->styleHelper = new StyleHelper($filePath);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
}
/**

View File

@ -25,12 +25,13 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper);
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -24,12 +24,13 @@ class SheetIterator implements IteratorInterface
* @param string $filePath Path of the file to be read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
{
// Fetch all available sheets
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper);
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
$this->sheets = $sheetHelper->getSheets();
if (count($this->sheets) === 0) {

View File

@ -164,6 +164,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
{
$shouldFormatDates = true;
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $shouldFormatDates);
$expectedRows = [
['05/19/2016', '5/19/16', '05/19/2016 16:39:00', '05/19/16 04:39 PM', '5/19/2016'],
['11:29', '13:23:45', '01:23:45', '01:23:45 AM', '01:23:45 PM'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
@ -436,16 +451,35 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows, 'Cell values should not be trimmed');
}
/**
* https://github.com/box/spout/issues/218
* @return void
*/
public function testReaderShouldReadTextInHyperlinks()
{
$allRows = $this->getAllRowsForFile('sheet_with_hyperlinks.ods');
$expectedRows = [
['email', 'text'],
['1@example.com', 'text'],
['2@example.com', 'text and https://github.com/box/spout/issues/218 and text'],
];
$this->assertEquals($expectedRows, $allRows, 'Text in hyperlinks should be read');
}
/**
* @param string $fileName
* @param bool|void $shouldFormatDates
* @return array All the read rows the given file
*/
private function getAllRowsForFile($fileName)
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
$reader = ReaderFactory::create(Type::ODS);
$reader->setShouldFormatDates($shouldFormatDates);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {

View File

@ -18,8 +18,11 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase
[CellValueFormatter::CELL_TYPE_NUMERIC, 42429, '2016-02-29 00:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, '146098', '2299-12-31 00:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, -700, null],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0, null],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0.5, null],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0, '1900-01-01 00:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0.25, '1900-01-01 06:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0.5, '1900-01-01 12:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0.75, '1900-01-01 18:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 0.99999, '1900-01-01 23:59:59'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 1, '1900-01-01 00:00:00'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 59.999988425926, '1900-02-28 23:59:59'],
[CellValueFormatter::CELL_TYPE_NUMERIC, 60.458333333333, '1900-02-28 11:00:00'],
@ -68,7 +71,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase
->with(123)
->will($this->returnValue(true));
$formatter = new CellValueFormatter(null, $styleHelperMock);
$formatter = new CellValueFormatter(null, $styleHelperMock, false);
$result = $formatter->extractAndFormatNodeValue($nodeMock);
if ($expectedDateAsString === null) {
@ -117,7 +120,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase
->method('shouldFormatNumericValueAsDate')
->will($this->returnValue(false));
$formatter = new CellValueFormatter(null, $styleHelperMock);
$formatter = new CellValueFormatter(null, $styleHelperMock, false);
$formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatNumericCellValue', $value, 0);
$this->assertEquals($expectedFormattedValue, $formattedValue);
@ -160,7 +163,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase
->with(CellValueFormatter::XML_NODE_INLINE_STRING_VALUE)
->will($this->returnValue($nodeListMock));
$formatter = new CellValueFormatter(null, null);
$formatter = new CellValueFormatter(null, null, false);
$formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatInlineStringCellValue', $nodeMock);
$this->assertEquals($expectedFormattedValue, $formattedValue);

View File

@ -0,0 +1,47 @@
<?php
namespace Box\Spout\Reader\XLSX\Helper;
/**
* Class DateFormatHelperTest
*
* @package Box\Spout\Reader\XLSX\Helper
*/
class DateFormatHelperTest extends \PHPUnit_Framework_TestCase
{
/**
* @return array
*/
public function dataProviderForTestToPHPDateFormat()
{
return [
// Excel date format, expected PHP date format
['m/d/yy hh:mm', 'n/j/y H:i'],
['mmm-yy', 'M-y'],
['d-mmm-yy', 'j-M-y'],
['m/dd/yyyy', 'n/d/Y'],
['e mmmmm dddd', 'Y M l'],
['MMMM DDD', 'F D'],
['hh:mm:ss.s', 'H:i:s'],
['h:mm:ss AM/PM', 'g:i:s A'],
['hh:mm AM/PM', 'h:i A'],
['[$-409]hh:mm AM/PM', 'h:i A'],
['[$USD-F480]hh:mm AM/PM', 'h:i A'],
['"Day " d', '\\D\\a\\y\\ j'],
['yy "Year" m "Month"', 'y \\Y\\e\\a\\r n \\M\\o\\n\\t\\h'],
];
}
/**
* @dataProvider dataProviderForTestToPHPDateFormat
*
* @param string $excelDateFormat
* @param string $expectedPHPDateFormat
* @return void
*/
public function testToPHPDateFormat($excelDateFormat, $expectedPHPDateFormat)
{
$phpDateFormat = DateFormatHelper::toPHPDateFormat($excelDateFormat);
$this->assertEquals($expectedPHPDateFormat, $phpDateFormat);
}
}

View File

@ -59,6 +59,16 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWithGeneralFormat()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => true, 'numFmtId' => 0]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/

View File

@ -23,7 +23,7 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
{
return [
['/path/to/fake/file.xlsx'],
['file_with_no_sheets_in_content_types.xlsx'],
['file_with_no_sheets_in_workbook_xml.xlsx'],
['file_with_sheet_xml_not_matching_content_types.xlsx'],
['file_corrupted.xlsx'],
];
@ -181,6 +181,43 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
public function testReadShouldSupportDifferentTimesAsNumericTimestamp()
{
// make sure dates are always created with the same timezone
date_default_timezone_set('UTC');
$allRows = $this->getAllRowsForFile('sheet_with_different_numeric_value_times.xlsx');
$expectedRows = [
[
\DateTime::createFromFormat('Y-m-d H:i:s', '1900-01-01 00:00:00'),
\DateTime::createFromFormat('Y-m-d H:i:s', '1900-01-01 11:29:00'),
\DateTime::createFromFormat('Y-m-d H:i:s', '1900-01-01 23:29:00'),
\DateTime::createFromFormat('Y-m-d H:i:s', '1900-01-01 01:42:25'),
\DateTime::createFromFormat('Y-m-d H:i:s', '1900-01-01 13:42:25'),
]
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
public function testReadShouldSupportFormatDatesAndTimesIfSpecified()
{
$shouldFormatDates = true;
$allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $shouldFormatDates);
$expectedRows = [
['1/13/2016', '01/13/2016', '13-Jan-16', 'Wednesday January 13, 16', 'Today is 1/13/2016'],
['4:43:25', '04:43', '4:43', '4:43:25 AM', '4:43:25 PM'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
@ -481,14 +518,16 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
/**
* @param string $fileName
* @param bool|void $shouldFormatDates
* @return array All the read rows the given file
*/
private function getAllRowsForFile($fileName)
private function getAllRowsForFile($fileName, $shouldFormatDates = false)
{
$allRows = [];
$resourcePath = $this->getResourcePath($fileName);
$reader = ReaderFactory::create(Type::XLSX);
$reader->setShouldFormatDates($shouldFormatDates);
$reader->open($resourcePath);
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {

Binary file not shown.

Binary file not shown.

Binary file not shown.