diff --git a/README.md b/README.md index 6bb73aa..a34c365 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ $reader->setEncoding('UTF-16LE'); The writer always generate CSV files encoded in UTF-8, with a BOM. -### Configuring the XLSX and ODS writers +### Configuring the XLSX and ODS readers and writers #### Row styling @@ -163,7 +163,6 @@ Font | Bold | `StyleBuilder::setFontBold()` | Font color | `StyleBuilder::setFontColor(Color::BLUE)`
`StyleBuilder::setFontColor(Color::rgb(0, 128, 255))` Alignment | Wrap text | `StyleBuilder::setShouldWrapText()` - #### New sheet creation It is also possible to change the behavior of the writer when the maximum number of rows (1,048,576) have been written in the current sheet: @@ -208,6 +207,20 @@ $writer->setShouldUseInlineStrings(false); // will use shared strings > Apple's products (Numbers and the iOS previewer) don't support inline strings and display empty cells instead. Therefore, if these platforms need to be supported, make sure to use shared strings! +#### Date/Time formatting + +When reading a spreadsheet containing dates or times, Spout returns the values by default as PHP objects (`DateTime`, or `DateInterval` for time values in ODS files). +It is possible to change this behavior and have the value returned as a formatted string instead (e.g. "2016-11-29 1:22 AM"). The format of the string corresponds to what is specified in the spreadsheet. + +```php +use Box\Spout\Reader\ReaderFactory; +use Box\Spout\Common\Type; + +$reader = ReaderFactory::create(Type::XLSX); +$reader->setShouldFormatDates(false); // default value +$reader->setShouldFormatDates(true); // will return formatted dates +``` + ### Playing with sheets When creating a XLSX or ODS file, it is possible to control which sheet the data will be written into. 
At any time, you can retrieve or set the current sheet: diff --git a/src/Spout/Reader/AbstractReader.php b/src/Spout/Reader/AbstractReader.php index d6d38e2..cb476ab 100644 --- a/src/Spout/Reader/AbstractReader.php +++ b/src/Spout/Reader/AbstractReader.php @@ -19,6 +19,9 @@ abstract class AbstractReader implements ReaderInterface /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; + /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ + protected $shouldFormatDates = false; + /** * Returns whether stream wrappers are supported * @@ -49,7 +52,7 @@ abstract class AbstractReader implements ReaderInterface abstract protected function closeReader(); /** - * @param $globalFunctionsHelper + * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper * @return AbstractReader */ public function setGlobalFunctionsHelper($globalFunctionsHelper) @@ -58,6 +61,18 @@ abstract class AbstractReader implements ReaderInterface return $this; } + /** + * Sets whether date/time values should be returned as PHP objects or be formatted as strings. + * + * @param bool $shouldFormatDates + * @return AbstractReader + */ + public function setShouldFormatDates($shouldFormatDates) + { + $this->shouldFormatDates = $shouldFormatDates; + return $this; + } + /** * Prepares the reader to read the given file. It also makes sure * that the file exists and is readable. 
diff --git a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php index 3eb1918..b39af21 100644 --- a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php @@ -34,14 +34,19 @@ class CellValueFormatter const XML_ATTRIBUTE_CURRENCY = 'office:currency'; const XML_ATTRIBUTE_C = 'text:c'; + /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ + protected $shouldFormatDates; + /** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */ protected $escaper; /** - * + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct() + public function __construct($shouldFormatDates) { + $this->shouldFormatDates = $shouldFormatDates; + /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $this->escaper = new \Box\Spout\Common\Escaper\ODS(); } @@ -122,6 +127,7 @@ class CellValueFormatter { $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE); $nodeIntValue = intval($nodeValue); + // The "==" is intentionally not a "===" because only the value matters, not the type $cellValue = ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue); return $cellValue; } @@ -144,15 +150,27 @@ class CellValueFormatter * Returns the cell Date value from the given node. 
* * @param \DOMNode $node - * @return \DateTime|null The value associated with the cell or NULL if invalid date value + * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value */ protected function formatDateCellValue($node) { - try { - $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE); - return new \DateTime($nodeValue); - } catch (\Exception $e) { - return null; + // The XML node looks like this: + // + // 05/19/16 04:39 PM + // + + if ($this->shouldFormatDates) { + // The date is already formatted in the "p" tag; item(0) is NULL when the cell has no such tag + $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0); + return ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : null; + } else { + // otherwise, get it from the "date-value" attribute + try { + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE); + return new \DateTime($nodeValue); + } catch (\Exception $e) { + return null; + } } } @@ -160,15 +178,27 @@ class CellValueFormatter * Returns the cell Time value from the given node. 
* * @param \DOMNode $node - * @return \DateInterval|null The value associated with the cell or NULL if invalid time value + * @return \DateInterval|string|null The value associated with the cell or NULL if invalid time value */ protected function formatTimeCellValue($node) { - try { - $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE); - return new \DateInterval($nodeValue); - } catch (\Exception $e) { - return null; + // The XML node looks like this: + // + // 01:24:00 PM + // + + if ($this->shouldFormatDates) { + // The time is already formatted in the "p" tag; item(0) is NULL when the cell has no such tag + $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0); + return ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : null; + } else { + // otherwise, get it from the "time-value" attribute + try { + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE); + return new \DateInterval($nodeValue); + } catch (\Exception $e) { + return null; + } } } diff --git a/src/Spout/Reader/ODS/Reader.php b/src/Spout/Reader/ODS/Reader.php index b4093ae..a52bafa 100644 --- a/src/Spout/Reader/ODS/Reader.php +++ b/src/Spout/Reader/ODS/Reader.php @@ -42,7 +42,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath); + $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index aa7a496..e91ad90 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -45,11 +45,12 @@ class RowIterator implements IteratorInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct($xmlReader) + 
public function __construct($xmlReader, $shouldFormatDates) { $this->xmlReader = $xmlReader; - $this->cellValueFormatter = new CellValueFormatter(); + $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); } /** @@ -186,7 +187,7 @@ class RowIterator implements IteratorInterface /** * empty() replacement that honours 0 as a valid value * - * @param $value The cell value + * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value * @return bool */ protected function isEmptyCellValue($value) diff --git a/src/Spout/Reader/ODS/Sheet.php b/src/Spout/Reader/ODS/Sheet.php index c78e4aa..98d00b1 100644 --- a/src/Spout/Reader/ODS/Sheet.php +++ b/src/Spout/Reader/ODS/Sheet.php @@ -27,12 +27,13 @@ class Sheet implements SheetInterface /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($xmlReader, $sheetIndex, $sheetName) + public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($xmlReader); + $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/ODS/SheetIterator.php b/src/Spout/Reader/ODS/SheetIterator.php index f8683f0..d0010bd 100644 --- a/src/Spout/Reader/ODS/SheetIterator.php +++ b/src/Spout/Reader/ODS/SheetIterator.php @@ -22,6 +22,9 @@ class SheetIterator implements IteratorInterface /** @var string $filePath Path of the file to be read */ protected $filePath; + /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ + protected $shouldFormatDates; + /** @var XMLReader The XMLReader object that will help read 
sheet's XML data */ protected $xmlReader; @@ -36,11 +39,13 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath) + public function __construct($filePath, $shouldFormatDates) { $this->filePath = $filePath; + $this->shouldFormatDates = $shouldFormatDates; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ @@ -109,7 +114,8 @@ class SheetIterator implements IteratorInterface $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); - return new Sheet($this->xmlReader, $sheetName, $this->currentSheetIndex); + // Sheet's constructor expects ($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName): index first, then name + return new Sheet($this->xmlReader, $this->shouldFormatDates, $this->currentSheetIndex, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php index 046336a..286d348 100644 --- a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php @@ -44,17 +44,22 @@ class CellValueFormatter /** @var StyleHelper Helper to work with styles */ protected $styleHelper; + /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ + protected $shouldFormatDates; + /** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */ protected $escaper; /** * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings * @param StyleHelper $styleHelper Helper to work with styles + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function 
__construct($sharedStringsHelper, $styleHelper) + public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates) { $this->sharedStringsHelper = $sharedStringsHelper; $this->styleHelper = $styleHelper; + $this->shouldFormatDates = $shouldFormatDates; /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $this->escaper = new \Box\Spout\Common\Escaper\XLSX(); @@ -168,7 +173,7 @@ class CellValueFormatter $shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId); if ($shouldFormatAsDate) { - return $this->formatExcelTimestampValue(floatval($nodeValue)); + return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId); } else { $nodeIntValue = intval($nodeValue); return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue); @@ -181,9 +186,10 @@ class CellValueFormatter * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1. * * @param float $nodeValue + * @param int $cellStyleId 0 being the default style - * @return \DateTime|null The value associated with the cell or NULL if invalid date value + * @return \DateTime|string|null The value associated with the cell (a string when dates are formatted) or NULL if invalid date value */ - protected function formatExcelTimestampValue($nodeValue) + protected function formatExcelTimestampValue($nodeValue, $cellStyleId) { // Fix for the erroneous leap year in Excel if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) { @@ -192,10 +198,10 @@ class CellValueFormatter if ($nodeValue >= 1) { // Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01. - return $this->formatExcelTimestampValueAsDateValue($nodeValue); + return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId); } else if ($nodeValue >= 0) { // Values between 0 and 1 represent "times". 
- return $this->formatExcelTimestampValueAsTimeValue($nodeValue); + return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId); } else { // invalid date return null; @@ -207,9 +213,10 @@ class CellValueFormatter * Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date). * * @param float $nodeValue - * @return \DateTime The value associated with the cell + * @param int $cellStyleId 0 being the default style + * @return \DateTime|string The value associated with the cell */ - protected function formatExcelTimestampValueAsTimeValue($nodeValue) + protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId) { $time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY); $hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR); @@ -220,7 +227,13 @@ class CellValueFormatter $dateObj = new \DateTime('1900-01-01'); $dateObj->setTime($hours, $minutes, $seconds); - return $dateObj; + if ($this->shouldFormatDates) { + $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId); + $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat); + return $dateObj->format($phpDateFormat); + } else { + return $dateObj; + } } /** @@ -228,9 +241,10 @@ class CellValueFormatter * NOTE: The timestamp is a float representing the number of days since January 1st, 1900. * * @param float $nodeValue - * @return \DateTime|null The value associated with the cell or NULL if invalid date value + * @param int $cellStyleId 0 being the default style + * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value */ - protected function formatExcelTimestampValueAsDateValue($nodeValue) + protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId) { // Do not use any unix timestamps for calculation to prevent // issues with numbers exceeding 2^31. @@ -242,7 +256,13 @@ class CellValueFormatter $dateObj->modify('+' . intval($nodeValue) . 'days'); $dateObj->modify('+' . 
$secondsRemainder . 'seconds'); - return $dateObj; + if ($this->shouldFormatDates) { + $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId); + $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat); + return $dateObj->format($phpDateFormat); + } else { + return $dateObj; + } } catch (\Exception $e) { return null; } diff --git a/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php b/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php new file mode 100644 index 0000000..4acbef7 --- /dev/null +++ b/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php @@ -0,0 +1,122 @@ + [ + // Time + 'am/pm' => 'A', // Uppercase Ante meridiem and Post meridiem + ':mm' => ':i', // Minutes with leading zeros - if preceded by a ":" (otherwise month) + 'mm:' => 'i:', // Minutes with leading zeros - if followed by a ":" (otherwise month) + 'ss' => 's', // Seconds, with leading zeros + '.s' => '', // Ignore (fractional seconds format does not exist in PHP) + + // Date + 'e' => 'Y', // Full numeric representation of a year, 4 digits + 'yyyy' => 'Y', // Full numeric representation of a year, 4 digits + 'yy' => 'y', // Two digit representation of a year + 'mmmmm' => 'M', // Short textual representation of a month, three letters ("mmmmm" should only contain the 1st letter...) 
+ 'mmmm' => 'F', // Full textual representation of a month + 'mmm' => 'M', // Short textual representation of a month, three letters + 'mm' => 'm', // Numeric representation of a month, with leading zeros + 'm' => 'n', // Numeric representation of a month, without leading zeros + 'dddd' => 'l', // Full textual representation of the day of the week + 'ddd' => 'D', // Textual representation of a day, three letters + 'dd' => 'd', // Day of the month, 2 digits with leading zeros + 'd' => 'j', // Day of the month without leading zeros + ], + self::KEY_HOUR_12 => [ + 'hh' => 'h', // 12-hour format of an hour with leading zeros + 'h' => 'g', // 12-hour format of an hour without leading zeros + ], + self::KEY_HOUR_24 => [ + 'hh' => 'H', // 24-hour hours with leading zero + 'h' => 'G', // 24-hour format of an hour without leading zeros + ], + ]; + + /** + * Converts the given Excel date format to a format understandable by the PHP date function. + * + * @param string $excelDateFormat Excel date format + * @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php) + */ + public static function toPHPDateFormat($excelDateFormat) + { + // Remove brackets potentially present at the beginning of the format string + $dateFormat = preg_replace('/^(\[\$[^\]]+?\])/i', '', $excelDateFormat); + + // Double quotes are used to escape characters that must not be interpreted. + // For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y" + // By exploding the format string using double quote as a delimiter, we can get all parts + // that must be transformed (even indexes) and all parts that must not be (odd indexes). 
+ $dateFormatParts = explode('"', $dateFormat); + + foreach ($dateFormatParts as $partIndex => $dateFormatPart) { + // do not look at odd indexes + if ($partIndex % 2 === 1) { + continue; + } + + // Make sure all characters are lowercase, as the mapping table is using lowercase characters + $transformedPart = strtolower($dateFormatPart); + + // Remove escapes related to non-format characters + $transformedPart = str_replace('\\', '', $transformedPart); + + // Apply general transformation first... + $transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]); + + // ... then apply hour transformation; the AM/PM marker applies to the whole format and may live in another part + if (self::has12HourFormatMarker($dateFormat)) { + $transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]); + } else { + $transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]); + } + + // overwrite the parts array with the new transformed part + $dateFormatParts[$partIndex] = $transformedPart; + } + + // Merge all transformed parts back together + $phpDateFormat = implode('"', $dateFormatParts); + + // Finally, to have the date format compatible with the DateTime::format() function, we need to escape + // all characters that are inside double quotes (and double quotes must be removed). + // For instance, ["Day " dd] should become [\D\a\y\ dd] + $phpDateFormat = preg_replace_callback('/"(.+?)"/', function($matches) { + $stringToEscape = $matches[1]; + $letters = preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY); + return '\\' . 
implode('\\', $letters); + }, $phpDateFormat); + + return $phpDateFormat; + } + + /** + * @param string $excelDateFormat Whole date format as defined by Excel (double-quoted literal text is ignored) + * @return bool Whether the given date format has the 12-hour format marker + */ + private static function has12HourFormatMarker($excelDateFormat) + { + return (stripos(preg_replace('/"[^"]*"/', '', $excelDateFormat), 'am/pm') !== false); + } +} diff --git a/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/src/Spout/Reader/XLSX/Helper/SheetHelper.php index 23a2b08..5f74f44 100644 --- a/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -30,6 +30,9 @@ class SheetHelper /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; + /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ + protected $shouldFormatDates; + /** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file */ protected $workbookXMLRelsAsXMLElement; @@ -40,12 +43,14 @@ class SheetHelper * @param string $filePath Path of the XLSX file being read * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) { $this->filePath = $filePath; $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; + $this->shouldFormatDates = $shouldFormatDates; } /** @@ -103,7 +108,7 @@ class SheetHelper // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" $sheetDataXMLFilePath = '/xl/' . 
$relationshipNode->getAttribute('Target'); - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName); + return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName); } /** diff --git a/src/Spout/Reader/XLSX/Helper/StyleHelper.php b/src/Spout/Reader/XLSX/Helper/StyleHelper.php index 19014b5..462433c 100644 --- a/src/Spout/Reader/XLSX/Helper/StyleHelper.php +++ b/src/Spout/Reader/XLSX/Helper/StyleHelper.php @@ -30,6 +30,25 @@ class StyleHelper /** By convention, default style ID is 0 */ const DEFAULT_STYLE_ID = 0; + /** + * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx + * @var array Mapping between built-in numFmtId and the associated format - for dates only + */ + protected static $builtinNumFmtIdToNumFormatMapping = [ + 14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy' + 15 => 'd-mmm-yy', + 16 => 'd-mmm', + 17 => 'mmm-yy', + 18 => 'h:mm AM/PM', + 19 => 'h:mm:ss AM/PM', + 20 => 'h:mm', + 21 => 'h:mm:ss', + 22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm', + 45 => 'mm:ss', + 46 => '[h]:mm:ss', + 47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0', + ]; + /** @var string Path of the XLSX file being read */ protected $filePath; @@ -194,7 +213,7 @@ class StyleHelper */ protected function isNumFmtIdBuiltInDateFormat($numFmtId) { - $builtInDateFormatIds = [14, 15, 16, 17, 18, 19, 20, 21, 22, 45, 46, 47]; + $builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping); return in_array($numFmtId, $builtInDateFormatIds); } @@ -235,4 +254,27 @@ class StyleHelper return $hasFoundDateFormatCharacter; } + + /** + * Returns the format as defined in "styles.xml" of the given style. + * NOTE: It is assumed that the style DOES have a number format associated to it. 
+ * + * @param int $styleId Zero-based style ID + * @return string The number format associated with the given style + */ + public function getNumberFormat($styleId) + { + $stylesAttributes = $this->getStylesAttributes(); + $styleAttributes = $stylesAttributes[$styleId]; + $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; + + if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) { + $numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId]; + } else { + $customNumberFormats = $this->getCustomNumberFormats(); + $numberFormat = $customNumberFormats[$numFmtId]; + } + + return $numberFormat; + } } diff --git a/src/Spout/Reader/XLSX/Reader.php b/src/Spout/Reader/XLSX/Reader.php index 42c6f02..bcf02cc 100644 --- a/src/Spout/Reader/XLSX/Reader.php +++ b/src/Spout/Reader/XLSX/Reader.php @@ -69,7 +69,7 @@ class Reader extends AbstractReader $this->sharedStringsHelper->extractSharedStrings(); } - $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper); + $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates); } else { throw new IOException("Could not open $filePath for reading."); } diff --git a/src/Spout/Reader/XLSX/RowIterator.php b/src/Spout/Reader/XLSX/RowIterator.php index d1913bd..c7491ac 100644 --- a/src/Spout/Reader/XLSX/RowIterator.php +++ b/src/Spout/Reader/XLSX/RowIterator.php @@ -59,8 +59,9 @@ class RowIterator implements IteratorInterface * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper) + public function 
__construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); @@ -68,7 +69,7 @@ class RowIterator implements IteratorInterface $this->xmlReader = new XMLReader(); $this->styleHelper = new StyleHelper($filePath); - $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper); + $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates); } /** diff --git a/src/Spout/Reader/XLSX/Sheet.php b/src/Spout/Reader/XLSX/Sheet.php index 85a4dc9..a1c7d95 100644 --- a/src/Spout/Reader/XLSX/Sheet.php +++ b/src/Spout/Reader/XLSX/Sheet.php @@ -25,12 +25,13 @@ class Sheet implements SheetInterface * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml * @param Helper\SharedStringsHelper Helper to work with shared strings + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName) { - $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper); + $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates); $this->index = $sheetIndex; $this->name = $sheetName; } diff --git a/src/Spout/Reader/XLSX/SheetIterator.php b/src/Spout/Reader/XLSX/SheetIterator.php index 7b3d3dd..f7a3f59 100644 --- a/src/Spout/Reader/XLSX/SheetIterator.php +++ 
b/src/Spout/Reader/XLSX/SheetIterator.php @@ -24,12 +24,13 @@ class SheetIterator implements IteratorInterface * @param string $filePath Path of the file to be read * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper + * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper) + public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) { // Fetch all available sheets - $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper); + $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates); $this->sheets = $sheetHelper->getSheets(); if (count($this->sheets) === 0) { diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index 4c95fd9..759d842 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -164,6 +164,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadShouldSupportFormatDatesAndTimesIfSpecified() + { + $shouldFormatDates = true; + $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.ods', $shouldFormatDates); + + $expectedRows = [ + ['05/19/2016', '5/19/16', '05/19/2016 16:39:00', '05/19/16 04:39 PM', '5/19/2016'], + ['11:29', '13:23:45', '01:23:45', '01:23:45 AM', '01:23:45 PM'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ @@ -455,14 +470,16 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName + * @param bool|void 
$shouldFormatDates * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName) + private function getAllRowsForFile($fileName, $shouldFormatDates = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); $reader = ReaderFactory::create(Type::ODS); + $reader->setShouldFormatDates($shouldFormatDates); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php index 6ea5b92..92831ab 100644 --- a/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php +++ b/tests/Spout/Reader/XLSX/Helper/CellValueFormatterTest.php @@ -71,7 +71,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase ->with(123) ->will($this->returnValue(true)); - $formatter = new CellValueFormatter(null, $styleHelperMock); + $formatter = new CellValueFormatter(null, $styleHelperMock, false); $result = $formatter->extractAndFormatNodeValue($nodeMock); if ($expectedDateAsString === null) { @@ -120,7 +120,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase ->method('shouldFormatNumericValueAsDate') ->will($this->returnValue(false)); - $formatter = new CellValueFormatter(null, $styleHelperMock); + $formatter = new CellValueFormatter(null, $styleHelperMock, false); $formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatNumericCellValue', $value, 0); $this->assertEquals($expectedFormattedValue, $formattedValue); @@ -163,7 +163,7 @@ class CellValueFormatterTest extends \PHPUnit_Framework_TestCase ->with(CellValueFormatter::XML_NODE_INLINE_STRING_VALUE) ->will($this->returnValue($nodeListMock)); - $formatter = new CellValueFormatter(null, null); + $formatter = new CellValueFormatter(null, null, false); $formattedValue = \ReflectionHelper::callMethodOnObject($formatter, 'formatInlineStringCellValue', $nodeMock); 
$this->assertEquals($expectedFormattedValue, $formattedValue); diff --git a/tests/Spout/Reader/XLSX/Helper/DateFormatHelperTest.php b/tests/Spout/Reader/XLSX/Helper/DateFormatHelperTest.php new file mode 100644 index 0000000..b6d852c --- /dev/null +++ b/tests/Spout/Reader/XLSX/Helper/DateFormatHelperTest.php @@ -0,0 +1,47 @@ +assertEquals($expectedPHPDateFormat, $phpDateFormat); + } +} diff --git a/tests/Spout/Reader/XLSX/ReaderTest.php b/tests/Spout/Reader/XLSX/ReaderTest.php index ee36266..8620ed5 100644 --- a/tests/Spout/Reader/XLSX/ReaderTest.php +++ b/tests/Spout/Reader/XLSX/ReaderTest.php @@ -203,6 +203,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals($expectedRows, $allRows); } + /** + * @return void + */ + public function testReadShouldSupportFormatDatesAndTimesIfSpecified() + { + $shouldFormatDates = true; + $allRows = $this->getAllRowsForFile('sheet_with_dates_and_times.xlsx', $shouldFormatDates); + + $expectedRows = [ + ['1/13/2016', '01/13/2016', '13-Jan-16', 'Wednesday January 13, 16', 'Today is 1/13/2016'], + ['4:43:25', '04:43', '4:43', '4:43:25 AM', '4:43:25 PM'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + /** * @return void */ @@ -503,14 +518,16 @@ class ReaderTest extends \PHPUnit_Framework_TestCase /** * @param string $fileName + * @param bool|void $shouldFormatDates * @return array All the read rows the given file */ - private function getAllRowsForFile($fileName) + private function getAllRowsForFile($fileName, $shouldFormatDates = false) { $allRows = []; $resourcePath = $this->getResourcePath($fileName); $reader = ReaderFactory::create(Type::XLSX); + $reader->setShouldFormatDates($shouldFormatDates); $reader->open($resourcePath); foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) { diff --git a/tests/resources/ods/sheet_with_dates_and_times.ods b/tests/resources/ods/sheet_with_dates_and_times.ods new file mode 100644 index 0000000..0e0fb5f Binary files /dev/null and 
b/tests/resources/ods/sheet_with_dates_and_times.ods differ diff --git a/tests/resources/xlsx/sheet_with_dates_and_times.xlsx b/tests/resources/xlsx/sheet_with_dates_and_times.xlsx new file mode 100644 index 0000000..769e03b Binary files /dev/null and b/tests/resources/xlsx/sheet_with_dates_and_times.xlsx differ