Better support for Date custom format (#316)

- To determine if a style should apply a date format, the presence of "applyNumberFormat" attribute on the "cellXfs" section of styles.xml is now optional. We only look at the "numFmtId" attribute (but early return if "applyNumberFormat" is set to "0").
- The format code can contain lowercase AND now uppercase characters as its pattern.
- "General" format code used as a custom format is now supported. It seems to be used by a bunch of programs...
This commit is contained in:
Adrien Loison 2016-09-24 10:46:42 -07:00 committed by GitHub
parent 30aa1b87e2
commit cc07072cbb
6 changed files with 174 additions and 80 deletions

View File

@ -152,7 +152,7 @@ class SimpleXMLElement
/**
* Returns the immediate children.
*
* @return array The children
* @return SimpleXMLElement[] The children
*/
public function children()
{

View File

@ -228,8 +228,8 @@ class CellValueFormatter
$dateObj->setTime($hours, $minutes, $seconds);
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
$styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
@ -257,8 +257,8 @@ class CellValueFormatter
$dateObj->modify('+' . $secondsRemainder . 'seconds');
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
$styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;

View File

@ -29,6 +29,8 @@ class StyleHelper
/** By convention, default style ID is 0 */
const DEFAULT_STYLE_ID = 0;
const NUMBER_FORMAT_GENERAL = 'General';
/**
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
* @var array Mapping between built-in numFmtId and the associated format - for dates only
@ -51,18 +53,48 @@ class StyleHelper
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var array Array containing the IDs of built-in number formats indicating a date */
protected $builtinNumFmtIdIndicatingDates;
/** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
protected $customNumberFormats;
/** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
protected $stylesAttributes;
/** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
protected $numFmtIdToIsDateFormatCache = [];
/**
* @param string $filePath Path of the XLSX file being read
*/
public function __construct($filePath)
{
$this->filePath = $filePath;
$this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
}
/**
* Returns whether the style with the given ID should consider
* numeric values as timestamps and format the cell as a date.
*
* @param int $styleId Zero-based style ID
* @return bool Whether the cell with the given cell should display a date instead of a numeric value
*/
public function shouldFormatNumericValueAsDate($styleId)
{
$stylesAttributes = $this->getStylesAttributes();
// Default style (0) does not format numeric values as timestamps. Only custom styles do.
// Also if the style ID does not exist in the styles.xml file, format as numeric value.
// Using isset here because it is way faster than array_key_exists...
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
return false;
}
$styleAttributes = $stylesAttributes[$styleId];
return $this->doesStyleIndicateDate($styleAttributes);
}
/**
@ -125,9 +157,15 @@ class StyleHelper
{
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
$numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
$normalizedNumFmtId = ($numFmtId !== null) ? intval($numFmtId) : null;
$applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
$normalizedApplyNumberFormat = ($applyNumberFormat !== null) ? !!$applyNumberFormat : null;
$this->stylesAttributes[] = [
self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)),
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)),
self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
];
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
// Once done reading "cellXfs" node's children
@ -161,86 +199,92 @@ class StyleHelper
}
/**
* Returns whether the style with the given ID should consider
* numeric values as timestamps and format the cell as a date.
*
* @param int $styleId Zero-based style ID
* @return bool Whether the cell with the given cell should display a date instead of a numeric value
* @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
* @return bool Whether the style with the given attributes indicates that the number is a date
*/
public function shouldFormatNumericValueAsDate($styleId)
protected function doesStyleIndicateDate($styleAttributes)
{
$stylesAttributes = $this->getStylesAttributes();
// Default style (0) does not format numeric values as timestamps. Only custom styles do.
// Also if the style ID does not exist in the styles.xml file, format as numeric value.
// Using isset here because it is way faster than array_key_exists...
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
return false;
}
$styleAttributes = $stylesAttributes[$styleId];
$applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
if (!$applyNumberFormat) {
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
// A style may apply a date format if it has:
// - "applyNumberFormat" attribute not set to "false"
// - "numFmtId" attribute set
// This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
// but this is not necessarily a date.
if ($applyNumberFormat === false || $numFmtId === null) {
return false;
}
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
return $this->doesNumFmtIdIndicateDate($numFmtId);
}
/**
* Returns whether the number format ID indicates that the number is a date.
* The result is cached to avoid recomputing the same thing over and over, as
* "numFmtId" attributes can be shared between multiple styles.
*
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
* @return bool Whether the number format ID indicates that the number is a date
*/
protected function doesNumFmtIdIndicateDate($numFmtId)
{
return (
!$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) &&
(
if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
$formatCode = $this->getFormatCodeForNumFmtId($numFmtId);
$this->numFmtIdToIsDateFormatCache[$numFmtId] = (
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
)
);
$this->isFormatCodeCustomDateFormat($formatCode)
);
}
return $this->numFmtIdToIsDateFormatCache[$numFmtId];
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates the "General" format (0 by convention)
* @return string|null The custom number format or NULL if none defined for the given numFmtId
*/
protected function doesNumFmtIdIndicateGeneralFormat($numFmtId)
{
return ($numFmtId === 0);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
$builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
return in_array($numFmtId, $builtInDateFormatIds);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdCustomDateFormat($numFmtId)
protected function getFormatCodeForNumFmtId($numFmtId)
{
$customNumberFormats = $this->getCustomNumberFormats();
// Using isset here because it is way faster than array_key_exists...
if (!isset($customNumberFormats[$numFmtId])) {
return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null;
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a date
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
return in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates);
}
/**
* @param string|null $formatCode
* @return bool Whether the given format code indicates that the number is a date
*/
protected function isFormatCodeCustomDateFormat($formatCode)
{
// if no associated format code or if using the default "General" format
if ($formatCode === null || strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) === 0) {
return false;
}
$customNumberFormat = $customNumberFormats[$numFmtId];
return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
}
/**
* @param string $formatCode
* @return bool Whether the given format code matches a date format pattern
*/
protected function isFormatCodeMatchingDateFormatPattern($formatCode)
{
// Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
$pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
$customNumberFormat = preg_replace($pattern, '', $customNumberFormat);
$formatCode = preg_replace($pattern, '', $formatCode);
// custom date formats contain specific characters to represent the date:
// e - yy - m - d - h - s
@ -249,10 +293,10 @@ class StyleHelper
$hasFoundDateFormatCharacter = false;
foreach ($dateFormatCharacters as $dateFormatCharacter) {
// character not preceded by "\"
$pattern = '/(?<!\\\)' . $dateFormatCharacter . '/';
// character not preceded by "\" (case insensitive)
$pattern = '/(?<!\\\)' . $dateFormatCharacter . '/i';
if (preg_match($pattern, $customNumberFormat)) {
if (preg_match($pattern, $formatCode)) {
$hasFoundDateFormatCharacter = true;
break;
}
@ -266,21 +310,21 @@ class StyleHelper
* NOTE: It is assumed that the style DOES have a number format associated to it.
*
* @param int $styleId Zero-based style ID
* @return string The number format associated with the given style
* @return string The number format code associated with the given style
*/
public function getNumberFormat($styleId)
public function getNumberFormatCode($styleId)
{
$stylesAttributes = $this->getStylesAttributes();
$styleAttributes = $stylesAttributes[$styleId];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
$numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
$numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
} else {
$customNumberFormats = $this->getCustomNumberFormats();
$numberFormat = $customNumberFormats[$numFmtId];
$numberFormatCode = $customNumberFormats[$numFmtId];
}
return $numberFormat;
return $numberFormatCode;
}
}

View File

@ -11,16 +11,16 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
{
/**
* @param array $styleAttributes
* @param array|void $styleAttributes
* @param array|void $customNumberFormats
* @return StyleHelper
*/
private function getStyleHelperMock($styleAttributes, $customNumberFormats = [])
private function getStyleHelperMock($styleAttributes = [], $customNumberFormats = [])
{
/** @var StyleHelper $styleHelper */
$styleHelper = $this->getMockBuilder('\Box\Spout\Reader\XLSX\Helper\StyleHelper')
->setConstructorArgs(['/path/to/file.xlsx'])
->setMethods(['getCustomNumberFormats', 'getStylesAttributes'])
->disableOriginalConstructor()
->getMock();
$styleHelper->method('getStylesAttributes')->willReturn($styleAttributes);
@ -34,27 +34,17 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
*/
public function testShouldFormatNumericValueAsDateWithDefaultStyle()
{
$styleHelper = $this->getStyleHelperMock([]);
$styleHelper = $this->getStyleHelperMock();
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(0);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWhenStyleIdNotListed()
{
$styleHelper = $this->getStyleHelperMock([['applyNumberFormat' => true]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWhenShouldNotApplyNumberFormat()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => false]]);
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => false, 'numFmtId' => 14]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
@ -69,6 +59,26 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWithNonDateBuiltinFormat()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => true, 'numFmtId' => 9]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWithNoNumFmtId()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => true, 'numFmtId' => null]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
@ -84,6 +94,28 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
}
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWhenApplyNumberFormatNotSetAndUsingBuiltinDateFormat()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => null, 'numFmtId' => 14]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertTrue($shouldFormatAsDate);
}
/**
* @return void
*/
public function testShouldFormatNumericValueAsDateWhenApplyNumberFormatNotSetAndUsingBuiltinNonDateFormat()
{
$styleHelper = $this->getStyleHelperMock([[], ['applyNumberFormat' => null, 'numFmtId' => 9]]);
$shouldFormatAsDate = $styleHelper->shouldFormatNumericValueAsDate(1);
$this->assertFalse($shouldFormatAsDate);
}
/**
* @return void
*/
@ -106,6 +138,7 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
['[$-409]d\-mmm\-yy;@', true],
['[$-409]d\-mmm\-yyyy;@', true],
['mm/dd/yy;@', true],
['MM/DD/YY;@', true],
['[$-F800]dddd\,\ mmmm\ dd\,\ yyyy', true],
['m/d;@', true],
['m/d/yy;@', true],
@ -117,9 +150,11 @@ class StyleHelperTest extends \PHPUnit_Framework_TestCase
['[$-409]m/d/yy\ h:mm\ AM/PM;@', true],
['m/d/yy\ h:mm;@', true],
['[$-409]mmmmm;@', true],
['[$-409]MMmmM;@', true],
['[$-409]mmmmm\-yy;@', true],
['m/d/yyyy;@', true],
['[$-409]m/d/yy\--h:mm;@', true],
['General', false],
['GENERAL', false],
['\ma\yb\e', false],
['[Red]foo;', false],

View File

@ -279,6 +279,21 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/
public function testReadShouldApplyCustomDateFormatNumberEvenIfApplyNumberFormatNotSpecified()
{
$shouldFormatDates = true;
$allRows = $this->getAllRowsForFile('sheet_with_custom_date_formats_and_no_apply_number_format.xlsx', $shouldFormatDates);
$expectedRows = [
// "General", "GENERAL", "MM/DD/YYYY", "MM/dd/YYYY", "H:MM:SS"
['42382', '42382', '01/13/2016', '01/13/2016', '4:43:25'],
];
$this->assertEquals($expectedRows, $allRows);
}
/**
* @return void
*/