Merge pull request #60 from Lewiscowles1986/Update-XLSX-Reader

Separated getCellValue into multiple functions
This commit is contained in:
Adrien Loison 2015-07-06 10:12:35 -07:00
commit 3edb056286
3 changed files with 139 additions and 38 deletions

View File

@ -19,6 +19,14 @@ use Box\Spout\Reader\Helper\XLSX\WorksheetHelper;
*/
class XLSX extends AbstractReader
{
// Values of a cell's "t" (type) attribute, as compared against in getCellValue().
// Inline string: the text is read from a nested "t" node instead of the "v" node.
const CELL_TYPE_INLINE_STRING = 'inlineStr';
// String stored directly in the cell's "v" node.
const CELL_TYPE_STR = 'str';
// Shared string: the "v" node holds an index resolved through the shared strings helper.
const CELL_TYPE_SHARED_STRING = 's';
// Boolean: the "v" content is cast to bool.
const CELL_TYPE_BOOLEAN = 'b';
// Numeric: also the type assumed when the "t" attribute is absent.
const CELL_TYPE_NUMERIC = 'n';
// Date: the "v" content is handed to the \DateTime constructor.
const CELL_TYPE_DATE = 'd';
// Error cell: no value is extracted; getCellValue() returns null for a non-empty error cell.
const CELL_TYPE_ERROR = 'e';
/** @var string Real path of the file to read */
protected $filePath;
@ -224,7 +232,115 @@ class XLSX extends AbstractReader
}
/**
* Returns the string value held in the cell node's nested "v" (value) node, or an empty string when there is none.
*
* @param \DOMNode $node
* @return string The value associated with the cell
*/
protected function getVNodeValue($node)
{
    // Value-bearing cells keep their content in a nested "v" element; only the
    // first such element found under the given node is considered.
    $valueNodes = $node->getElementsByTagName('v');
    $firstValueNode = $valueNodes->item(0);

    // A cell without any "v" element is reported as an empty string.
    return ($firstValueNode === null) ? "" : $firstValueNode->nodeValue;
}
/**
 * Returns the (unescaped) string value of a cell whose text is stored inline.
 *
 * @param \DOMNode $node Cell node; its first nested "t" element holds the text
 * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the raw text
 * @return string The unescaped cell value (built from an empty string when the cell has no "t" element)
 */
protected function formatInlineStringCellValue($node, $escaper)
{
    // inline strings are formatted this way:
    // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
    $tNode = $node->getElementsByTagName('t')->item(0);

    // item(0) yields null when the cell carries no "t" element; treat that as
    // an empty string instead of reading a property on null.
    $escapedCellValue = ($tNode !== null) ? trim($tNode->nodeValue) : '';

    return $escaper->unescape($escapedCellValue);
}
/**
 * Resolves a shared-string cell: the given value is the index of the string
 * inside the shared strings file.
 *
 * @param string $nodeValue Content of the cell's "v" node (the shared string index)
 * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the looked-up string
 * @return string The unescaped string found at that index
 */
protected function formatSharedStringCellValue($nodeValue, $escaper)
{
    // shared strings are formatted this way:
    // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
    $index = (int) $nodeValue;
    $rawString = $this->sharedStringsHelper->getStringAtIndex($index);

    return $escaper->unescape($rawString);
}
/**
 * Returns the string value of a cell whose text is stored directly in its value node.
 *
 * @param string $nodeValue Content of the cell's "v" node
 * @param \Box\Spout\Common\Escaper\XLSX $escaper Used to unescape the trimmed text
 * @return string The trimmed, unescaped cell value
 */
protected function formatStrCellValue($nodeValue, $escaper)
{
    // Surrounding whitespace is dropped before unescaping.
    return $escaper->unescape(trim($nodeValue));
}
/**
 * Converts the textual content of a numeric cell into a PHP number.
 *
 * Whole numbers that fit in a PHP integer are returned as int; everything
 * else (fractions, values outside the integer range) is returned as float.
 *
 * @param string $nodeValue Content of the cell's "v" node
 * @return int|float The numeric value associated with the cell
 */
protected function formatNumericCellValue($nodeValue)
{
    $floatValue = floatval($nodeValue);
    $intValue = intval($nodeValue);

    // The node value is always a string, so is_int() can never tell integers
    // apart. Instead, keep the integer form only when converting to int loses
    // nothing: the value is inside the integer range and both conversions agree.
    $isLosslessInteger = (abs($floatValue) < PHP_INT_MAX) && (floatval($intValue) === $floatValue);

    return $isLosslessInteger ? $intValue : $floatValue;
}
/**
 * Converts the textual content of a boolean cell into a PHP boolean.
 *
 * @param string $nodeValue Content of the cell's "v" node
 * @return bool The result of PHP's boolean conversion of that content
 */
protected function formatBooleanCellValue($nodeValue)
{
    // Standard PHP truthiness: "" and "0" become false, any other string true.
    return (bool) $nodeValue;
}
/**
 * Builds a PHP date object from the textual content of a date cell.
 *
 * @param string $nodeValue Content of the cell's "v" node
 * @return \DateTime|null The parsed date, or null when the content cannot be parsed
 */
protected function formatDateCellValue($nodeValue)
{
    $parsedDate = null;

    try {
        $parsedDate = new \DateTime($nodeValue);
    } catch (\Exception $e) {
        // The DateTime constructor throws on an invalid date-time format;
        // such a cell is reported as null rather than aborting the read.
    }

    return $parsedDate;
}
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
*
* @param \DOMNode $node
* @param \Box\Spout\Common\Escaper\XLSX $escaper
@ -232,47 +348,30 @@ class XLSX extends AbstractReader
*/
protected function getCellValue($node, $escaper)
{
    // Reads the cell's type attribute and delegates the conversion of the raw
    // value to the formatter matching that type.

    // Default cell type is "n"
    $cellType = $node->getAttribute('t') ?: self::CELL_TYPE_NUMERIC;
    $vNodeValue = $this->getVNodeValue($node);

    // Inline strings keep their text in a "t" node, so a missing/empty "v"
    // value means "empty cell" for every type except inline strings.
    if (($vNodeValue === "") && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
        return $vNodeValue;
    }

    switch ($cellType) {
        case self::CELL_TYPE_INLINE_STRING:
            return $this->formatInlineStringCellValue($node, $escaper);
        case self::CELL_TYPE_SHARED_STRING:
            return $this->formatSharedStringCellValue($vNodeValue, $escaper);
        case self::CELL_TYPE_STR:
            return $this->formatStrCellValue($vNodeValue, $escaper);
        case self::CELL_TYPE_BOOLEAN:
            return $this->formatBooleanCellValue($vNodeValue);
        case self::CELL_TYPE_NUMERIC:
            return $this->formatNumericCellValue($vNodeValue);
        case self::CELL_TYPE_DATE:
            return $this->formatDateCellValue($vNodeValue);
        case self::CELL_TYPE_ERROR:
            // Error cells carry no usable value.
            return null;
        default:
            // Unknown cell types are treated like errors.
            return null;
    }
}
/**

View File

@ -129,6 +129,8 @@ class XLSXTest extends \PHPUnit_Framework_TestCase
\DateTime::createFromFormat('Y-m-d H:i:s', '2015-06-01 00:00:00'),
10, 10.43,
null,
'weird string', // valid 'str' string
null, // invalid date
],
['', '', '', '', '', '', '', '', ''],
];