From 818ec2488c8009b02924190e245702cd622fa1fe Mon Sep 17 00:00:00 2001 From: Adrien Loison Date: Wed, 2 Sep 2015 13:49:03 -0700 Subject: [PATCH] Support all ODS cell types Including: - date / time - currency - percentage - void And improved support for boolean --- .../Reader/ODS/Helper/CellValueFormatter.php | 123 +++++++++++++----- src/Spout/Reader/ODS/RowIterator.php | 2 +- .../Reader/XLSX/Helper/CellValueFormatter.php | 2 +- src/Spout/Writer/ODS/Internal/Worksheet.php | 2 +- tests/Spout/Reader/ODS/ReaderTest.php | 17 +++ .../ods/sheet_with_all_cell_types.ods | Bin 2594 -> 2774 bytes .../ods/sheet_with_invalid_date_time.ods | Bin 0 -> 2582 bytes .../sheet_with_number_columns_repeated.ods | Bin 2592 -> 2599 bytes 8 files changed, 112 insertions(+), 34 deletions(-) create mode 100644 tests/resources/ods/sheet_with_invalid_date_time.ods diff --git a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php index 15a8cad..0a2f18d 100644 --- a/src/Spout/Reader/ODS/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/ODS/Helper/CellValueFormatter.php @@ -12,8 +12,13 @@ class CellValueFormatter { /** Definition of all possible cell types */ const CELL_TYPE_STRING = 'string'; - const CELL_TYPE_BOOLEAN = 'boolean'; const CELL_TYPE_FLOAT = 'float'; + const CELL_TYPE_BOOLEAN = 'boolean'; + const CELL_TYPE_DATE = 'date'; + const CELL_TYPE_TIME = 'time'; + const CELL_TYPE_CURRENCY = 'currency'; + const CELL_TYPE_PERCENTAGE = 'percentage'; + const CELL_TYPE_VOID = 'void'; /** Definition of XML nodes names used to parse data */ const XML_NODE_P = 'p'; @@ -21,6 +26,11 @@ class CellValueFormatter /** Definition of XML attribute used to parse data */ const XML_ATTRIBUTE_TYPE = 'office:value-type'; + const XML_ATTRIBUTE_VALUE = 'office:value'; + const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value'; + const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value'; + const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value'; + const XML_ATTRIBUTE_CURRENCY = 'office:currency'; const XML_ATTRIBUTE_C = 'text:c'; /** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */ @@ -38,45 +48,36 @@ class CellValueFormatter /** * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. * @TODO Add other types !! + * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13 * * @param \DOMNode $node - * @return string|int|float|bool The value associated with the cell (or empty string if cell's type is undefined) + * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error */ public function extractAndFormatNodeValue($node) { $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE); - $pNodeValue = $this->getFirstPNodeValue($node); switch ($cellType) { case self::CELL_TYPE_STRING: return $this->formatStringCellValue($node); case self::CELL_TYPE_FLOAT: - return $this->formatFloatCellValue($pNodeValue); + return $this->formatFloatCellValue($node); case self::CELL_TYPE_BOOLEAN: - return $this->formatBooleanCellValue($pNodeValue); + return $this->formatBooleanCellValue($node); + case self::CELL_TYPE_DATE: + return $this->formatDateCellValue($node); + case self::CELL_TYPE_TIME: + return $this->formatTimeCellValue($node); + case self::CELL_TYPE_CURRENCY: + return $this->formatCurrencyCellValue($node); + case self::CELL_TYPE_PERCENTAGE: + return $this->formatPercentageCellValue($node); + case self::CELL_TYPE_VOID: default: return ''; } } - /** - * Returns the value of the first "" node within the given node. - * - * @param \DOMNode $node - * @return string Value for the first "" node or empty string if no "" found - */ - protected function getFirstPNodeValue($node) - { - $nodeValue = ''; - $pNodes = $node->getElementsByTagName(self::XML_NODE_P); - - if ($pNodes->length > 0) { - $nodeValue = $pNodes->item(0)->nodeValue; - } - - return $nodeValue; - } - /** * Returns the cell String value. * @@ -110,27 +111,87 @@ class CellValueFormatter } /** - * Returns the cell Numeric value from string of nodeValue. + * Returns the cell Numeric value from the given node. * - * @param string $pNodeValue + * @param \DOMNode $node * @return int|float The value associated with the cell */ - protected function formatFloatCellValue($pNodeValue) + protected function formatFloatCellValue($node) { - $cellValue = is_int($pNodeValue) ? intval($pNodeValue) : floatval($pNodeValue); + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE); + $cellValue = is_int($nodeValue) ? intval($nodeValue) : floatval($nodeValue); return $cellValue; } /** - * Returns the cell Boolean value from a specific node's Value. + * Returns the cell Boolean value from the given node. * - * @param string $pNodeValue + * @param \DOMNode $node * @return bool The value associated with the cell */ - protected function formatBooleanCellValue($pNodeValue) + protected function formatBooleanCellValue($node) { + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE); // !! is similar to boolval() - $cellValue = !!$pNodeValue; + $cellValue = !!$nodeValue; return $cellValue; } + + /** + * Returns the cell Date value from the given node. + * + * @param \DOMNode $node + * @return \DateTime|null The value associated with the cell or NULL if invalid date value + */ + protected function formatDateCellValue($node) + { + try { + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE); + return new \DateTime($nodeValue); + } catch (\Exception $e) { + return null; + } + } + + /** + * Returns the cell Time value from the given node. + * + * @param \DOMNode $node + * @return \DateInterval|null The value associated with the cell or NULL if invalid time value + */ + protected function formatTimeCellValue($node) + { + try { + $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE); + return new \DateInterval($nodeValue); + } catch (\Exception $e) { + return null; + } + } + + /** + * Returns the cell Currency value from the given node. + * + * @param \DOMNode $node + * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR") + */ + protected function formatCurrencyCellValue($node) + { + $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE); + $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY); + + return "$value $currency"; + } + + /** + * Returns the cell Percentage value from the given node. + * + * @param \DOMNode $node + * @return int|float The value associated with the cell + */ + protected function formatPercentageCellValue($node) + { + // percentages are formatted like floats + return $this->formatFloatCellValue($node); + } } diff --git a/src/Spout/Reader/ODS/RowIterator.php b/src/Spout/Reader/ODS/RowIterator.php index 7a3745f..def7712 100644 --- a/src/Spout/Reader/ODS/RowIterator.php +++ b/src/Spout/Reader/ODS/RowIterator.php @@ -174,7 +174,7 @@ class RowIterator implements IteratorInterface * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. * * @param \DOMNode $node - * @return string|int|float|bool The value associated with the cell (or empty string if cell's type is undefined) + * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error */ protected function getCellValue($node) { diff --git a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php index 99d4920..79f92e7 100644 --- a/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php +++ b/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php @@ -165,7 +165,7 @@ class CellValueFormatter * Returns a cell's PHP Date value, associated to the given stored nodeValue. * * @param string $nodeValue - * @return \DateTime|null The value associated with the cell (null when the cell has an error) + * @return \DateTime|null The value associated with the cell or NULL if invalid date value */ protected function formatDateCellValue($nodeValue) { diff --git a/src/Spout/Writer/ODS/Internal/Worksheet.php b/src/Spout/Writer/ODS/Internal/Worksheet.php index a3d5d76..19305f0 100644 --- a/src/Spout/Writer/ODS/Internal/Worksheet.php +++ b/src/Spout/Writer/ODS/Internal/Worksheet.php @@ -194,7 +194,7 @@ class Worksheet implements WorksheetInterface $data .= ''; } else if (CellHelper::isBoolean($cellValue)) { - $data .= ' office:value-type="boolean" calcext:value-type="boolean" office:value="' . $cellValue . '">'; + $data .= ' office:value-type="boolean" calcext:value-type="boolean" office:boolean-value="' . $cellValue . '">'; $data .= '' . $cellValue . ''; $data .= ''; } else if (CellHelper::isNumeric($cellValue)) { diff --git a/tests/Spout/Reader/ODS/ReaderTest.php b/tests/Spout/Reader/ODS/ReaderTest.php index 81d808e..df04ffc 100644 --- a/tests/Spout/Reader/ODS/ReaderTest.php +++ b/tests/Spout/Reader/ODS/ReaderTest.php @@ -143,6 +143,9 @@ class ReaderTest extends \PHPUnit_Framework_TestCase */ public function testReadShouldSupportAllCellTypes() { + $utcTz = new \DateTimeZone('UTC'); + $honoluluTz = new \DateTimeZone('Pacific/Honolulu'); // UTC-10 + $allRows = $this->getAllRowsForFile('sheet_with_all_cell_types.ods'); $expectedRows = [ @@ -150,6 +153,11 @@ class ReaderTest extends \PHPUnit_Framework_TestCase 'ods--11', 'ods--12', true, false, 0, 10.43, + new \DateTime('1987-11-29T00:00:00', $utcTz), new \DateTime('1987-11-29T13:37:00', $utcTz), + new \DateTime('1987-11-29T13:37:00', $utcTz), new \DateTime('1987-11-29T13:37:00', $honoluluTz), + new \DateInterval('PT13H37M00S'), + 0, 0.42, + '42 USD', '9.99 EUR', '', ], ]; @@ -165,6 +173,15 @@ class ReaderTest extends \PHPUnit_Framework_TestCase $this->assertEquals([['ods--11', '', 'ods--13']], $allRows); } + /** + * @return void + */ + public function testReadShouldReturnNullOnInvalidDateOrTime() + { + $allRows = $this->getAllRowsForFile('sheet_with_invalid_date_time.ods'); + $this->assertEquals([[null, null]], $allRows); + } + /** * @return void */ diff --git a/tests/resources/ods/sheet_with_all_cell_types.ods b/tests/resources/ods/sheet_with_all_cell_types.ods index 5843ac88d8de8c682c9cd2686de6d12c6dd3884b..440f21c87ff810bebb710aa0986932213deeb6de 100644 GIT binary patch delta 1078 zcmV-61j+lN6xJ1gP)h>@6aWAK2mpC*B1ekI4r#3e0052?000XB003ieZggdCbS`*p zY~5H*Z`(K!z4uoL0`^FK*eOyAaf$`HMK3Lirn&S+QzKg_B?=@J+yA~piIgQpN{kjY zlJ&s`IWuqG8xASaH}4Mx_Z~p1T$F1+4kF)!l8H1gw`>1@{`*Jr&i9n2WlFgyVeKD5 z`R{Mv+=wj886>G-bpfR&Oq3dbJv-k+T=*X5C{>cOwZGH4O2V+JB@cw$hA9hygF#SX z9K@mXWt1~G=(S(VGC{{KQ6_5Sq7geOX#pyMLj^K7Cd!+zWPp$~k}Ls9_ZE?=Tv-6k z5>Y|v5+o;oU#vktJQL>z9~38j&h3FBK9Eq?xd2qf>P(8 zyi6p28wvC$*#Pb!j5bv<_4{2$QtXFxw>xaf_-&=#e64_pO6LXT zr0=6;4S%4+!Q-i;L6VAY-J%s$JQS$utiYbQc6u*!jRz(vM2eAI620-gcH^bFsyID9 z$-HaonclV5jYy9@Ms`+JSlm?Y05l@CNt^8S9o_7N zL8~_#;c*_*t1eG-Vp}*xBve2(q-M3dYV`BsVov~$x@|xdIZaV8%-jQ+@*0RfRyeP; z#Fg?r_lTUZof5Ulj9YapQbkA{--LF2ViS((ya=X?%Q4*u!GV_3R6AqW#m$&BKG+6- zGa};GK|h+IA6<@q#s$^WSzaem^el7FdA?W)KmG?ANE}^sF=qDelszOk@afJT!D%b~Cix~k|i%TkCwZzL4 zJ5KU{WONg2|Nid7Skn`VJam5rqW-)8HWsxCR&!4hWAZTM?+ELzDWf-Yb5-e0^^;EB zMh2Gl_R1QY-O0001cZX&ZU1Cj*}d2S*{ipUOWtpfl6juVs52rC17 wZX%QY2pa*`lNkv%0sWIu2|@xL1(TEsGy+8ilgbG+0+$Ao3LFM$2><{907GBt(f|Me delta 921 zcmV;K17`fz6`~Y>P)h>@6aWAK2msZCA4gZ1OGxeS4fCQUVsm9oJkw4Z?c_U{vJEAQ>Qd+1!A@Ip;hK` z&YAIeqOP8foMn5^rj=riip3ITAgHWcv0bAd-@o9GC^MFSh?+1dV2w^-(9`B_C7Y(D zP}Y)m9E8PG3hREeU{4V5QRX;=DQmh$J8M;$=c?0eA@w$|X$}lLg2{_Tk;hm{7=@!< zqfU#`1&&0G?3@A;laGdf;YPZiaOI>{3{n_wy4Dh!?gdjO2Lb71`kI1yFnsfAvCcGqIm}cnVwmXuCQL@XzX-qb_*soS{l*S( z25V42EYxfCVQfZ9>(2TL-o>y@bUI1b6nJ6reN0#f*q)TL8>`(xp)9^3ulUYH1DOv~ z8(ABSi(-|B=d+spQqG%DdjEP#D=8Teab4=@OcUK*pAlcYZQ08i*~{0H-7rb)b<_oG%uVU(ksuCB3$TC!S$vkB^ss88$xwP)h>@6aWAK007p5 zAG6~Fk_8UcgC9p%m`h0c0ssJn43mQiD+AVpACs{O8v!Dd%?UOENs|N$LIP<7lSm3Q v0+9uiehM@s;{^Z!b98xZWpgfgZER3W1qJ{B000I61^`z9006ZJ00000w{M>B diff --git a/tests/resources/ods/sheet_with_invalid_date_time.ods b/tests/resources/ods/sheet_with_invalid_date_time.ods new file mode 100644 index 0000000000000000000000000000000000000000..b823ee5a6971ad51be0e9aa1b1355c4d34438850 GIT binary patch literal 2582 zcmZ{mc{mi@9>>SnvsALLb8XpTD0@QowJ|~rH-jO}jP49Il(J753U7?Z?$mwnRi5{r^PGRqIp5!Tp6_$MpK~BK%q;u>0DuGV7%goPkYw$& z0t5i^H~|1I#;kV`Iv9ZtR>h%^Pn_SON5RLM=apHg{$`3t&YYhC8M%RsvTXYP$(89g zeBLq{q-ePR+~#hLVuyXg0K3>oN7pijNFU#Gjz{vmt}7u~syE4t1iMpE&r07sB?fJB zb(VU2bjK_`l-949xpga~FtcD5G7$fnPtT%ZNqpb8EeAwR;?~uJ4KzzD&j)ABH)?wo zbZI;dL$1*cUitGxM;oG_k(Emirr8cY;~SB{k_ z4h591F5>~*BU+mXalSQC*#_AK^*`x^DPPI2I0?<~kB#6rcdHfHvA^^j*tu+5H>N-m z)qYuebJ>d*#w#84Aero`-tuV{*wr2a?kF9#lS^19o{y2SqqDc#^i5QZZqcX5w&=D( z5-@jFI7g0;lb0E0_*jGiSYE3fsPAsojqSfMt~o?pxMWyEGPA$E#;Bu5Fr8mJQ@bDp2Zt61bn(YEvUa|1C&5Upqh!)e z!+t+sZJbaERW3e?5`aXbTmZbu<{zi@IV?~14f1#8PZeGugcNkb3%rigOpj)PMYiC_ zsq`_Xne}b-dyd!+c*IBo>CSyt9XwU#_~KPKmxyRL&4Ghz6k8k7}ch<1-t}v$E}4Q zq1~8X(XvgcGU3uk7OpaxmX@=@w34Bj&vH*MRH(N@zIgr*q z>6ayan>kCa{kB!{RIYw<>D_mkDd>v>Ti#_9R@-boxjYsrWc3nSlIY0`b&8^UfR>_7AV#t%UW7O$l6T zi)L-ZIS3wZi4Ttv2+gS;dP2FE5Tx5?3RZTY9C=_OQc9xgkK&`s9u%I!ScEoT%tN#) z(WW)0@IGqx((F0L*KkwfI%-2|tKF|#=mZ3tpBUYu;p@AlLq5eKb>meS{-`p>PoMl5 zPK91Yu(m~V0ze!rt%*qa5BM0URp9=r4mRT|>`(8V?XX$kW8`@k@RG=h5u2ARetLb2m4(~vZ@k(9Hrp&; zyx!aLI0lW85XEj(GzA$_&v6>IBtU&mg3|(>{=j^;)PUG(yN76g-TP#*pXxK6i1m^aK9{IW++_QSD-BF91)=y zdFri)>#Iv9y_zBkC1+~Koa3!4QoU1Gjbk$@FqLbtZt@LB(i-c1=b5sej5LFgT@c7l zrcBZdIZ`EK8E`%2bbc9noYx^%sy-q)Qo#*11G>^mGL~!|Ghh!1_Ekr<6}D)%zql2Z zdxFZsJ;*?jvQ_TbSt7Nw!29&cCKn53)TXVo=29Bzs0u)UT+@K(n5jw0oxgv=T-M zhbMALWTz?U2(sBKKN2htP`PVp&~xTcg@K%aWe1a7xo55 z16InLBP)CoV(MU-z{oG*cZvs=Pvt01#TyJ0|1}p={T=cNKP(P465H;(I_mvAv*zN^ zXWYggK^k;;iIJm*`}sZrkq@`Fa^c^PLTs2AH~9a5RT$U$m-frYQJP?Ngcu>NFsQ!Y(Vb$TtAE*WyssjK( UY*^WTtg$h=3j@x@KZpbPH@6aWAK2mnz}B1a(wrPlre008BY7axCEcDGU&C0SLf`q-DY zA7IQt&>C#nCYQhOU~FR2q(R;0AqjIibLM!)$@=ZsiewMkG_u=}Z1qSI=(sGKuHKMe zKflq>Br%qCB@?oP4LN}!Z`+5ptg41XUP|7#&{@i5XYn)f_6YHbBv_*}dC50qXRXT9 zRP|b{q^{GFryze|Krm^x%F+l7RM3S2`8gIfh|?>%H2G)*tt4vFDx_Knh+$eIk7827@Ex?w ztlEZaX=G(rn3awyrf}pS(n{*K7cAs=eOrKD+y}GF*wcTU!U(?&3bI`i6>@70W5O;92&0w(ciiOwmy^-TQQYhr?lYcy_a#y}rJt&P&b}QPx}8qGp^r;CWT|?kf({ zF~-ob`1gND;$5Mv(~jaC3NoX~Aqxh@^l$~lIA5V+y|BbrPNUK{$y`*FqHnv4nlkf< z5kwqLmBtg)wgHn=GA}f+cUk~$5M~dm4Ep^mBP|beI{G#pjJ?mrgAK7eXzCq~WcEqf zVDAl3sch3SLE{*!d-y;f0UmE13z{lcheK;sHgA8>vYQP?&5h=RZY*Ayw3Q`h3QcW6 zl5z3M#wfu~e`P)m^^Mu}-a?kAh>^D%h2oZG2VfCtEb87pfp{{9p1gT9{20Z=1QY-O000C4Z%-n#Ed!DT4N*@bM212pa(>lNSj#0Zx-n2|@vGlaUEE0hW`;2{i%ell=)D2D%6U0002bJDN!V delta 913 zcmZ23vOq*Uz?+#xgn@y9gF&c6-kp8bmb=GD^H=8$Z%`#VswVlKgwXkc~TC*qD z&*y(J_FVHg_u$g3L}$N*lefPmu06F$Y=Wp)3GbFZuB%pmeD>Hn)hKaZ%g7Du;JsdbOgc2BKtv&OrO#zgxlgy(ojj=XuGu>8Zl+&-VoN;N!+w)f z@(k0L{SUnRZuQZfzhAX=^*ukW$#Zb?o6k9S{rAmSw0O`WmF_(Xj|x`t&RbAudSgjQWM0Gm%Q6Mc2D`(TUVk4iDOoz> zvf=iZ32$okB6~!o2H#X@AaKk$-00_h+jtTzmFh%@WzZ z!J47nBl+UPyTTpno8O}S7v;kkn3Y71}P zj0b#Oyp^+#bew(^8GmrM#5PI)B_+L%{yH}OJKr?a@0#qkyOfQ;OXVSF<=OJ~AH^=I zOD;3N%w2h9*3b9p>)uRy^Vja3_qRaNqNOSyzU|_fZ0vXBZA`(U>#d&u`*pmfw3j}5 zR~3D)^x~Drub%$QOV-~m^?LJkbMx(wmwcZsKQpN%`?T%np6Lg_eEzim#W!8{j9twxnlBV4jp-jEHKMJ0Vj+BrWGd3ahfr@PY&gDVoaYrgVU6;fATp_ kQ