Use number-columns-repeated in ODS writer

Using the table:number-columns-repeated attribute can reduce the size of the generated XML file by collapsing consecutive identical cell values into a single cell element.
This commit is contained in:
Adrien Loison 2015-08-31 12:03:24 -07:00
parent 0f8e7a8f58
commit bc009a3241
4 changed files with 123 additions and 27 deletions

View File

@ -135,7 +135,7 @@ abstract class AbstractWorkbook implements WorkbookInterface
* If shouldCreateNewSheetsAutomatically option is set to true, it will handle pagination * If shouldCreateNewSheetsAutomatically option is set to true, it will handle pagination
* with the creation of new worksheets if one worksheet has reached its maximum capacity. * with the creation of new worksheets if one worksheet has reached its maximum capacity.
* *
* @param array $dataRow Array containing data to be written. * @param array $dataRow Array containing data to be written. Cannot be empty.
* Example $dataRow = ['data1', 1234, null, '', 'data5']; * Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. * @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row.
* @return void * @return void

View File

@ -7,7 +7,6 @@ use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Helper\StringHelper; use Box\Spout\Common\Helper\StringHelper;
use Box\Spout\Writer\Common\Helper\CellHelper; use Box\Spout\Writer\Common\Helper\CellHelper;
use Box\Spout\Writer\Common\Internal\WorksheetInterface; use Box\Spout\Writer\Common\Internal\WorksheetInterface;
use Box\Spout\Writer\Common\Sheet;
/** /**
* Class Worksheet * Class Worksheet
@ -126,7 +125,7 @@ class Worksheet implements WorksheetInterface
/** /**
* Adds data to the worksheet. * Adds data to the worksheet.
* *
* @param array $dataRow Array containing data to be written. * @param array $dataRow Array containing data to be written. Cannot be empty.
* Example $dataRow = ['data1', 1234, null, '', 'data5']; * Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style. * @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style.
* @return void * @return void
@ -135,36 +134,26 @@ class Worksheet implements WorksheetInterface
*/ */
public function addRow($dataRow, $style) public function addRow($dataRow, $style)
{ {
$this->maxNumColumns = max($this->maxNumColumns, count($dataRow));
$styleIndex = ($style->getId() + 1); // 1-based $styleIndex = ($style->getId() + 1); // 1-based
$cellsCount = count($dataRow);
$this->maxNumColumns = max($this->maxNumColumns, $cellsCount);
$data = ' <table:table-row table:style-name="ro1">' . PHP_EOL; $data = ' <table:table-row table:style-name="ro1">' . PHP_EOL;
foreach($dataRow as $cellValue) { $currentCellIndex = 0;
$data .= ' <table:table-cell table:style-name="ce' . $styleIndex . '"'; $nextCellIndex = 1;
if (CellHelper::isNonEmptyString($cellValue)) { for ($i = 0; $i < $cellsCount; $i++) {
$data .= ' office:value-type="string" calcext:value-type="string">' . PHP_EOL; $currentCellValue = $dataRow[$currentCellIndex];
$cellValueLines = explode("\n", $cellValue); if (!array_key_exists($nextCellIndex, $dataRow) || $currentCellValue !== $dataRow[$nextCellIndex]) {
foreach ($cellValueLines as $cellValueLine) { $numTimesValueRepeated = ($nextCellIndex - $currentCellIndex);
$data .= ' <text:p>' . $this->stringsEscaper->escape($cellValueLine) . '</text:p>' . PHP_EOL; $data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated);
}
$data .= ' </table:table-cell>' . PHP_EOL; $currentCellIndex = $nextCellIndex;
} else if (CellHelper::isBoolean($cellValue)) {
$data .= ' office:value-type="boolean" calcext:value-type="boolean" office:value="' . $cellValue . '">' . PHP_EOL;
$data .= ' <text:p>' . $cellValue . '</text:p>' . PHP_EOL;
$data .= ' </table:table-cell>' . PHP_EOL;
} else if (CellHelper::isNumeric($cellValue)) {
$data .= ' office:value-type="float" calcext:value-type="float" office:value="' . $cellValue . '">' . PHP_EOL;
$data .= ' <text:p>' . $cellValue . '</text:p>' . PHP_EOL;
$data .= ' </table:table-cell>' . PHP_EOL;
} else if (empty($cellValue)) {
$data .= '/>' . PHP_EOL;
} else {
throw new InvalidArgumentException('Trying to add a value with an unsupported type: ' . gettype($cellValue));
} }
$nextCellIndex++;
} }
$data .= ' </table:table-row>' . PHP_EOL; $data .= ' </table:table-row>' . PHP_EOL;
@ -178,6 +167,49 @@ class Worksheet implements WorksheetInterface
$this->lastWrittenRowIndex++; $this->lastWrittenRowIndex++;
} }
/**
 * Builds the XML of a single <table:table-cell> element for the given value.
 *
 * When the value occurs several times in a row, one element carrying a
 * table:number-columns-repeated attribute is emitted instead of one element per cell.
 *
 * Supported values:
 *  - non-empty strings: one <text:p> per "\n"-separated line, escaped through the strings escaper
 *  - booleans: written as "1" (true) or "0" (false)
 *  - numeric values: written as-is
 *  - anything for which empty() is true (null, '', ...): written as a self-closing, value-less cell
 *
 * @param mixed $cellValue The value to be written
 * @param int $styleIndex Index of the used style (appended to the "ce" style-name prefix)
 * @param int $numTimesValueRepeated Number of times the value is consecutively repeated
 * @return string The cell XML content
 * @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported
 */
protected function getCellContent($cellValue, $styleIndex, $numTimesValueRepeated)
{
    $data = ' <table:table-cell table:style-name="ce' . $styleIndex . '"';

    // The attribute is only written when the cell really stands for more than one column.
    if ($numTimesValueRepeated !== 1) {
        $data .= ' table:number-columns-repeated="' . $numTimesValueRepeated . '"';
    }

    if (CellHelper::isNonEmptyString($cellValue)) {
        $data .= ' office:value-type="string" calcext:value-type="string">' . PHP_EOL;

        // Each line of a multi-line string gets its own paragraph element.
        $cellValueLines = explode("\n", $cellValue);
        foreach ($cellValueLines as $cellValueLine) {
            $data .= ' <text:p>' . $this->stringsEscaper->escape($cellValueLine) . '</text:p>' . PHP_EOL;
        }

        $data .= ' </table:table-cell>' . PHP_EOL;
    } else if (CellHelper::isBoolean($cellValue)) {
        // PHP converts false to '' when concatenated, which produced an empty
        // office:value attribute and an empty paragraph. Serialise explicitly instead;
        // the output for true ("1") is unchanged.
        // NOTE(review): assumes isBoolean() only accepts real PHP booleans - confirm in CellHelper.
        // NOTE(review): ODF defines office:boolean-value="true|false" for boolean cells;
        // the attribute name is kept as-is here for backward compatibility - worth revisiting.
        $booleanAsString = $cellValue ? '1' : '0';

        $data .= ' office:value-type="boolean" calcext:value-type="boolean" office:value="' . $booleanAsString . '">' . PHP_EOL;
        $data .= ' <text:p>' . $booleanAsString . '</text:p>' . PHP_EOL;
        $data .= ' </table:table-cell>' . PHP_EOL;
    } else if (CellHelper::isNumeric($cellValue)) {
        $data .= ' office:value-type="float" calcext:value-type="float" office:value="' . $cellValue . '">' . PHP_EOL;
        $data .= ' <text:p>' . $cellValue . '</text:p>' . PHP_EOL;
        $data .= ' </table:table-cell>' . PHP_EOL;
    } else if (empty($cellValue)) {
        // Empty cells carry no value: close the element immediately.
        $data .= '/>' . PHP_EOL;
    } else {
        throw new InvalidArgumentException('Trying to add a value with an unsupported type: ' . gettype($cellValue));
    }

    return $data;
}
/** /**
* Closes the worksheet * Closes the worksheet
* *

View File

@ -118,7 +118,7 @@ EOD;
/** /**
* Adds data to the worksheet. * Adds data to the worksheet.
* *
* @param array $dataRow Array containing data to be written. * @param array $dataRow Array containing data to be written. Cannot be empty.
* Example $dataRow = ['data1', 1234, null, '', 'data5']; * Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style. * @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style.
* @return void * @return void

View File

@ -195,6 +195,48 @@ class WriterTest extends \PHPUnit_Framework_TestCase
$this->assertValueWasWrittenToSheet($fileName, 1, 10.2); $this->assertValueWasWrittenToSheet($fileName, 1, 10.2);
} }
/**
 * Data sets for testAddRowShouldUseNumberColumnsRepeatedForRepeatedValues.
 *
 * Each data set is: [row to write, expected number of table-cell nodes,
 * expected value of the number-columns-repeated attribute].
 *
 * @return array
 */
public function dataProviderForTestAddRowShouldUseNumberColumnsRepeatedForRepeatedValues()
{
    $dataSets = [];

    // Rows made of one repeated value: a single cell node is expected.
    $dataSets[] = [['ods--11', 'ods--11', 'ods--11'], 1, 3];
    $dataSets[] = [['', ''], 1, 2];
    $dataSets[] = [[true, true, true, true], 1, 4];
    $dataSets[] = [[1.1, 1.1], 1, 2];

    // Row without repetition: one node per value (the test does not read
    // the third entry when more than one node is expected).
    $dataSets[] = [['foo', 'bar'], 2, 0];

    return $dataSets;
}
/**
 * Writes a single row and checks how its cells were serialised: a row that is
 * expected to produce exactly one cell node must carry the expected
 * number-columns-repeated value, otherwise no cell node may have the attribute.
 *
 * @dataProvider dataProviderForTestAddRowShouldUseNumberColumnsRepeatedForRepeatedValues
 *
 * @param array $dataRow
 * @param int $expectedNumTableCells
 * @param int $expectedNumColumnsRepeated
 * @return void
 */
public function testAddRowShouldUseNumberColumnsRepeatedForRepeatedValues($dataRow, $expectedNumTableCells, $expectedNumColumnsRepeated)
{
    $fileName = 'test_add_row_should_use_number_columns_repeated.ods';
    $this->writeToODSFile([$dataRow], $fileName);

    $cellNodes = $this->getSheetXmlNode($fileName, 1)->getElementsByTagName('table-cell');
    $this->assertEquals($expectedNumTableCells, $cellNodes->length);

    // Several distinct cells: none of them should be flagged as repeated.
    if ($expectedNumTableCells !== 1) {
        foreach ($cellNodes as $cellNode) {
            $this->assertFalse($cellNode->hasAttribute('table:number-columns-repeated'));
        }

        return;
    }

    // Single merged cell: check how many columns it stands for.
    $repeatedAttributeValue = $cellNodes->item(0)->getAttribute('table:number-columns-repeated');
    $this->assertEquals($expectedNumColumnsRepeated, intval($repeatedAttributeValue));
}
/** /**
* @return void * @return void
*/ */
@ -423,12 +465,34 @@ class WriterTest extends \PHPUnit_Framework_TestCase
$this->assertNotContains($valueAsXmlString, $sheetXmlAsString, $message); $this->assertNotContains($valueAsXmlString, $sheetXmlAsString, $message);
} }
/**
 * Positions a reader on the table node of the requested sheet
 * (via moveReaderToCorrectTableNode) and returns that node expanded as a DOM node.
 *
 * @param string $fileName
 * @param int $sheetIndex
 * @return \DOMNode
 */
private function getSheetXmlNode($fileName, $sheetIndex)
{
    return $this->moveReaderToCorrectTableNode($fileName, $sheetIndex)->expand();
}
/** /**
* @param string $fileName * @param string $fileName
* @param int $sheetIndex * @param int $sheetIndex
* @return string * @return string
*/ */
private function getSheetXmlNodeAsString($fileName, $sheetIndex) private function getSheetXmlNodeAsString($fileName, $sheetIndex)
{
$xmlReader = $this->moveReaderToCorrectTableNode($fileName, $sheetIndex);
return $xmlReader->readOuterXml();
}
/**
* @param string $fileName
* @param int $sheetIndex
* @return XMLReader
*/
private function moveReaderToCorrectTableNode($fileName, $sheetIndex)
{ {
$resourcePath = $this->getGeneratedResourcePath($fileName); $resourcePath = $this->getGeneratedResourcePath($fileName);
$pathToSheetFile = $resourcePath . '#content.xml'; $pathToSheetFile = $resourcePath . '#content.xml';
@ -441,6 +505,6 @@ class WriterTest extends \PHPUnit_Framework_TestCase
$xmlReader->readUntilNodeFound('table:table'); $xmlReader->readUntilNodeFound('table:table');
} }
return $xmlReader->readOuterXml(); return $xmlReader;
} }
} }