perf tests in development branch

This commit is contained in:
madflow 2019-01-30 15:43:33 +01:00 committed by Adrien Loison
parent 171a2fab10
commit e8693834a0
12 changed files with 1000533 additions and 11 deletions

View File

@ -30,16 +30,13 @@ Version 3 introduced new functionality but also some breaking changes. If you wa
## Running tests
On the `master` branch, only unit and functional tests are included. The performance tests require very large files and have been excluded.
If you just want to check that everything is working as expected, executing the tests of the `master` branch is enough.
The `master` branch includes unit, functional and performance tests.
If you just want to check that everything is working as expected, executing the unit and functional tests is enough.
If you want to run performance tests, you will need to check out the `perf-tests` branch. Multiple test suites can then be run, depending on the expected output:
* `phpunit` - runs the whole test suite (unit + functional + performance tests)
* `phpunit --exclude-group perf-tests` - only runs the unit and functional tests
* `phpunit` - runs unit and functional tests
* `phpunit --group perf-tests` - only runs the performance tests
For information, the performance tests take about 30 minutes to run (processing files with 1 million rows is not quick).
For information, the performance tests take about 10 minutes to run (processing files with 1 million rows is not quick).
> Performance tests status: [![Build Status](https://travis-ci.org/box/spout.svg?branch=perf-tests)](https://travis-ci.org/box/spout)

View File

@ -1,18 +1,27 @@
<phpunit
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="http://schema.phpunit.de/4.3/phpunit.xsd"
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/7.5/phpunit.xsd"
bootstrap="tests/bootstrap.php"
colors="true"
convertErrorsToExceptions="false"
convertWarningsToExceptions="false"
defaultTestSuite="unit-tests"
verbose="false">
<testsuites>
<testsuite name="all-tests">
<testsuite name="unit-tests">
<directory>tests/</directory>
</testsuite>
</testsuites>
<groups>
<exclude>
<group>perf-tests</group>
</exclude>
</groups>
<filter>
<whitelist>
<directory suffix=".php">src/</directory>

View File

@ -0,0 +1,62 @@
<?php
namespace Box\Spout\Reader\CSV;
use Box\Spout\Common\Type;
use Box\Spout\Reader\Common\Creator\ReaderEntityFactory;
use Box\Spout\TestUsingResource;
use PHPUnit\Framework\TestCase;
/**
 * Class ReaderPerfTest
 * Checks the speed and the memory footprint of the CSV Reader on a large file
 */
class ReaderPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * Reads a CSV resource expected to hold 1 million rows and checks that
     * the read takes less than 1 minute and that the memory peak grows
     * by less than 1MB while doing so.
     *
     * @group perf-tests
     *
     * @return void
     */
    public function testPerfWhenReadingOneMillionRowsCSV()
    {
        // baseline taken first, so that what PHPUnit already allocated is not counted
        $initialPeak = memory_get_peak_usage(true);

        $timeLimit = 60; // seconds
        $memoryLimit = 1 * 1024 * 1024; // bytes
        $startedAt = time();

        $csvPath = $this->getResourcePath('csv_with_one_million_rows.csv');

        $reader = ReaderEntityFactory::createReader(Type::CSV);
        $reader->open($csvPath);

        // rows are only counted; their content is irrelevant here
        $rowCount = 0;
        /** @var Sheet $sheet */
        foreach ($reader->getSheetIterator() as $sheet) {
            foreach ($sheet->getRowIterator() as $ignoredRow) {
                ++$rowCount;
            }
        }

        $reader->close();

        $wantedRowCount = 1000000;
        $this->assertEquals($wantedRowCount, $rowCount, "$wantedRowCount rows should have been read");

        $elapsed = time() - $startedAt;
        $isFastEnough = $elapsed < $timeLimit;
        $this->assertTrue($isFastEnough, "Reading 1 million rows should take less than $timeLimit seconds (took $elapsed seconds)");

        $peakIncrease = memory_get_peak_usage(true) - $initialPeak;
        $limitInMb = $memoryLimit / 1024 / 1024;
        $increaseInMb = round($peakIncrease / 1024 / 1024, 2);
        $isSmallEnough = $peakIncrease < $memoryLimit;
        $this->assertTrue($isSmallEnough, 'Reading 1 million rows should require less than ' . $limitInMb . ' MB of memory (required ' . $increaseInMb . ' MB)');
    }
}

View File

@ -0,0 +1,62 @@
<?php
namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Type;
use Box\Spout\Reader\Common\Creator\ReaderEntityFactory;
use Box\Spout\TestUsingResource;
use PHPUnit\Framework\TestCase;
/**
 * Class ReaderPerfTest
 * Performance tests for ODS Reader
 */
class ReaderPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * 1 million rows (each row containing 3 cells) should be read
     * in less than 10 minutes and the execution should not require
     * more than 1MB of memory
     *
     * @group perf-tests
     *
     * @return void
     */
    public function testPerfWhenReadingOneMillionRowsODS()
    {
        // getting current memory peak to avoid taking into account the memory used by PHPUnit
        $beforeMemoryPeakUsage = memory_get_peak_usage(true);

        $expectedMaxExecutionTime = 600; // 10 minutes in seconds
        $expectedMaxMemoryPeakUsage = 1 * 1024 * 1024; // 1MB in bytes
        $startTime = time();

        $fileName = 'ods_with_one_million_rows.ods';
        $resourcePath = $this->getResourcePath($fileName);

        $reader = ReaderEntityFactory::createReader(Type::ODS);
        $reader->open($resourcePath);

        // the rows are only counted: iterating over them is what is being measured
        $numReadRows = 0;
        /** @var Sheet $sheet */
        foreach ($reader->getSheetIterator() as $sheet) {
            foreach ($sheet->getRowIterator() as $row) {
                $numReadRows++;
            }
        }

        $reader->close();

        $expectedNumRows = 1000000;
        $this->assertEquals($expectedNumRows, $numReadRows, "$expectedNumRows rows should have been read");

        $executionTime = time() - $startTime;
        $this->assertTrue($executionTime < $expectedMaxExecutionTime, "Reading 1 million rows should take less than $expectedMaxExecutionTime seconds (took $executionTime seconds)");

        // only the growth of the peak since the beginning of the test is compared to the budget
        $memoryPeakUsage = memory_get_peak_usage(true) - $beforeMemoryPeakUsage;
        $this->assertTrue($memoryPeakUsage < $expectedMaxMemoryPeakUsage, 'Reading 1 million rows should require less than ' . ($expectedMaxMemoryPeakUsage / 1024 / 1024) . ' MB of memory (required ' . round($memoryPeakUsage / 1024 / 1024, 2) . ' MB)');
    }
}

View File

@ -0,0 +1,76 @@
<?php
namespace Box\Spout\Reader\XLSX;
use Box\Spout\Common\Type;
use Box\Spout\Reader\Common\Creator\ReaderEntityFactory;
use Box\Spout\TestUsingResource;
use PHPUnit\Framework\TestCase;
/**
 * Class ReaderPerfTest
 * Performance tests for XLSX Reader
 */
class ReaderPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * Each data set is [use inline strings?, maximum execution time in seconds].
     * The inline assignments only serve to name the values.
     *
     * @return array
     */
    public function dataProviderForTestPerfWhenReading300kRowsXLSX()
    {
        return [
            [$shouldUseInlineStrings = true, $expectedMaxExecutionTime = 390], // 6.5 minutes in seconds
            [$shouldUseInlineStrings = false, $expectedMaxExecutionTime = 600], // 10 minutes in seconds
        ];
    }

    /**
     * 300,000 rows (each row containing 3 cells) should be read
     * in less than 6.5 minutes for inline strings, 10 minutes for
     * shared strings and the execution should not require
     * more than 3MB of memory.
     *
     * @dataProvider dataProviderForTestPerfWhenReading300kRowsXLSX
     * @group perf-tests
     *
     * @param bool $shouldUseInlineStrings
     * @param int $expectedMaxExecutionTime
     * @return void
     */
    public function testPerfWhenReading300kRowsXLSX($shouldUseInlineStrings, $expectedMaxExecutionTime)
    {
        // getting current memory peak to avoid taking into account the memory used by PHPUnit
        $beforeMemoryPeakUsage = memory_get_peak_usage(true);

        $expectedMaxMemoryPeakUsage = 3 * 1024 * 1024; // 3MB in bytes
        $startTime = time();

        $fileName = ($shouldUseInlineStrings) ? 'xlsx_with_300k_rows_and_inline_strings.xlsx' : 'xlsx_with_300k_rows_and_shared_strings.xlsx';
        $resourcePath = $this->getResourcePath($fileName);

        $reader = ReaderEntityFactory::createReader(Type::XLSX);
        $reader->open($resourcePath);

        // the rows are only counted: iterating over them is what is being measured
        $numReadRows = 0;
        /** @var Sheet $sheet */
        foreach ($reader->getSheetIterator() as $sheet) {
            foreach ($sheet->getRowIterator() as $row) {
                $numReadRows++;
            }
        }

        $reader->close();

        $expectedNumRows = 300000;
        $this->assertEquals($expectedNumRows, $numReadRows, "$expectedNumRows rows should have been read");

        $executionTime = time() - $startTime;
        $this->assertTrue($executionTime < $expectedMaxExecutionTime, "Reading 300,000 rows should take less than $expectedMaxExecutionTime seconds (took $executionTime seconds)");

        // the reported figure is rounded to 2 decimals, like in the other performance tests
        $memoryPeakUsage = memory_get_peak_usage(true) - $beforeMemoryPeakUsage;
        $this->assertTrue($memoryPeakUsage < $expectedMaxMemoryPeakUsage, 'Reading 300,000 rows should require less than ' . ($expectedMaxMemoryPeakUsage / 1024 / 1024) . ' MB of memory (required ' . round($memoryPeakUsage / 1024 / 1024, 2) . ' MB)');
    }
}

View File

@ -0,0 +1,69 @@
<?php
namespace Box\Spout\Writer\CSV;
use Box\Spout\Common\Type;
use Box\Spout\TestUsingResource;
use Box\Spout\Writer\Common\Creator\WriterEntityFactory;
use PHPUnit\Framework\TestCase;
/**
 * Class WriterPerfTest
 * Performance tests for CSV Writer
 */
class WriterPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * 1 million rows (each row containing 3 cells) should be written
     * in less than 30 seconds and the execution should not require
     * more than 1MB of memory
     *
     * @group perf-tests
     *
     * @return void
     */
    public function testPerfWhenWritingOneMillionRowsCSV()
    {
        // getting current memory peak to avoid taking into account the memory used by PHPUnit
        $beforeMemoryPeakUsage = memory_get_peak_usage(true);

        $numRows = 1000000;
        $expectedMaxExecutionTime = 30; // 30 seconds
        $expectedMaxMemoryPeakUsage = 1 * 1024 * 1024; // 1MB in bytes
        $startTime = time();

        $fileName = 'csv_with_one_million_rows.csv';
        $this->createGeneratedFolderIfNeeded($fileName);
        $resourcePath = $this->getGeneratedResourcePath($fileName);

        $writer = WriterEntityFactory::createWriter(Type::CSV);
        $writer->openToFile($resourcePath);

        for ($i = 1; $i <= $numRows; $i++) {
            $writer->addRow(WriterEntityFactory::createRowFromArray(["csv--{$i}1", "csv--{$i}2", "csv--{$i}3"]));
        }

        $writer->close();

        // measurements are taken as soon as the writer is closed, so that the
        // verification of the generated file below is not counted as writing
        $executionTime = time() - $startTime;
        $memoryPeakUsage = memory_get_peak_usage(true) - $beforeMemoryPeakUsage;

        $this->assertEquals($numRows, $this->getNumWrittenRows($resourcePath), "The created CSV should contain $numRows rows");

        $this->assertTrue($executionTime < $expectedMaxExecutionTime, "Writing 1 million rows should take less than $expectedMaxExecutionTime seconds (took $executionTime seconds)");

        $this->assertTrue($memoryPeakUsage < $expectedMaxMemoryPeakUsage, 'Writing 1 million rows should require less than ' . ($expectedMaxMemoryPeakUsage / 1024 / 1024) . ' MB of memory (required ' . round($memoryPeakUsage / 1024 / 1024, 2) . ' MB)');
    }

    /**
     * Counts the lines of the given file with the "wc" command line utility.
     *
     * @param string $resourcePath
     * @return int
     */
    private function getNumWrittenRows($resourcePath)
    {
        // the path is escaped so that spaces or shell metacharacters cannot break the command
        $lineCountResult = shell_exec('wc -l ' . escapeshellarg($resourcePath));

        // "wc -l" prints the count followed by the file name: the cast keeps the leading number
        return (int) $lineCountResult;
    }
}

View File

@ -0,0 +1,100 @@
<?php
namespace Box\Spout\Writer\ODS;
use Box\Spout\Common\Type;
use Box\Spout\TestUsingResource;
use Box\Spout\Writer\Common\Creator\WriterEntityFactory;
use PHPUnit\Framework\TestCase;
/**
 * Class WriterPerfTest
 * Performance tests for ODS Writer
 */
class WriterPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * 1 million rows (each row containing 3 cells) should be written
     * in less than 4 minutes and the execution should not require
     * more than 3MB of memory
     *
     * @group perf-tests
     *
     * @return void
     */
    public function testPerfWhenWritingOneMillionRowsODS()
    {
        // getting current memory peak to avoid taking into account the memory used by PHPUnit
        $beforeMemoryPeakUsage = memory_get_peak_usage(true);

        $numRows = 1000000;
        $expectedMaxExecutionTime = 240; // 4 minutes in seconds
        $expectedMaxMemoryPeakUsage = 3 * 1024 * 1024; // 3MB in bytes
        $startTime = time();

        $fileName = 'ods_with_one_million_rows.ods';
        $this->createGeneratedFolderIfNeeded($fileName);
        $resourcePath = $this->getGeneratedResourcePath($fileName);

        /** @var Writer $writer */
        $writer = WriterEntityFactory::createWriter(Type::ODS);
        $writer->setShouldCreateNewSheetsAutomatically(true);

        $writer->openToFile($resourcePath);

        for ($i = 1; $i <= $numRows; $i++) {
            $writer->addRow(WriterEntityFactory::createRowFromArray(["ods--{$i}-1", "ods--{$i}-2", "ods--{$i}-3"]));
        }

        $writer->close();

        // measurements are taken as soon as the writer is closed, so that the
        // extraction and inspection of content.xml below is not counted as writing
        $executionTime = time() - $startTime;
        $memoryPeakUsage = memory_get_peak_usage(true) - $beforeMemoryPeakUsage;

        $this->assertEquals($numRows, $this->getNumWrittenRows($resourcePath), "The created ODS ($fileName) should contain $numRows rows");

        $this->assertTrue($executionTime < $expectedMaxExecutionTime, "Writing 1 million rows should take less than $expectedMaxExecutionTime seconds (took $executionTime seconds)");

        $this->assertTrue($memoryPeakUsage < $expectedMaxMemoryPeakUsage, 'Writing 1 million rows should require less than ' . ($expectedMaxMemoryPeakUsage / 1024 / 1024) . ' MB of memory (required ' . round($memoryPeakUsage / 1024 / 1024, 2) . ' MB)');
    }

    /**
     * Returns the row number found in the last cell value of the generated file,
     * or 0 if no cell value could be found.
     *
     * @param string $resourcePath
     * @return int
     */
    private function getNumWrittenRows($resourcePath)
    {
        $numWrittenRows = 0;

        // to avoid executing the regex on the entire file to get the last row number,
        // we only retrieve the last 200 characters of content.xml
        $endingContentXmlContents = $this->getLastCharactersOfContentXmlFile($resourcePath);

        // cell values were written as "ods--[ROW_NUMBER]-[CELL_NUMBER]"
        if (preg_match_all('/<text:p>ods--(\d+)-\d<\/text:p>/', $endingContentXmlContents, $matches)) {
            // the last entry of $matches holds the captured row numbers, the last of which is the last written row
            $lastMatch = array_pop($matches);
            $numWrittenRows = (int) (array_pop($lastMatch));
        }

        return $numWrittenRows;
    }

    /**
     * Returns the last 200 characters of the content.xml file contained in the given ODS file.
     * An empty string is returned if content.xml could not be extracted.
     *
     * @param string $resourcePath
     * @return string
     */
    private function getLastCharactersOfContentXmlFile($resourcePath)
    {
        $pathToContentXmlFile = 'zip://' . $resourcePath . '#content.xml';

        // since we cannot execute "tail" on a file inside a zip, we need to copy it outside first.
        // A unique temporary file is used so that concurrent runs cannot overwrite each other's copy.
        $tmpFile = tempnam(sys_get_temp_dir(), 'spout_perf_');
        if ($tmpFile === false) {
            return '';
        }

        try {
            if (!copy($pathToContentXmlFile, $tmpFile)) {
                return '';
            }

            // Get the last 200 characters
            return (string) shell_exec('tail -c 200 ' . escapeshellarg($tmpFile));
        } finally {
            // remove the temporary file, whatever happened above
            unlink($tmpFile);
        }
    }
}

View File

@ -0,0 +1,147 @@
<?php
namespace Box\Spout\Writer\XLSX;
use Box\Spout\Common\Type;
use Box\Spout\TestUsingResource;
use Box\Spout\Writer\Common\Creator\WriterEntityFactory;
use PHPUnit\Framework\TestCase;
/**
 * Class WriterPerfTest
 * Performance tests for XLSX Writer
 */
class WriterPerfTest extends TestCase
{
    use TestUsingResource;

    /**
     * Each data set is [use inline strings?, maximum execution time in seconds].
     * The inline assignments only serve to name the values.
     *
     * @return array
     */
    public function dataProviderForTestPerfWhenWritingOneMillionRowsXLSX()
    {
        return [
            [$shouldUseInlineStrings = true, $expectedMaxExecutionTime = 330], // 5.5 minutes in seconds
            [$shouldUseInlineStrings = false, $expectedMaxExecutionTime = 360], // 6 minutes in seconds
        ];
    }

    /**
     * 1 million rows (each row containing 3 cells) should be written
     * in less than 5.5 minutes for inline strings, 6 minutes for
     * shared strings and the execution should not require
     * more than 3MB of memory
     *
     * @dataProvider dataProviderForTestPerfWhenWritingOneMillionRowsXLSX
     * @group perf-tests
     *
     * @param bool $shouldUseInlineStrings
     * @param int $expectedMaxExecutionTime
     * @return void
     */
    public function testPerfWhenWritingOneMillionRowsXLSX($shouldUseInlineStrings, $expectedMaxExecutionTime)
    {
        // getting current memory peak to avoid taking into account the memory used by PHPUnit
        $beforeMemoryPeakUsage = memory_get_peak_usage(true);

        $numRows = 1000000;
        $expectedMaxMemoryPeakUsage = 3 * 1024 * 1024; // 3MB in bytes
        $startTime = time();

        $fileName = ($shouldUseInlineStrings) ? 'xlsx_with_one_million_rows_and_inline_strings.xlsx' : 'xlsx_with_one_million_rows_and_shared_strings.xlsx';
        $this->createGeneratedFolderIfNeeded($fileName);
        $resourcePath = $this->getGeneratedResourcePath($fileName);

        /** @var Writer $writer */
        $writer = WriterEntityFactory::createWriter(Type::XLSX);
        $writer->setShouldUseInlineStrings($shouldUseInlineStrings);
        $writer->setShouldCreateNewSheetsAutomatically(true);

        $writer->openToFile($resourcePath);

        for ($i = 1; $i <= $numRows; $i++) {
            $writer->addRow(WriterEntityFactory::createRowFromArray(["xlsx--{$i}-1", "xlsx--{$i}-2", "xlsx--{$i}-3"]));
        }

        $writer->close();

        // measurements are taken as soon as the writer is closed, so that the
        // extraction and inspection of the generated XML files below is not counted as writing
        $executionTime = time() - $startTime;
        $memoryPeakUsage = memory_get_peak_usage(true) - $beforeMemoryPeakUsage;

        if ($shouldUseInlineStrings) {
            // with inline strings, the last written value is located in the last sheet's XML file
            $numSheets = count($writer->getSheets());
            $this->assertEquals($numRows, $this->getNumWrittenRowsUsingInlineStrings($resourcePath, $numSheets), "The created XLSX ($fileName) should contain $numRows rows");
        } else {
            $this->assertEquals($numRows, $this->getNumWrittenRowsUsingSharedStrings($resourcePath), "The created XLSX ($fileName) should contain $numRows rows");
        }

        $this->assertTrue($executionTime < $expectedMaxExecutionTime, "Writing 1 million rows should take less than $expectedMaxExecutionTime seconds (took $executionTime seconds)");

        $this->assertTrue($memoryPeakUsage < $expectedMaxMemoryPeakUsage, 'Writing 1 million rows should require less than ' . ($expectedMaxMemoryPeakUsage / 1024 / 1024) . ' MB of memory (required ' . round($memoryPeakUsage / 1024 / 1024, 2) . ' MB)');
    }

    /**
     * Returns the last row number found in the XML file of the last sheet.
     *
     * @param string $resourcePath
     * @param int $numSheets
     * @return int
     */
    private function getNumWrittenRowsUsingInlineStrings($resourcePath, $numSheets)
    {
        $pathToLastSheetFile = 'zip://' . $resourcePath . '#xl/worksheets/sheet' . $numSheets . '.xml';

        return $this->getLastRowNumberForFile($pathToLastSheetFile);
    }

    /**
     * Returns the last row number found in the shared strings XML file.
     *
     * @param string $resourcePath
     * @return int
     */
    private function getNumWrittenRowsUsingSharedStrings($resourcePath)
    {
        $pathToSharedStringsFile = 'zip://' . $resourcePath . '#xl/sharedStrings.xml';

        return $this->getLastRowNumberForFile($pathToSharedStringsFile);
    }

    /**
     * Returns the row number found in the last cell value of the given file,
     * or 0 if no cell value could be found.
     *
     * @param string $filePath
     * @return int
     */
    private function getLastRowNumberForFile($filePath)
    {
        $lastRowNumber = 0;

        // to avoid executing the regex on the entire file to get the last row number,
        // we only retrieve the last 200 characters of the file, as the cell value
        // contains the row number.
        $lastCharactersOfFile = $this->getLastCharactersOfFile($filePath, 200);

        // in sharedStrings.xml and sheetN.xml, the cell value will look like this:
        // <t>xlsx--[ROW_NUMBER]-[CELL_NUMBER]</t> or <t xml:space="preserve">xlsx--[ROW_NUMBER]-[CELL_NUMBER]</t>
        if (preg_match_all('/<t.*>xlsx--(\d+)-\d+<\/t>/', $lastCharactersOfFile, $matches)) {
            // the last entry of $matches holds the captured row numbers, the last of which is the last written row
            $lastMatch = array_pop($matches);
            $lastRowNumber = (int) (array_pop($lastMatch));
        }

        return $lastRowNumber;
    }

    /**
     * Returns the last $numCharacters characters of the given file.
     * An empty string is returned if the file could not be copied.
     *
     * @param string $filePath
     * @param int $numCharacters
     * @return string
     */
    private function getLastCharactersOfFile($filePath, $numCharacters)
    {
        // since we cannot execute "tail" on a file inside a zip, we need to copy it outside first.
        // A unique temporary file is used so that concurrent runs cannot overwrite each other's copy.
        $tmpFile = tempnam(sys_get_temp_dir(), 'spout_perf_');
        if ($tmpFile === false) {
            return '';
        }

        try {
            if (!copy($filePath, $tmpFile)) {
                return '';
            }

            // Get the last $numCharacters characters
            return (string) shell_exec('tail -c ' . (int) $numCharacters . ' ' . escapeshellarg($tmpFile));
        } finally {
            // remove the temporary file, whatever happened above
            unlink($tmpFile);
        }
    }
}

File diff suppressed because it is too large Load Diff

Binary file not shown.