Fix character escaping in CSV reader/writer

PHP's built-in functions fputcsv and fgetcsv are not RFC-4180 compliant and include an escape character that's not defined in the spec.
This results in escaping characters that should not be escaped.
This commit disables this escaping mechanism.
This commit is contained in:
Adrien Loison 2017-11-11 16:05:06 +01:00
parent e2b519d6f9
commit c826d15472
4 changed files with 38 additions and 3 deletions

View File

@ -86,7 +86,13 @@ class GlobalFunctionsHelper
*/ */
public function fgetcsv($handle, $length = null, $delimiter = null, $enclosure = null)
{
    // PHP uses '\' as the default escape character. This is not RFC-4180 compliant:
    // the spec only knows about doubling the enclosure character.
    // To fix that, the escape mechanism is switched off:
    //  - PHP >= 7.4 accepts an empty string, which really disables escaping;
    //  - older versions require a one-character value, so the NUL byte is used
    //    as a stand-in that is very unlikely to show up in real CSV data.
    // @see https://bugs.php.net/bug.php?id=43225
    // @see http://tools.ietf.org/html/rfc4180
    $escapeCharacter = (PHP_VERSION_ID >= 70400) ? '' : "\0";

    // Returns whatever the native function returns: the parsed fields,
    // or the native failure/end-of-file value.
    return fgetcsv($handle, $length, $delimiter, $enclosure, $escapeCharacter);
}
/** /**
@ -101,7 +107,13 @@ class GlobalFunctionsHelper
*/ */
public function fputcsv($handle, array $fields, $delimiter = null, $enclosure = null)
{
    // PHP uses '\' as the default escape character. This is not RFC-4180 compliant:
    // the spec only knows about doubling the enclosure character.
    // To fix that, the escape mechanism is switched off:
    //  - PHP >= 7.4 accepts an empty string, which really disables escaping;
    //  - older versions require a one-character value, so the NUL byte is used
    //    as a stand-in that is very unlikely to show up in real CSV data.
    // @see https://bugs.php.net/bug.php?id=43225
    // @see http://tools.ietf.org/html/rfc4180
    $escapeCharacter = (PHP_VERSION_ID >= 70400) ? '' : "\0";

    // Returns whatever the native function returns: the length of the
    // written string, or false on failure.
    return fputcsv($handle, $fields, $delimiter, $enclosure, $escapeCharacter);
}
/** /**

View File

@ -219,12 +219,23 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('This is, a comma', $allRows[0][0]); $this->assertEquals('This is, a comma', $allRows[0][0]);
} }
/**
 * Reads the "csv_with_escaped_characters.csv" fixture and checks that the
 * single row it yields keeps its quotes and backslashes exactly as listed below.
 *
 * @return void
 */
public function testReadShouldSupportEscapedCharacters()
{
    // One row: a quoted value, then values ending with 1, 2 and 3 backslashes.
    $expectedRows = [
        ['"csv--11"', 'csv--12\\', 'csv--13\\\\', 'csv--14\\\\\\'],
    ];

    $actualRows = $this->getAllRowsForFile('csv_with_escaped_characters.csv');

    $this->assertEquals($expectedRows, $actualRows);
}
/**
 * Checks that the first cell of the "csv_with_line_breaks.csv" fixture
 * is read with its embedded line break intact.
 *
 * @return void
 */
public function testReadShouldNotTruncateLineBreak()
{
    $rows = $this->getAllRowsForFile('csv_with_line_breaks.csv');
    $firstCell = $rows[0][0];

    $this->assertEquals("This is,\na comma", $firstCell);
}

View File

@ -175,6 +175,17 @@ class WriterTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('#This is, a comma#,csv--12,csv--13', $writtenContent, 'The fields should be enclosed with #'); $this->assertEquals('#This is, a comma#,csv--12,csv--13', $writtenContent, 'The fields should be enclosed with #');
} }
/**
 * Writes one row containing quotes and trailing backslashes, then checks the
 * trimmed file content: quotes are doubled, backslashes are left untouched.
 *
 * @return void
 */
public function testWriteShouldSupportedEscapedCharacters()
{
    $fields = ['"csv--11"', 'csv--12\\', 'csv--13\\\\', 'csv--14\\\\\\'];
    $expectedContent = '"""csv--11""",csv--12\\,csv--13\\\\,csv--14\\\\\\';

    $rows = $this->createRowsFromValues([$fields]);
    $writtenContent = $this->trimWrittenContent(
        $this->writeToCsvFileAndReturnWrittenContent($rows, 'csv_with_escaped_characters.csv')
    );

    $this->assertEquals($expectedContent, $writtenContent, 'The \'"\' and \'\\\' characters should be properly escaped');
}
/** /**
* @param Row[] $allRows * @param Row[] $allRows
* @param string $fileName * @param string $fileName

View File

@ -0,0 +1 @@
"""csv--11""","csv--12\","csv--13\\","csv--14\\\"
1 "csv--11" csv--12\ csv--13\\ csv--14\\\