Attempt to convert the non UTF-8 strings to UTF-8
This commit is contained in:
parent
d946f12951
commit
5e1cfbfdbd
@ -21,6 +21,10 @@
|
||||
"phpunit/phpunit": ">=3.7",
|
||||
"scrutinizer/ocular": "~1.1"
|
||||
},
|
||||
"suggest": {
|
||||
"ext-iconv": "To handle non UTF-8 CSV files (if \"mbstring\" is not already installed or is too limited)",
|
||||
"ext-mbstring": "To handle non UTF-8 CSV files (if \"iconv\" is not already installed)"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Box\\Spout\\": "src/Spout"
|
||||
|
12
src/Spout/Common/Exception/EncodingConversionException.php
Normal file
12
src/Spout/Common/Exception/EncodingConversionException.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Common\Exception;
|
||||
|
||||
/**
 * Class EncodingConversionException
 * Raised by the encoding helper when a string cannot be converted from one
 * encoding to another, either because no conversion function is available
 * or because the conversion itself failed.
 *
 * @package Box\Spout\Common\Exception
 */
class EncodingConversionException extends SpoutException
{
}
|
175
src/Spout/Common/Helper/EncodingHelper.php
Normal file
175
src/Spout/Common/Helper/EncodingHelper.php
Normal file
@ -0,0 +1,175 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Common\Helper;
|
||||
|
||||
use Box\Spout\Common\Exception\EncodingConversionException;
|
||||
|
||||
/**
 * Class EncodingHelper
 * This class provides helper functions to work with encodings:
 * detecting byte order marks (BOMs) and converting strings to and from UTF-8.
 *
 * @package Box\Spout\Common\Helper
 */
class EncodingHelper
{
    /** Definition of the encodings that can have a BOM */
    const ENCODING_UTF8     = 'UTF-8';
    const ENCODING_UTF16_LE = 'UTF-16LE';
    const ENCODING_UTF16_BE = 'UTF-16BE';
    const ENCODING_UTF32_LE = 'UTF-32LE';
    const ENCODING_UTF32_BE = 'UTF-32BE';

    /** Definition of the BOMs for the different encodings */
    const BOM_UTF8     = "\xEF\xBB\xBF";
    const BOM_UTF16_LE = "\xFF\xFE";
    const BOM_UTF16_BE = "\xFE\xFF";
    const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    protected $supportedEncodingsWithBom;

    /**
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($globalFunctionsHelper)
    {
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8     => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     * The file pointer is rewound and then moved by the detection read, so
     * callers should reposition it themselves afterwards.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, $encoding)
    {
        $byteOffsetToSkipBom = 0;

        if ($this->hasBOM($filePointer, $encoding)) {
            $bomUsed = $this->supportedEncodingsWithBom[$encoding];

            // we skip the N first bytes
            $byteOffsetToSkipBom = strlen($bomUsed);
        }

        return $byteOffsetToSkipBom;
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     * Only the BOM associated with the given encoding is looked for; encodings
     * that are not part of the supported map are reported as having no BOM.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    protected function hasBOM($filePointer, $encoding)
    {
        $hasBOM = false;

        // The BOM, if any, is located at the very beginning of the file
        $this->globalFunctionsHelper->rewind($filePointer);

        if (array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            $potentialBom = $this->supportedEncodingsWithBom[$encoding];
            $numBytesInBom = strlen($potentialBom);

            // fgets() reads at most (length - 1) bytes, hence the "+ 1".
            // None of the supported BOMs contains a line feed, so fgets() will not stop early on them.
            $hasBOM = ($this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
        }

        return $hasBOM;
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param string $string Non UTF-8 string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @return string The converted, UTF-8 string
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionToUTF8($string, $sourceEncoding)
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param string $string UTF-8 string to be converted
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @return string The converted string, encoded with the given encoding
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionFromUTF8($string, $targetEncoding)
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * Depending on what is installed on the server, "iconv" is tried first,
     * then "mbstring".
     *
     * @param string $string string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @return string The converted string, encoded with the given encoding
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
    {
        // if source and target encodings are the same, it's a no-op
        if ($sourceEncoding === $targetEncoding) {
            return $string;
        }

        $convertedString = null;

        // Both helper wrappers take their arguments in the same order: (string, source, target)
        if ($this->canUseIconv()) {
            $convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
        } elseif ($this->canUseMbString()) {
            $convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
        } else {
            // mb_convert_encoding() is provided by the "mbstring" extension (not by "intl")
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        if ($convertedString === false) {
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding failed.");
        }

        return $convertedString;
    }

    /**
     * Returns whether "iconv" can be used.
     *
     * @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
     */
    protected function canUseIconv()
    {
        return $this->globalFunctionsHelper->function_exists('iconv');
    }

    /**
     * Returns whether "mb_string" functions can be used.
     * These functions are provided by the "mbstring" PHP extension.
     *
     * @return bool TRUE if "mb_string" functions are available and can be used, FALSE otherwise
     */
    protected function canUseMbString()
    {
        return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
    }
}
|
@ -203,4 +203,73 @@ class GlobalFunctionsHelper
|
||||
{
|
||||
header($string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function iconv()
|
||||
* @see iconv()
|
||||
*
|
||||
* @param string $string The string to be converted
|
||||
* @param string $sourceEncoding The encoding of the source string
|
||||
* @param string $targetEncoding The encoding the source string should be converted to
|
||||
* @return string|bool the converted string or FALSE on failure.
|
||||
*/
|
||||
public function iconv($string, $sourceEncoding, $targetEncoding)
{
    // The native function takes the encodings first and the string last
    $convertedString = iconv($sourceEncoding, $targetEncoding, $string);

    return $convertedString;
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function mb_convert_encoding()
|
||||
* @see mb_convert_encoding()
|
||||
*
|
||||
* @param string $string The string to be converted
|
||||
* @param string $sourceEncoding The encoding of the source string
|
||||
* @param string $targetEncoding The encoding the source string should be converted to
|
||||
* @return string|bool the converted string or FALSE on failure.
|
||||
*/
|
||||
public function mb_convert_encoding($string, $sourceEncoding, $targetEncoding)
{
    // The native function takes the target encoding before the source encoding
    $convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);

    return $convertedString;
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function stream_get_line()
|
||||
* @see stream_get_line()
|
||||
*
|
||||
* @param resource $handle
|
||||
* @param int $length
|
||||
* @param string|void $ending
|
||||
* @return string|bool
|
||||
*/
|
||||
public function stream_get_line($handle, $length, $ending = null)
{
    // The native function expects a string delimiter; forwarding NULL is deprecated
    // on recent PHP versions. An empty string is the native default ("no delimiter").
    $delimiter = ($ending === null) ? '' : $ending;

    return stream_get_line($handle, $length, $delimiter);
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function str_getcsv()
|
||||
* @see str_getcsv()
|
||||
*
|
||||
* @param string $input
|
||||
* @param string|void $delimiter
|
||||
* @param string|void $enclosure
|
||||
* @param string|void $escape
|
||||
* @return array
|
||||
*/
|
||||
public function str_getcsv($input, $delimiter = null, $enclosure = null, $escape = null)
{
    // Forwarding NULL to the native function is version dependent: older PHP versions
    // silently fall back to the defaults, newer ones may treat the resulting empty
    // string differently (e.g. an empty escape meaning "no escape character").
    // Substituting the documented defaults keeps the behavior stable.
    if ($delimiter === null) {
        $delimiter = ',';
    }
    if ($enclosure === null) {
        $enclosure = '"';
    }
    if ($escape === null) {
        $escape = '\\';
    }

    return str_getcsv($input, $delimiter, $enclosure, $escape);
}
|
||||
|
||||
/**
|
||||
* Wrapper around global function function_exists()
|
||||
* @see function_exists()
|
||||
*
|
||||
* @param string $functionName
|
||||
* @return bool
|
||||
*/
|
||||
public function function_exists($functionName)
{
    // Plain delegation, so that availability checks can be stubbed in tests
    $isFunctionDefined = function_exists($functionName);

    return $isFunctionDefined;
}
|
||||
}
|
||||
|
@ -1,92 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Reader\CSV\Helper;
|
||||
|
||||
/**
|
||||
* Class EncodingHelper
|
||||
* This class provides helper functions to work with encodings.
|
||||
*
|
||||
* @package Box\Spout\Reader\CSV\Helper
|
||||
*/
|
||||
class EncodingHelper
|
||||
{
|
||||
/** Definition of the encodings that can have a BOM */
|
||||
const ENCODING_UTF8 = 'UTF-8';
|
||||
const ENCODING_UTF16_LE = 'UTF-16LE';
|
||||
const ENCODING_UTF16_BE = 'UTF-16BE';
|
||||
const ENCODING_UTF32_LE = 'UTF-32LE';
|
||||
const ENCODING_UTF32_BE = 'UTF-32BE';
|
||||
|
||||
/** Definition of the BOMs for the different encodings */
|
||||
const BOM_UTF8 = "\xEF\xBB\xBF";
|
||||
const BOM_UTF16_LE = "\xFF\xFE";
|
||||
const BOM_UTF16_BE = "\xFE\xFF";
|
||||
const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
|
||||
const BOM_UTF32_BE = "\x00\x00\xFE\xFF";
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
|
||||
protected $supportedEncodingsWithBom;
|
||||
|
||||
/**
|
||||
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
|
||||
*/
|
||||
public function __construct($globalFunctionsHelper)
|
||||
{
|
||||
$this->globalFunctionsHelper = $globalFunctionsHelper;
|
||||
|
||||
$this->supportedEncodingsWithBom = [
|
||||
self::ENCODING_UTF8 => self::BOM_UTF8,
|
||||
self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
|
||||
self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
|
||||
self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
|
||||
self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes to use as offset in order to skip the BOM.
|
||||
*
|
||||
* @param resource $filePointer Pointer to the file to check
|
||||
* @param string $encoding Encoding of the file to check
|
||||
* @return int Bytes offset to apply to skip the BOM (0 means no BOM)
|
||||
*/
|
||||
public function getBytesOffsetToSkipBOM($filePointer, $encoding)
|
||||
{
|
||||
$byteOffsetToSkipBom = 0;
|
||||
|
||||
if ($this->hasBom($filePointer, $encoding)) {
|
||||
$bomUsed = $this->supportedEncodingsWithBom[$encoding];
|
||||
|
||||
// we skip the N first bytes
|
||||
$byteOffsetToSkipBom = strlen($bomUsed);
|
||||
}
|
||||
|
||||
return $byteOffsetToSkipBom;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the file identified by the given pointer has a BOM.
|
||||
*
|
||||
* @param resource $filePointer Pointer to the file to check
|
||||
* @param string $encoding Encoding of the file to check
|
||||
* @return bool TRUE if the file has a BOM, FALSE otherwise
|
||||
*/
|
||||
protected function hasBOM($filePointer, $encoding)
|
||||
{
|
||||
$hasBOM = false;
|
||||
|
||||
$this->globalFunctionsHelper->rewind($filePointer);
|
||||
|
||||
if (array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
|
||||
$potentialBom = $this->supportedEncodingsWithBom[$encoding];
|
||||
$numBytesInBom = strlen($potentialBom);
|
||||
|
||||
$hasBOM = ($this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
|
||||
}
|
||||
|
||||
return $hasBOM;
|
||||
}
|
||||
}
|
@ -4,6 +4,7 @@ namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Reader\AbstractReader;
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
|
||||
/**
|
||||
* Class Reader
|
||||
@ -26,7 +27,7 @@ class Reader extends AbstractReader
|
||||
protected $fieldEnclosure = '"';
|
||||
|
||||
/** @var string Encoding of the CSV file to be read */
|
||||
protected $encoding = 'UTF-8';
|
||||
protected $encoding = EncodingHelper::ENCODING_UTF8;
|
||||
|
||||
/**
|
||||
* Sets the field delimiter for the CSV.
|
||||
@ -69,6 +70,7 @@ class Reader extends AbstractReader
|
||||
|
||||
/**
|
||||
* Opens the file at the given path to make it ready to be read.
|
||||
* If setEncoding() was not called, it assumes that the file is encoded in UTF-8.
|
||||
*
|
||||
* @param string $filePath Path of the CSV file to be read
|
||||
* @return void
|
||||
|
@ -2,8 +2,8 @@
|
||||
|
||||
namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Reader\CSV\Helper\EncodingHelper;
|
||||
use Box\Spout\Reader\IteratorInterface;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
|
||||
/**
|
||||
* Class RowIterator
|
||||
@ -37,6 +37,9 @@ class RowIterator implements IteratorInterface
|
||||
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
|
||||
protected $globalFunctionsHelper;
|
||||
|
||||
/** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
|
||||
protected $encodingHelper;
|
||||
|
||||
/**
|
||||
* @param resource $filePointer Pointer to the CSV file to read
|
||||
* @param string $fieldDelimiter Character that delimits fields
|
||||
@ -101,6 +104,7 @@ class RowIterator implements IteratorInterface
|
||||
* @link http://php.net/manual/en/iterator.next.php
|
||||
*
|
||||
* @return void
|
||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||
*/
|
||||
public function next()
|
||||
{
|
||||
@ -109,7 +113,8 @@ class RowIterator implements IteratorInterface
|
||||
|
||||
if (!$this->hasReachedEndOfFile) {
|
||||
do {
|
||||
$lineData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
|
||||
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
|
||||
} while ($lineData === false || ($lineData !== null && $this->isEmptyLine($lineData)));
|
||||
|
||||
if ($lineData !== false && $lineData !== null) {
|
||||
@ -119,6 +124,25 @@ class RowIterator implements IteratorInterface
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next line, converted if necessary to UTF-8.
|
||||
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
|
||||
*
|
||||
* @return string The next line for the current file pointer, encoded in UTF-8
|
||||
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
|
||||
*/
|
||||
protected function getNextUTF8EncodedLine()
{
    // stream_get_line() interprets a length of 0 as "use the default chunk size" (8192 bytes),
    // which means longer lines would be returned in several pieces. Use a larger, explicit limit.
    $maxBytesPerLine = 32768;

    // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
    $encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding);
    $encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, $maxBytesPerLine, $encodedEOLDelimiter);

    // Once the line has been read, it can be converted to UTF-8
    $utf8EncodedLineData = $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding);

    return $utf8EncodedLineData;
}
|
||||
|
||||
/**
|
||||
* @param array $lineData Array containing the cells value for the line
|
||||
* @return bool Whether the given line is empty
|
||||
|
@ -4,6 +4,7 @@ namespace Box\Spout\Writer\CSV;
|
||||
|
||||
use Box\Spout\Writer\AbstractWriter;
|
||||
use Box\Spout\Common\Exception\IOException;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
|
||||
/**
|
||||
* Class Writer
|
||||
@ -15,7 +16,6 @@ class Writer extends AbstractWriter
|
||||
{
|
||||
/** Number of rows to write before flushing */
|
||||
const FLUSH_THRESHOLD = 500;
|
||||
const BOM_UTF8 = "\xEF\xBB\xBF";
|
||||
|
||||
/** @var string Content-Type value for the header */
|
||||
protected static $headerContentType = 'text/csv; charset=UTF-8';
|
||||
@ -61,7 +61,7 @@ class Writer extends AbstractWriter
|
||||
protected function openWriter()
|
||||
{
|
||||
// Adds UTF-8 BOM for Unicode compatibility
|
||||
$this->globalFunctionsHelper->fputs($this->filePointer, self::BOM_UTF8);
|
||||
$this->globalFunctionsHelper->fputs($this->filePointer, EncodingHelper::BOM_UTF8);
|
||||
}
|
||||
|
||||
/**
|
||||
|
223
tests/Spout/Common/Helper/EncodingHelperTest.php
Normal file
223
tests/Spout/Common/Helper/EncodingHelperTest.php
Normal file
@ -0,0 +1,223 @@
|
||||
<?php
|
||||
|
||||
namespace Box\Spout\Common\Helper;
|
||||
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
 * Class EncodingHelperTest
 * Covers BOM detection and the UTF-8 conversion helpers, for both the
 * "iconv" and the "mbstring" code paths.
 *
 * @package Box\Spout\Common\Helper
 */
class EncodingHelperTest extends \PHPUnit_Framework_TestCase
{
    use TestUsingResource;

    /** The string "input" encoded in UTF-16LE (without BOM). Hard-coded so the fixtures do not depend on "iconv". */
    const INPUT_ENCODED_IN_UTF16_LE = "i\x00n\x00p\x00u\x00t\x00";

    /**
     * @return array
     */
    public function dataProviderForTestGetBytesOffsetToSkipBOM()
    {
        return [
            ['csv_with_utf8_bom.csv', EncodingHelper::ENCODING_UTF8, 3],
            ['csv_with_utf16le_bom.csv', EncodingHelper::ENCODING_UTF16_LE, 2],
            ['csv_with_utf16be_bom.csv', EncodingHelper::ENCODING_UTF16_BE, 2],
            ['csv_with_utf32le_bom.csv', EncodingHelper::ENCODING_UTF32_LE, 4],
            ['csv_with_utf32be_bom.csv', EncodingHelper::ENCODING_UTF32_BE, 4],
            ['csv_with_encoding_utf16le_no_bom.csv', EncodingHelper::ENCODING_UTF16_LE, 0],
            ['csv_standard.csv', EncodingHelper::ENCODING_UTF8, 0],
        ];
    }

    /**
     * @dataProvider dataProviderForTestGetBytesOffsetToSkipBOM
     *
     * @param string $fileName
     * @param string $encoding
     * @param int $expectedBytesOffset
     * @return void
     */
    public function testGetBytesOffsetToSkipBOM($fileName, $encoding, $expectedBytesOffset)
    {
        $resourcePath = $this->getResourcePath($fileName);
        $filePointer = fopen($resourcePath, 'r');

        $encodingHelper = new EncodingHelper(new GlobalFunctionsHelper());
        $bytesOffset = $encodingHelper->getBytesOffsetToSkipBOM($filePointer, $encoding);

        // release the handle before asserting, so that it is closed even when the assertion fails
        fclose($filePointer);

        $this->assertEquals($expectedBytesOffset, $bytesOffset);
    }

    /**
     * @return array
     */
    public function dataProviderForIconvOrMbstringUsage()
    {
        $shouldUseIconv = true;
        $shouldNotUseIconv = false;

        return [
            [$shouldUseIconv],
            [$shouldNotUseIconv],
        ];
    }

    /**
     * @dataProvider dataProviderForIconvOrMbstringUsage
     * @expectedException \Box\Spout\Common\Exception\EncodingConversionException
     *
     * @param bool $shouldUseIconv
     * @return void
     */
    public function testAttemptConversionToUTF8ShouldThrowIfConversionFailed($shouldUseIconv)
    {
        $encodingHelperStub = $this->createEncodingHelperStub(
            $shouldUseIconv,
            true,
            $this->createFailingGlobalFunctionsHelperStub()
        );

        $encodingHelperStub->attemptConversionToUTF8('input', EncodingHelper::ENCODING_UTF16_LE);
    }

    /**
     * @expectedException \Box\Spout\Common\Exception\EncodingConversionException
     *
     * @return void
     */
    public function testAttemptConversionToUTF8ShouldThrowIfConversionNotSupported()
    {
        $encodingHelperStub = $this->createEncodingHelperStub(false, false);

        $encodingHelperStub->attemptConversionToUTF8('input', EncodingHelper::ENCODING_UTF16_LE);
    }

    /**
     * @dataProvider dataProviderForIconvOrMbstringUsage
     *
     * @param bool $shouldUseIconv
     * @return void
     */
    public function testAttemptConversionToUTF8ShouldReturnReencodedString($shouldUseIconv)
    {
        $encodingHelperStub = $this->createEncodingHelperStub($shouldUseIconv, true, new GlobalFunctionsHelper());

        $decodedString = $encodingHelperStub->attemptConversionToUTF8(
            self::INPUT_ENCODED_IN_UTF16_LE,
            EncodingHelper::ENCODING_UTF16_LE
        );

        $this->assertEquals('input', $decodedString);
    }

    /**
     * Converting to UTF-8 a string that is already declared as UTF-8 must not
     * go through any conversion function.
     *
     * @return void
     */
    public function testAttemptConversionToUTF8ShouldBeNoopWhenTargetIsUTF8()
    {
        /** @var EncodingHelper $encodingHelperStub */
        $encodingHelperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\EncodingHelper')
            ->disableOriginalConstructor()
            ->setMethods(['canUseIconv'])
            ->getMock();
        $encodingHelperStub->expects($this->never())->method('canUseIconv');

        $decodedString = $encodingHelperStub->attemptConversionToUTF8('input', EncodingHelper::ENCODING_UTF8);
        $this->assertEquals('input', $decodedString);
    }

    /**
     * @dataProvider dataProviderForIconvOrMbstringUsage
     * @expectedException \Box\Spout\Common\Exception\EncodingConversionException
     *
     * @param bool $shouldUseIconv
     * @return void
     */
    public function testAttemptConversionFromUTF8ShouldThrowIfConversionFailed($shouldUseIconv)
    {
        $encodingHelperStub = $this->createEncodingHelperStub(
            $shouldUseIconv,
            true,
            $this->createFailingGlobalFunctionsHelperStub()
        );

        $encodingHelperStub->attemptConversionFromUTF8('input', EncodingHelper::ENCODING_UTF16_LE);
    }

    /**
     * @expectedException \Box\Spout\Common\Exception\EncodingConversionException
     *
     * @return void
     */
    public function testAttemptConversionFromUTF8ShouldThrowIfConversionNotSupported()
    {
        $encodingHelperStub = $this->createEncodingHelperStub(false, false);

        $encodingHelperStub->attemptConversionFromUTF8('input', EncodingHelper::ENCODING_UTF16_LE);
    }

    /**
     * @dataProvider dataProviderForIconvOrMbstringUsage
     *
     * @param bool $shouldUseIconv
     * @return void
     */
    public function testAttemptConversionFromUTF8ShouldReturnReencodedString($shouldUseIconv)
    {
        $encodingHelperStub = $this->createEncodingHelperStub($shouldUseIconv, true, new GlobalFunctionsHelper());

        $encodedString = $encodingHelperStub->attemptConversionFromUTF8('input', EncodingHelper::ENCODING_UTF16_LE);

        $this->assertEquals(self::INPUT_ENCODED_IN_UTF16_LE, $encodedString);
    }

    /**
     * @return void
     */
    public function testAttemptConversionFromUTF8ShouldBeNoopWhenTargetIsUTF8()
    {
        /** @var EncodingHelper $encodingHelperStub */
        $encodingHelperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\EncodingHelper')
            ->disableOriginalConstructor()
            ->setMethods(['canUseIconv'])
            ->getMock();
        $encodingHelperStub->expects($this->never())->method('canUseIconv');

        $encodedString = $encodingHelperStub->attemptConversionFromUTF8('input', EncodingHelper::ENCODING_UTF8);
        $this->assertEquals('input', $encodedString);
    }

    /**
     * Builds a GlobalFunctionsHelper stub whose conversion functions always fail (return FALSE).
     *
     * @return \Box\Spout\Common\Helper\GlobalFunctionsHelper
     */
    private function createFailingGlobalFunctionsHelperStub()
    {
        $helperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
            ->setMethods(['iconv', 'mb_convert_encoding'])
            ->getMock();
        $helperStub->method('iconv')->willReturn(false);
        $helperStub->method('mb_convert_encoding')->willReturn(false);

        return $helperStub;
    }

    /**
     * Builds an EncodingHelper whose availability checks are stubbed.
     *
     * @param bool $canUseIconv Value returned by canUseIconv()
     * @param bool $canUseMbString Value returned by canUseMbString()
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper|null $globalFunctionsHelper Helper passed to the constructor; when NULL, the original constructor is not called
     * @return EncodingHelper
     */
    private function createEncodingHelperStub($canUseIconv, $canUseMbString, $globalFunctionsHelper = null)
    {
        $mockBuilder = $this->getMockBuilder('\Box\Spout\Common\Helper\EncodingHelper')
            ->setMethods(['canUseIconv', 'canUseMbString']);

        if ($globalFunctionsHelper === null) {
            $mockBuilder->disableOriginalConstructor();
        } else {
            $mockBuilder->setConstructorArgs([$globalFunctionsHelper]);
        }

        /** @var EncodingHelper $encodingHelperStub */
        $encodingHelperStub = $mockBuilder->getMock();
        $encodingHelperStub->method('canUseIconv')->willReturn($canUseIconv);
        $encodingHelperStub->method('canUseMbString')->willReturn($canUseMbString);

        return $encodingHelperStub;
    }
}
|
@ -2,8 +2,9 @@
|
||||
|
||||
namespace Box\Spout\Reader\CSV;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Reader\ReaderFactory;
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
use Box\Spout\TestUsingResource;
|
||||
|
||||
/**
|
||||
@ -167,15 +168,96 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function dataProviderForTestReadShouldSkipBom()
{
    // One fixture per encoding that can carry a BOM: [file name, file encoding]
    $filesWithBom = [];

    $filesWithBom[] = ['csv_with_utf8_bom.csv', EncodingHelper::ENCODING_UTF8];
    $filesWithBom[] = ['csv_with_utf16le_bom.csv', EncodingHelper::ENCODING_UTF16_LE];
    $filesWithBom[] = ['csv_with_utf16be_bom.csv', EncodingHelper::ENCODING_UTF16_BE];
    $filesWithBom[] = ['csv_with_utf32le_bom.csv', EncodingHelper::ENCODING_UTF32_LE];
    $filesWithBom[] = ['csv_with_utf32be_bom.csv', EncodingHelper::ENCODING_UTF32_BE];

    return $filesWithBom;
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadShouldSkipBom
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param string $fileEncoding
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSkipUtf8Bom()
|
||||
public function testReadShouldSkipBom($fileName, $fileEncoding)
|
||||
{
|
||||
$allRows = $this->getAllRowsForFile('csv_with_utf8_bom.csv');
|
||||
$allRows = $this->getAllRowsForFile($fileName, ',', '"', $fileEncoding);
|
||||
|
||||
$expectedRows = [
|
||||
['csv--11', 'csv--12', 'csv--13'],
|
||||
['csv--21', 'csv--22', 'csv--23'],
|
||||
['csv--31', 'csv--32', 'csv--33'],
|
||||
];
|
||||
$this->assertEquals($expectedRows, $allRows);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function dataProviderForTestReadShouldSupportNonUTF8FilesWithoutBOMs()
{
    // Each fixture is read twice: once through "iconv" (TRUE), once through the fallback (FALSE).
    // Data set layout: [file name, file encoding, should use iconv]
    return [
        ['csv_with_encoding_utf16le_no_bom.csv', EncodingHelper::ENCODING_UTF16_LE, true],
        ['csv_with_encoding_utf16le_no_bom.csv', EncodingHelper::ENCODING_UTF16_LE, false],
        ['csv_with_encoding_cp1252.csv', 'CP1252', true],
        ['csv_with_encoding_cp1252.csv', 'CP1252', false],
    ];
}
|
||||
|
||||
/**
|
||||
* @dataProvider dataProviderForTestReadShouldSupportNonUTF8FilesWithoutBOMs
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param string $fileEncoding
|
||||
* @param bool $shouldUseIconv
|
||||
* @return void
|
||||
*/
|
||||
public function testReadShouldSupportNonUTF8FilesWithoutBOMs($fileName, $fileEncoding, $shouldUseIconv)
{
    // Only function_exists() is stubbed: it reports "iconv" as (un)available
    // depending on the data set, and "mb_convert_encoding" as always available.
    $globalFunctionsHelperStub = $this->getMockBuilder('\Box\Spout\Common\Helper\GlobalFunctionsHelper')
        ->setMethods(['function_exists'])
        ->getMock();
    $globalFunctionsHelperStub
        ->method('function_exists')
        ->will($this->returnValueMap([
            ['iconv', $shouldUseIconv],
            ['mb_convert_encoding', true],
        ]));

    /** @var \Box\Spout\Reader\CSV\Reader $reader */
    $reader = ReaderFactory::create(Type::CSV);
    $reader
        ->setGlobalFunctionsHelper($globalFunctionsHelperStub)
        ->setEncoding($fileEncoding)
        ->open($this->getResourcePath($fileName));

    // Collect every row of every sheet
    $readRows = [];
    foreach ($reader->getSheetIterator() as $sheet) {
        foreach ($sheet->getRowIterator() as $row) {
            $readRows[] = $row;
        }
    }

    $reader->close();

    $this->assertEquals(
        [
            ['csv--11', 'csv--12', 'csv--13'],
            ['csv--21', 'csv--22', 'csv--23'],
            ['csv--31', 'csv--32', 'csv--33'],
        ],
        $readRows
    );
}
|
||||
@ -228,18 +310,25 @@ class ReaderTest extends \PHPUnit_Framework_TestCase
|
||||
* @param string $fileName
|
||||
* @param string|void $fieldDelimiter
|
||||
* @param string|void $fieldEnclosure
|
||||
* @param string|void $encoding
|
||||
* @return array All the read rows the given file
|
||||
*/
|
||||
private function getAllRowsForFile($fileName, $fieldDelimiter = ",", $fieldEnclosure = '"')
|
||||
private function getAllRowsForFile(
|
||||
$fileName,
|
||||
$fieldDelimiter = ',',
|
||||
$fieldEnclosure = '"',
|
||||
$encoding = EncodingHelper::ENCODING_UTF8)
|
||||
{
|
||||
$allRows = [];
|
||||
$resourcePath = $this->getResourcePath($fileName);
|
||||
|
||||
/** @var \Box\Spout\Reader\CSV\Reader $reader */
|
||||
$reader = ReaderFactory::create(Type::CSV);
|
||||
$reader->setFieldDelimiter($fieldDelimiter);
|
||||
$reader->setFieldEnclosure($fieldEnclosure);
|
||||
|
||||
$reader->open($resourcePath);
|
||||
$reader
|
||||
->setFieldDelimiter($fieldDelimiter)
|
||||
->setFieldEnclosure($fieldEnclosure)
|
||||
->setEncoding($encoding)
|
||||
->open($resourcePath);
|
||||
|
||||
foreach ($reader->getSheetIterator() as $sheetIndex => $sheet) {
|
||||
foreach ($sheet->getRowIterator() as $rowIndex => $row) {
|
||||
|
@ -2,8 +2,9 @@
|
||||
|
||||
namespace Box\Spout\Writer\CSV;
|
||||
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\TestUsingResource;
|
||||
use Box\Spout\Common\Type;
|
||||
use Box\Spout\Common\Helper\EncodingHelper;
|
||||
use Box\Spout\Writer\WriterFactory;
|
||||
|
||||
/**
|
||||
@ -70,7 +71,7 @@ class WriterTest extends \PHPUnit_Framework_TestCase
|
||||
];
|
||||
$writtenContent = $this->writeToCsvFileAndReturnWrittenContent($allRows, 'csv_with_utf8_bom.csv');
|
||||
|
||||
$this->assertContains(Writer::BOM_UTF8, $writtenContent, 'The CSV file should contain a UTF-8 BOM');
|
||||
$this->assertContains(EncodingHelper::BOM_UTF8, $writtenContent, 'The CSV file should contain a UTF-8 BOM');
|
||||
}
|
||||
|
||||
/**
|
||||
@ -162,6 +163,6 @@ class WriterTest extends \PHPUnit_Framework_TestCase
|
||||
private function trimWrittenContent($writtenContent)
|
||||
{
|
||||
// remove line feeds and UTF-8 BOM
|
||||
return trim($writtenContent, PHP_EOL . Writer::BOM_UTF8);
|
||||
return trim($writtenContent, PHP_EOL . EncodingHelper::BOM_UTF8);
|
||||
}
|
||||
}
|
||||
|
3
tests/resources/csv/csv_with_encoding_cp1252.csv
Normal file
3
tests/resources/csv/csv_with_encoding_cp1252.csv
Normal file
@ -0,0 +1,3 @@
|
||||
csv--11,csv--12,csv--13
|
||||
csv--21,csv--22,csv--23
|
||||
csv--31,csv--32,csv--33
|
|
BIN
tests/resources/csv/csv_with_encoding_utf16le_no_bom.csv
Normal file
BIN
tests/resources/csv/csv_with_encoding_utf16le_no_bom.csv
Normal file
Binary file not shown.
|
BIN
tests/resources/csv/csv_with_utf16be_bom.csv
Normal file
BIN
tests/resources/csv/csv_with_utf16be_bom.csv
Normal file
Binary file not shown.
|
BIN
tests/resources/csv/csv_with_utf16le_bom.csv
Normal file
BIN
tests/resources/csv/csv_with_utf16le_bom.csv
Normal file
Binary file not shown.
|
BIN
tests/resources/csv/csv_with_utf32be_bom.csv
Normal file
BIN
tests/resources/csv/csv_with_utf32be_bom.csv
Normal file
Binary file not shown.
|
BIN
tests/resources/csv/csv_with_utf32le_bom.csv
Normal file
BIN
tests/resources/csv/csv_with_utf32le_bom.csv
Normal file
Binary file not shown.
|
@ -1,2 +1,3 @@
|
||||
csv--11,csv--12,csv--13
|
||||
csv--21,csv--22,csv--23
|
||||
csv--21,csv--22,csv--23
|
||||
csv--31,csv--32,csv--33
|
|
Loading…
x
Reference in New Issue
Block a user