From 03e85ffc21c15ed22096423e8a285741f064a12f Mon Sep 17 00:00:00 2001 From: Sebastian Fichera Date: Thu, 11 Feb 2016 17:12:54 -0600 Subject: [PATCH 1/4] Added EOL configuration support while reading CSV files... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhancement for #172 issue… --- src/Spout/Reader/CSV/Reader.php | 17 +++++++++++++++++ src/Spout/Reader/CSV/RowIterator.php | 8 ++++++-- src/Spout/Reader/CSV/Sheet.php | 4 ++-- src/Spout/Reader/CSV/SheetIterator.php | 4 ++-- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index 45a13ef..7fc5da8 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -29,6 +29,9 @@ class Reader extends AbstractReader /** @var string Encoding of the CSV file to be read */ protected $encoding = EncodingHelper::ENCODING_UTF8; + /** @var string Defines the End of line */ + protected $endOfLineCharacter = "\n"; + /** * Sets the field delimiter for the CSV. * Needs to be called before opening the reader. @@ -68,6 +71,19 @@ class Reader extends AbstractReader return $this; } + /** + * Sets the EOL for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldEnclosure Character that enclose fields + * @return Reader + */ + public function setEndOfLineCharacter($endOfLineCharacter) + { + $this->endOfLineCharacter = $endOfLineCharacter; + return $this; + } + /** * Opens the file at the given path to make it ready to be read. * If setEncoding() was not called, it assumes that the file is encoded in UTF-8. @@ -88,6 +104,7 @@ class Reader extends AbstractReader $this->fieldDelimiter, $this->fieldEnclosure, $this->encoding, + $this->endOfLineCharacter, $this->globalFunctionsHelper ); } diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index f8e33e1..0752d27 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -49,6 +49,9 @@ class RowIterator implements IteratorInterface /** @var string End of line delimiter, encoded using the same encoding as the CSV */ protected $encodedEOLDelimiter; + /** @var string End of line delimiter, given by the user as input. */ + protected $inputEOLDelimiter; + /** * @param resource $filePointer Pointer to the CSV file to read * @param string $fieldDelimiter Character that delimits fields @@ -56,12 +59,13 @@ class RowIterator implements IteratorInterface * @param string $encoding Encoding of the CSV file to be read * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $EOLDelimiter, $globalFunctionsHelper) { $this->filePointer = $filePointer; $this->fieldDelimiter = $fieldDelimiter; $this->fieldEnclosure = $fieldEnclosure; $this->encoding = $encoding; + $this->inputEOLDelimiter = $EOLDelimiter; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->encodingHelper = new EncodingHelper($globalFunctionsHelper); @@ -172,7 +176,7 @@ class RowIterator implements IteratorInterface protected function getEncodedEOLDelimiter() { if (!isset($this->encodedEOLDelimiter)) { - $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8("\n", $this->encoding); + $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding); } return $this->encodedEOLDelimiter; diff --git a/src/Spout/Reader/CSV/Sheet.php b/src/Spout/Reader/CSV/Sheet.php index f949a62..b9c66c7 100644 --- a/src/Spout/Reader/CSV/Sheet.php +++ b/src/Spout/Reader/CSV/Sheet.php @@ -21,9 +21,9 @@ class Sheet implements SheetInterface * @param string $encoding Encoding of the CSV file to be read * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) { - $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $globalFunctionsHelper); + $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); } /** diff --git a/src/Spout/Reader/CSV/SheetIterator.php b/src/Spout/Reader/CSV/SheetIterator.php index 4ce0d54..8ee2e99 100644 --- a/src/Spout/Reader/CSV/SheetIterator.php +++ b/src/Spout/Reader/CSV/SheetIterator.php @@ -25,9 +25,9 @@ class SheetIterator implements IteratorInterface * @param string $encoding Encoding of the CSV file to be read * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) { - $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $globalFunctionsHelper); + $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); } /** From 4827e56cac7077f3acdf83e7454e8b322e2823ef Mon Sep 17 00:00:00 2001 From: Sebastian Fichera Date: Thu, 11 Feb 2016 17:15:48 -0600 Subject: [PATCH 2/4] Added new public function usage to docs... --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c1e89b9..6bb73aa 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ use Box\Spout\Common\Type; $reader = ReaderFactory::create(Type::CSV); $reader->setFieldDelimiter('|'); $reader->setFieldEnclosure('@'); +$reader->setEndOfLineCharacter("\r"); ``` Additionally, if you need to read non UTF-8 files, you can specify the encoding of your file this way: From 8614f79da36dd79057c127d21435e2eddef473df Mon Sep 17 00:00:00 2001 From: Sebastian Fichera Date: Thu, 11 Feb 2016 17:51:24 -0600 Subject: [PATCH 3/4] Minor fixes in order to be ok with naming conventions and code documentation... --- src/Spout/Reader/CSV/Reader.php | 2 +- src/Spout/Reader/CSV/RowIterator.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php index 7fc5da8..af02def 100644 --- a/src/Spout/Reader/CSV/Reader.php +++ b/src/Spout/Reader/CSV/Reader.php @@ -75,7 +75,7 @@ class Reader extends AbstractReader * Sets the EOL for the CSV. * Needs to be called before opening the reader. * - * @param string $fieldEnclosure Character that enclose fields + * @param string $endOfLineCharacter used to properly get lines from the CSV file. * @return Reader */ public function setEndOfLineCharacter($endOfLineCharacter) diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php index 0752d27..42bdba4 100644 --- a/src/Spout/Reader/CSV/RowIterator.php +++ b/src/Spout/Reader/CSV/RowIterator.php @@ -59,13 +59,13 @@ class RowIterator implements IteratorInterface * @param string $encoding Encoding of the CSV file to be read * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $EOLDelimiter, $globalFunctionsHelper) + public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper) { $this->filePointer = $filePointer; $this->fieldDelimiter = $fieldDelimiter; $this->fieldEnclosure = $fieldEnclosure; $this->encoding = $encoding; - $this->inputEOLDelimiter = $EOLDelimiter; + $this->inputEOLDelimiter = $endOfLineDelimiter; $this->globalFunctionsHelper = $globalFunctionsHelper; $this->encodingHelper = new EncodingHelper($globalFunctionsHelper); From 86e26632f6b82e5ce418a3b01aa504f011e1f402 Mon Sep 17 00:00:00 2001 From: Sebastian Fichera Date: Fri, 12 Feb 2016 16:30:18 -0600 Subject: [PATCH 4/4] Added test case for custom EOL characters... --- tests/Spout/Reader/CSV/ReaderTest.php | 46 +++++++++++++++++++++++++ tests/resources/csv/csv_with_CR_EOL.csv | 1 + 2 files changed, 47 insertions(+) create mode 100644 tests/resources/csv/csv_with_CR_EOL.csv diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php index 00a0f2c..c4c00ba 100644 --- a/tests/Spout/Reader/CSV/ReaderTest.php +++ b/tests/Spout/Reader/CSV/ReaderTest.php @@ -377,4 +377,50 @@ class ReaderTest extends \PHPUnit_Framework_TestCase return $allRows; } + + /** + * @return array + */ + public function dataProviderForTestReadCustomEOL() + { + return [ + ['csv_with_CR_EOL.csv', "\r"], + ['csv_standard.csv', "\n"], + ]; + } + + /** + * @dataProvider dataProviderForTestReadCustomEOL + * + * @param string $fileName + * @param string $customEOL + * @return void + */ + public function testReadCustomEOLs($fileName, $customEOL) + { + $allRows = []; + $resourcePath = $this->getResourcePath($fileName); + + /** @var \Box\Spout\Reader\CSV\Reader $reader */ + $reader = ReaderFactory::create(Type::CSV); + $reader + ->setEndOfLineCharacter($customEOL) + ->open($resourcePath); + + foreach ($reader->getSheetIterator() as $sheet) { + foreach ($sheet->getRowIterator() as $row) { + $allRows[] = $row; + } + } + + $reader->close(); + + $expectedRows = [ + ['csv--11', 'csv--12', 'csv--13'], + ['csv--21', 'csv--22', 'csv--23'], + ['csv--31', 'csv--32', 'csv--33'], + ]; + $this->assertEquals($expectedRows, $allRows); + } + } diff --git a/tests/resources/csv/csv_with_CR_EOL.csv b/tests/resources/csv/csv_with_CR_EOL.csv new file mode 100644 index 0000000..004f328 --- /dev/null +++ b/tests/resources/csv/csv_with_CR_EOL.csv @@ -0,0 +1 @@ +csv--11,csv--12,csv--13 csv--21,csv--22,csv--23 csv--31,csv--32,csv--33 \ No newline at end of file