[BUGFIX] CsvUtility method csvToArray does not handle enclosures right 11/41211/4
authorPatrick Broens <patrick@patrickbroens.nl>
Tue, 14 Jul 2015 14:24:07 +0000 (16:24 +0200)
committerPatrick Broens <patrick@patrickbroens.nl>
Tue, 14 Jul 2015 20:17:19 +0000 (22:17 +0200)
When we have a CSV string with enclosures, the string is sometimes not
correctly transformed into a multidimensional array. The cause of this
is the first str_getcsv() call, which is responsible for splitting the
input into rows. It returns rows where the first column no longer has
its enclosure.

This patch fixes this by using fgetcsv(). The input is written to an
in-memory stream (php://memory), which is then rewound and passed to
fgetcsv(). This way we get a correct multidimensional array from the
CSV string.

Change-Id: I042c1ab326ef2b5f5cdd2b1455da76ccefd5ae36
Resolves: #68127
Releases: master
Reviewed-on: http://review.typo3.org/41211
Reviewed-by: Sascha Egerer <sascha@sascha-egerer.de>
Tested-by: Sascha Egerer <sascha@sascha-egerer.de>
Reviewed-by: Christian Kuhn <lolli@schwarzbu.ch>
Tested-by: Christian Kuhn <lolli@schwarzbu.ch>
Reviewed-by: Patrick Broens <patrick@patrickbroens.nl>
Tested-by: Patrick Broens <patrick@patrickbroens.nl>
typo3/sysext/core/Classes/Utility/CsvUtility.php
typo3/sysext/core/Tests/Unit/Utility/CsvUtilityTest.php
typo3/sysext/frontend/Classes/DataProcessing/CommaSeparatedValueProcessor.php

index a744c05..fcf5a99 100644 (file)
@@ -22,26 +22,26 @@ class CsvUtility {
        /**
         * Convert a string, formatted as CSV, into an multidimensional array
         *
+        * This cannot be done by str_getcsv, since it's impossible to handle enclosed cells with a line feed in it
+        *
         * @param string $input The CSV input
         * @param string $fieldDelimiter The field delimiter
         * @param string $fieldEnclosure The field enclosure
-        * @param string $rowDelimiter The row delimiter
         * @param int $maximumColumns The maximum amount of columns
         * @return array
         */
-       static public function csvToArray($input, $fieldDelimiter = ',', $fieldEnclosure = '"', $rowDelimiter = LF, $maximumColumns = 0) {
+       static public function csvToArray($input, $fieldDelimiter = ',', $fieldEnclosure = '"', $maximumColumns = 0) {
                $multiArray = array();
                $maximumCellCount = 0;
 
-               // explode() would not work with enclosed newlines
-               $rows = str_getcsv($input, $rowDelimiter);
-
-               foreach ($rows as $row) {
-                       $cells = str_getcsv($row, $fieldDelimiter, $fieldEnclosure);
-
-                       $maximumCellCount = max(count($cells), $maximumCellCount);
-
-                       $multiArray[] = $cells;
+               if (($handle = fopen('php://memory', 'r+')) !== FALSE) {
+                       fwrite($handle, $input);
+                       rewind($handle);
+                       while (($cells = fgetcsv($handle, 0, $fieldDelimiter, $fieldEnclosure)) !== FALSE) {
+                               $maximumCellCount = max(count($cells), $maximumCellCount);
+                               $multiArray[] = $cells;
+                       }
+                       fclose($handle);
                }
 
                if ($maximumColumns > $maximumCellCount) {
@@ -66,4 +66,4 @@ class CsvUtility {
 
                return $multiArray;
        }
-}
\ No newline at end of file
+}
index 094e7c3..9ef3b4e 100644 (file)
@@ -29,8 +29,7 @@ class CsvUtilityTest extends UnitTestCase {
                        'Valid data' => array(
                                'input'  => 'Column A, Column B, Column C' . LF . 'Value, Value2, Value 3',
                                'fieldDelimiter' => ',',
-                               'fieldEnclosure' => '',
-                               'rowDelimiter' => LF,
+                               'fieldEnclosure' => '"',
                                'maximumColumns' => 0,
                                'expectedResult' => array(
                                        array('Column A', ' Column B', ' Column C'),
@@ -42,7 +41,6 @@ class CsvUtilityTest extends UnitTestCase {
                                'input'  => '"Column A", "Column B", "Column C"' . LF . '"Value", "Value2", "Value 3"',
                                'fieldDelimiter' => ',',
                                'fieldEnclosure' => '"',
-                               'rowDelimiter' => LF,
                                'maximumColumns' => 0,
                                'expectedResult' => array(
                                        array('Column A', 'Column B', 'Column C'),
@@ -54,7 +52,6 @@ class CsvUtilityTest extends UnitTestCase {
                                'input'  => '"Column A"; "Column B"; "Column C"' . LF . '"Value"; "Value2"; "Value 3"',
                                'fieldDelimiter' => ';',
                                'fieldEnclosure' => '"',
-                               'rowDelimiter' => LF,
                                'maximumColumns' => 0,
                                'expectedResult' => array(
                                        array('Column A', 'Column B', 'Column C'),
@@ -66,7 +63,6 @@ class CsvUtilityTest extends UnitTestCase {
                                'input'  => '"Column A"; "Column B"; "Column C"; "Column D"' . LF . '"Value"; "Value2"; "Value 3"',
                                'fieldDelimiter' => ';',
                                'fieldEnclosure' => '"',
-                               'rowDelimiter' => LF,
                                'maximumColumns' => 2,
                                'expectedResult' => array(
                                        array('Column A', 'Column B'),
@@ -78,24 +74,11 @@ class CsvUtilityTest extends UnitTestCase {
                                'input'  => '"Column A", "Column B", "Column C"' . LF . '"Value", "Value2", "Value 3"',
                                'fieldDelimiter' => ';',
                                'fieldEnclosure' => '"',
-                               'rowDelimiter' => LF,
                                'maximumColumns' => 0,
                                'expectedResult' => array(
                                        array('Column A, "Column B", "Column C"'),
                                        array('Value, "Value2", "Value 3"')
                                )
-                       ),
-
-                       'Data with comma as field delimiter and semicolons as row delimiter' => array(
-                               'input'  => '"Column A", "Column B", "Column C";"Value", "Value2", "Value 3"',
-                               'fieldDelimiter' => ',',
-                               'fieldEnclosure' => '"',
-                               'rowDelimiter' => ';',
-                               'maximumColumns' => 0,
-                               'expectedResult' => array(
-                                       array('Column A', 'Column B', 'Column C'),
-                                       array('Value', 'Value2', 'Value 3')
-                               )
                        )
                );
        }
@@ -104,7 +87,7 @@ class CsvUtilityTest extends UnitTestCase {
         * @dataProvider csvToArrayDataProvider
         * @test
         */
-       public function csvToArraySplitsAsExpected($input, $fieldDelimiter, $fieldEnclosure, $rowDelimiter, $maximumColumns, $expectedResult) {
-               $this->assertEquals($expectedResult, CsvUtility::csvToArray($input, $fieldDelimiter, $fieldEnclosure, $rowDelimiter, $maximumColumns));
+       public function csvToArraySplitsAsExpected($input, $fieldDelimiter, $fieldEnclosure, $maximumColumns, $expectedResult) {
+               $this->assertEquals($expectedResult, CsvUtility::csvToArray($input, $fieldDelimiter, $fieldEnclosure, $maximumColumns));
        }
 }
\ No newline at end of file
index 3fe236e..ebdaaf3 100644 (file)
@@ -91,14 +91,10 @@ class CommaSeparatedValueProcessor implements DataProcessorInterface {
                // Set the field enclosure which is " by default
                $fieldEnclosure = $cObj->stdWrapValue('fieldEnclosure', $processorConfiguration, '"');
 
-               // Set the row delimiter which is "LF" by default
-               $rowDelimiter = $cObj->stdWrapValue('rowDelimiter', $processorConfiguration, LF);
-
                $processedData[$targetVariableName] = CsvUtility::csvToArray(
                        $originalValue,
                        $fieldDelimiter,
                        $fieldEnclosure,
-                       $rowDelimiter,
                        (int)$maximumColumns
                );