[BUGFIX] GeneralUtility::getUrl() fails to process redirects sometimes 06/34306/2
author Dmitry Dulepov <dmitry@typo3.org>
Fri, 22 Feb 2013 09:16:34 +0000 (13:16 +0400)
committer Helmut Hummel <helmut.hummel@typo3.org>
Wed, 3 Dec 2014 15:56:34 +0000 (16:56 +0100)
If PHP's open_basedir is active, PHP refuses to enable
CURLOPT_FOLLOWLOCATION. In that case redirects are not followed. This
causes problems for extensions that fetch pages of the site and hit
redirects. For example, if the site uses http and parts of it use https,
extensions like "pagepath" or "solr" will not be able to work with the
"secondary" protocol. With this change, getUrl() follows such redirects
itself, taking the target from cURL's redirect_url info or from the
"Location" response header.

Change-Id: I2d9e50d2d00e4c8b0c930a929be7884fd0215c21
Resolves: #45741
Releases: 6.2, master
Reviewed-on: http://review.typo3.org/34306
Reviewed-by: Helmut Hummel <helmut.hummel@typo3.org>
Tested-by: Helmut Hummel <helmut.hummel@typo3.org>
typo3/sysext/core/Classes/Utility/GeneralUtility.php
typo3/sysext/core/Tests/Unit/Utility/Fixtures/GeneralUtilityFixture.php
typo3/sysext/core/Tests/Unit/Utility/GeneralUtilityTest.php

index dfb0d71..e7c5219 100755 (executable)
@@ -2463,14 +2463,17 @@ class GeneralUtility {
                                }
                                return FALSE;
                        }
+
+                       $followLocationSucceeded = @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
+
                        curl_setopt($ch, CURLOPT_URL, $url);
-                       curl_setopt($ch, CURLOPT_HEADER, $includeHeader ? 1 : 0);
+                       curl_setopt($ch, CURLOPT_HEADER, !$followLocationSucceeded || $includeHeader ? 1 : 0);
                        curl_setopt($ch, CURLOPT_NOBODY, $includeHeader == 2 ? 1 : 0);
                        curl_setopt($ch, CURLOPT_HTTPGET, $includeHeader == 2 ? 'HEAD' : 'GET');
                        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
                        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
                        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, max(0, (int)$GLOBALS['TYPO3_CONF_VARS']['SYS']['curlTimeout']));
-                       $followLocation = @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
+
                        if (is_array($requestHeaders)) {
                                curl_setopt($ch, CURLOPT_HTTPHEADER, $requestHeaders);
                        }
@@ -2488,21 +2491,42 @@ class GeneralUtility {
                                }
                        }
                        $content = curl_exec($ch);
+                       $curlInfo = curl_getinfo($ch);
+
+                       if (!$followLocationSucceeded) {
+                               // Check if we need to do redirects
+                               if ($curlInfo['http_code'] >= 300 && $curlInfo['http_code'] < 400) {
+                                       $locationUrl = $curlInfo['redirect_url'];
+                                       if (!$locationUrl) {
+                                               // Some curllib versions do not return redirect_url. Examine headers.
+                                               $locationUrl = self::getRedirectUrlFromHttpHeaders($content);
+                                       }
+                                       if ($locationUrl) {
+                                               $content = self::getUrl($locationUrl, $includeHeader, $requestHeaders, $report);
+                                               $followLocationSucceeded = TRUE;
+                                       } else {
+                                               // Failure: we got a redirection status code but not the URL to redirect to.
+                                               $content = FALSE;
+                                       }
+                               }
+                               if ($content && !$includeHeader) {
+                                       $content = self::stripHttpHeaders($content);
+                               }
+                       }
+
                        if (isset($report)) {
-                               if ($content === FALSE) {
+                               if (!$followLocationSucceeded && $curlInfo['http_code'] >= 300 && $curlInfo['http_code'] < 400) {
+                                       $report['http_code'] = $curlInfo['http_code'];
+                                       $report['content_type'] = $curlInfo['content_type'];
+                                       $report['error'] = CURLE_GOT_NOTHING;
+                                       $report['message'] = 'Expected "Location" header but got nothing.';
+                               } elseif ($content === FALSE) {
                                        $report['error'] = curl_errno($ch);
                                        $report['message'] = curl_error($ch);
-                               } else {
-                                       $curlInfo = curl_getinfo($ch);
-                                       // We hit a redirection but we couldn't follow it
-                                       if (!$followLocation && $curlInfo['status'] >= 300 && $curlInfo['status'] < 400) {
-                                               $report['error'] = -1;
-                                               $report['message'] = 'Couldn\'t follow location redirect (PHP configuration option open_basedir is in effect).';
-                                       } elseif ($includeHeader) {
-                                               // Set only for $includeHeader to work exactly like PHP variant
-                                               $report['http_code'] = $curlInfo['http_code'];
-                                               $report['content_type'] = $curlInfo['content_type'];
-                                       }
+                               } elseif ($includeHeader) {
+                                       // Set only for $includeHeader to work exactly like PHP variant
+                                       $report['http_code'] = $curlInfo['http_code'];
+                                       $report['content_type'] = $curlInfo['content_type'];
                                }
                        }
                        curl_close($ch);
@@ -2603,6 +2627,45 @@ Connection: close
        }
 
        /**
+        * Parses HTTP headers and returns the content of the "Location" header
+        * or the empty string if no such header found.
+        *
+        * @param string $content
+        * @return string
+        */
+       static protected function getRedirectUrlFromHttpHeaders($content) {
+               $result = '';
+               $headers = explode("\r\n", $content);
+               foreach ($headers as $header) {
+                       if ($header == '') {
+                               break;
+                       }
+                       if (preg_match('/^\s*Location\s*:/i', $header)) {
+                               list(, $result) = self::trimExplode(':', $header, FALSE, 2);
+                               if ($result) {
+                                       $result = self::locationHeaderUrl($result);
+                               }
+                               break;
+                       }
+               }
+               return $result;
+       }
+
+       /**
+        * Strips the HTTP header section from a raw HTTP response.
+        *
+        * The header section ends at the first empty line ("\r\n\r\n"); everything
+        * after that delimiter is returned. If the delimiter is not present, the
+        * content is returned unchanged.
+        *
+        * @param string $content Raw response: header lines, an empty line, then the body
+        * @return string The response without the leading header section
+        */
+       static protected function stripHttpHeaders($content) {
+               $headersEndPos = strpos($content, "\r\n\r\n");
+               // strpos() returns 0 if the delimiter is at the very start and FALSE if it
+               // is missing, so compare strictly instead of testing for truthiness.
+               if ($headersEndPos !== FALSE) {
+                       // Cast: older PHP versions return FALSE from substr() for an empty body,
+                       // which callers would mistake for a failed request.
+                       $content = (string)substr($content, $headersEndPos + 4);
+               }
+               return $content;
+       }
+
+       /**
         * Writes $content to the file $file
         *
         * @param string $file Filepath to write to
@@ -5320,4 +5383,4 @@ Connection: close
        static public function isRunningOnCgiServerApi() {
                return in_array(PHP_SAPI, self::$supportedCgiServerApis, TRUE);
        }
-}
\ No newline at end of file
+}
index 79132fc..5ac6604 100644 (file)
@@ -53,5 +53,24 @@ class GeneralUtilityFixture extends GeneralUtility {
                return FALSE;
        }
 
+       /**
+        * Public proxy for the protected GeneralUtility::getRedirectUrlFromHttpHeaders(),
+        * so that unit tests can call it directly.
+        *
+        * @param string $content Passed on to the parent method unchanged
+        * @return string Whatever the parent method returns
+        */
+       static public function getRedirectUrlFromHttpHeaders($content) {
+               $redirectUrl = parent::getRedirectUrlFromHttpHeaders($content);
+               return $redirectUrl;
+       }
 
-}
\ No newline at end of file
+       /**
+        * Public proxy for the protected GeneralUtility::stripHttpHeaders(),
+        * so that unit tests can call it directly.
+        *
+        * @param string $content Passed on to the parent method unchanged
+        * @return string Whatever the parent method returns
+        */
+       static public function stripHttpHeaders($content) {
+               $strippedContent = parent::stripHttpHeaders($content);
+               return $strippedContent;
+       }
+}
index 2e2e384..041f673 100644 (file)
@@ -4538,9 +4538,48 @@ text with a ' . $urlMatch . '$|s'),
         * @param string $input Text to recognise URLs from
         * @param string $expected Text with correctly detected URLs
         */
-       public function substUrlsInPlainText($input, $expectedPreg) {
+       public function substUrlsInPlainText($input, $expected) {
                $GLOBALS['TYPO3_DB'] = $this->getMock('TYPO3\\CMS\\Core\\Database\\DatabaseConnection', array(), array(), '', FALSE);
-               $this->assertTrue(preg_match($expectedPreg, Utility\GeneralUtility::substUrlsInPlainText($input, 1, 'http://example.com/index.php')) == 1);
+               $this->assertTrue(preg_match($expected, Utility\GeneralUtility::substUrlsInPlainText($input, 1, 'http://example.com/index.php')) == 1);
        }
 
+       /**
+        * Data provider for getRedirectUrlReturnsRedirectUrlFromHttpResponse.
+        *
+        * Each data set is array(raw HTTP response, expected redirect URL). Both
+        * responses carry a "Location: test" line in the body, after the empty line.
+        *
+        * @return array
+        */
+       public function getRedirectUrlFromHttpHeadersDataProvider() {
+               $responseWithLocation = "HTTP/1.0 302 Redirect\r\nServer: Apache\r\nLocation: http://example.com/\r\nX-pad: avoid browser bug\r\n\r\nLocation: test\r\n";
+               $responseWithoutLocation = "HTTP/1.0 302 Redirect\r\nServer: Apache\r\nX-pad: avoid browser bug\r\n\r\nLocation: test\r\n";
+
+               $dataSets = array();
+               $dataSets['Extracts redirect URL from Location header'] = array($responseWithLocation, 'http://example.com/');
+               $dataSets['Returns empty string if no Location is found in header'] = array($responseWithoutLocation, '');
+               return $dataSets;
+       }
+
+       /**
+        * Checks that the redirect URL extracted from a raw HTTP response matches
+        * the expected value of the data set.
+        *
+        * @param string $httpResponse
+        * @param string $expected
+        * @test
+        * @dataProvider getRedirectUrlFromHttpHeadersDataProvider
+        */
+       public function getRedirectUrlReturnsRedirectUrlFromHttpResponse($httpResponse, $expected) {
+               $actualRedirectUrl = GeneralUtilityFixture::getRedirectUrlFromHttpHeaders($httpResponse);
+               $this->assertEquals($expected, $actualRedirectUrl);
+       }
+
+       /**
+        * Data provider for stripHttpHeadersStripsHeadersFromHttpResponse.
+        *
+        * Each data set is array(raw HTTP response, expected content after the
+        * header section has been stripped).
+        *
+        * @return array
+        */
+       public function getStripHttpHeadersDataProvider() {
+               return array(
+                       'Simple content' => array("HTTP/1.0 302 Redirect\r\nServer: Apache\r\nX-pad: avoid browser bug\r\n\r\nHello, world!", 'Hello, world!'),
+                       // Only the first empty line separates headers from the body
+                       'Content with multiple returns' => array("HTTP/1.0 302 Redirect\r\nServer: Apache\r\nX-pad: avoid browser bug\r\n\r\nHello, world!\r\n\r\nAnother hello here!", "Hello, world!\r\n\r\nAnother hello here!"),
+                       // No header delimiter at all: the input must come back unchanged
+                       'Content without headers' => array('Hello, world!', 'Hello, world!'),
+               );
+       }
+
+       /**
+        * Checks that stripping the headers from a raw HTTP response leaves the
+        * expected content of the data set.
+        *
+        * @param string $httpResponse
+        * @param string $expected
+        * @test
+        * @dataProvider getStripHttpHeadersDataProvider
+        */
+       public function stripHttpHeadersStripsHeadersFromHttpResponse($httpResponse, $expected) {
+               $actualContent = GeneralUtilityFixture::stripHttpHeaders($httpResponse);
+               $this->assertEquals($expected, $actualContent);
+       }
 }