[TASK] Refactor to t3lib_http_Request for external URLs
authorPhilipp Gampe <forge.typo3.org@philippgampe.info>
Wed, 10 Aug 2011 16:46:23 +0000 (18:46 +0200)
committerPhilipp Gampe <forge.typo3.org@philippgampe.info>
Thu, 11 Aug 2011 17:02:02 +0000 (19:02 +0200)
Refactor external URL checker by using t3lib_http-Request.

This solves all outstanding issues with externel URL
checker.
Change-Id: Ib5d70f4c18fce2617dd9b4d0c675468c5c9558ed
Resolves: #28915
Resolves: #13849
Resolves: #13800
Fixes: #12225
Fixes: #13756
Fixes: #13547
Fixes: #13802
Fixes: #13847

typo3/sysext/linkvalidator/classes/linktype/class.tx_linkvalidator_linktype_external.php
typo3/sysext/linkvalidator/modfuncreport/locallang.xlf

index acd22dd..c351d61 100644 (file)
@@ -27,6 +27,7 @@
  *
  * @author Dimitri K├Ânig <dk@cabag.ch>
  * @author Michael Miousse <michael.miousse@infoglobe.ca>
+ * @author Philipp Gampe <typo3.dev@philippgampe.info>
  * @package TYPO3
  * @subpackage linkvalidator
  */
@@ -35,37 +36,36 @@ class tx_linkvalidator_linktype_External extends tx_linkvalidator_linktype_Abstr
        /**
         * Cached list of the URLs, which were already checked for the current processing.
         *
-        * @var array
+        * @var array $urlReports
         */
        protected $urlReports = array();
 
        /**
         * Cached list of all error parameters of the URLs, which were already checked for the current processing.
         *
-        * @var array
+        * @var array $urlErrorParams
         */
        protected $urlErrorParams = array();
 
        /**
         * List of headers to be used for metching an URL for the current processing
         *
-        * @var array
+        * @var array $additionalHeaders
         */
        protected $additionalHeaders = array();
 
 
        /**
-        * Checks a given URL + /path/filename.ext for validity
+        * Checks a given URL for validity
         *
-        * @param       string          $url: url to check
-        * @param        array     $softRefEntry: the softref entry which builds the context of that url
-        * @param       object          $reference:  parent instance of tx_linkvalidator_Processor
-        * @return      string          TRUE on success or FALSE on error
+        * @param string $url The URL to check
+        * @param array $softRefEntry The softref entry which builds the context of that URL
+        * @param tx_linkvalidator_Processor $reference Parent instance of tx_linkvalidator_Processor
+        * @return boolean TRUE on success or FALSE on error
         */
-       public function checkLink($url, $softRefEntry, $reference) {
+       public function checkLink($url, array $softRefEntry, tx_linkvalidator_Processor $reference) {
                $errorParams = array();
-               $report = array();
-               $additionalHeaders['User-Agent'] = 'User-Agent: Mozilla/5.0 TYPO3-linkvalidator';
+               $isValidUrl = TRUE;
 
                if (isset($this->urlReports[$url])) {
                        if(!$this->urlReports[$url]) {
@@ -76,108 +76,66 @@ class tx_linkvalidator_linktype_External extends tx_linkvalidator_linktype_Abstr
                        return $this->urlReports[$url];
                }
 
-                       // remove possible anchor from the url
-               if (strrpos($url, '#') !== FALSE) {
-                       $url = substr($url, 0, strrpos($url, '#'));
-               }
-
-                       // try to fetch the content of the URL
-               $content = t3lib_div::getURL($url, 1, $additionalHeaders, $report);
-
-               $tries = 0;
-               $lastUrl = $url;
-               while (($report['http_code'] == 301 || $report['http_code'] == 302
-                               || $report['http_code'] == 303 || $report['http_code'] == 307)
-                          && ($tries < 5)) {
-
-                               // split header into lines and find Location:
-                       $responseHeaders = t3lib_div::trimExplode(chr(10), $content, TRUE);
-                       foreach ($responseHeaders as $line) {
-                                       // construct new URL
-                               if ((preg_match('/Location: ([^\r\n]+)/', $line, $location))) {
-                                       if (isset($location[1])) {
-                                               $parsedUrl = parse_url($location[1]);
-                                               if (!isset($parsedUrl['host'])) {
-                                                               // the location did not contain a complete URI, build it!
-                                                       $parsedUrl = parse_url($lastUrl);
-                                                       $newUrl = $parsedUrl['scheme'] . '://' . (isset($parsedUrl['user']) ?
-                                                               $parsedUrl['user'] . (isset($parsedUrl['pass']) ? ':' . $parsedUrl['pass'] : '')
-                                                               : '') . $parsedUrl['host'] . (
-                                                       isset($parsedUrl['port']) ? ':' . $parsedUrl['port'] : '') . $location[1];
-                                               } else {
-                                                       $newUrl = $location[1];
-                                               }
-
-                                               if ($lastUrl === $newUrl) {
-                                                       break 2;
-                                               }
-                                       } else {
-                                               break 2;
-                                       }
-                               }
+               $config = array(
+                       'follow_redirects' => TRUE,
+                       'strict_redirects' => TRUE,
+               );
+
+                       /** @var t3lib_http_Request|HTTP_Request2 $request  */
+               $request = t3lib_div::makeInstance('t3lib_http_Request', $url, 'HEAD', $config);
+                       // Observe cookies
+               $request->setCookieJar(TRUE);
+
+               try {
+                               /** @var HTTP_Request2_Response $response  */
+                       $response = $request->send();
+                               // HEAD was not allowed, now trying GET
+                       if (isset($response) && $response->getStatus() === 405) {
+                               $request->setMethod('GET');
+                               $request->setHeader('Range', 'bytes = 0 - 4048');
+                                       /** @var HTTP_Request2_Response $response  */
+                               $response = $request->send();
                        }
-
-                               // now try to fetch again
-                       $content = t3lib_div::getURL($newUrl, 1, $additionalHeaders, $report);
-                       $lastUrl = $newUrl;
-                       $tries++;
                }
-
-
-               $response = TRUE;
-
-                       // analyze the response
-               if ($report['error']) {
-                               // More cURL error codes can be found here:
-                               // http://curl.haxx.se/libcurl/c/libcurl-errors.html
-                       if ($report['lib'] === 'cURL' && $report['error'] === 28) {
-                               $errorParams['errorType'] = 'cURL28';
-                       } elseif ($report['lib'] === 'cURL' && $report['error'] === 22) {
-                               if (strstr($report['message'], '404')) {
-                                       $errorParams['errorType'] = 404;
-                               } elseif(strstr($report['message'], '403')) {
-                                       $errorParams['errorType'] = 403;
-                               } elseif(strstr($report['message'], '500')) {
-                                       $errorParams['errorType'] = 500;
-                               }
-                       } elseif ($report['lib'] === 'cURL' && $report['error'] === 6) {
-                               $errorParams['errorType'] = 'cURL6';
-                       } elseif ($report['lib'] === 'cURL' && $report['error'] === 56) {
-                               $errorParams['errorType'] = 'cURL56';
+               catch (Exception $e) {
+                       $isValidUrl = FALSE;
+                               // we hit a redirect loop
+                       if ($e->getCode() === 40) {
+                                       // Parse the exception for more information
+                               $trace = $e->getTrace();
+                               $traceUrl = $trace[0]['args'][0]->getUrl()->getUrl();
+                               $traceCode = $trace[0]['args'][1]->getStatus();
+
+                               $errorParams['errorType'] = 'loop';
+                               $errorParams['location'] = $traceUrl;
+                               $errorParams['errorCode'] = $traceCode;
+                       } else {
+                               $errorParams['errorType'] = 'exception';
                        }
-
-                       $response = FALSE;
+                       $errorParams['message'] = $e->getMessage();
                }
 
-
-                       // special handling for more information
-               if (($report['http_code'] == 301) || ($report['http_code'] == 302)
-                       || ($report['http_code'] == 303) || ($report['http_code'] == 307)) {
-                               $errorParams['errorType'] = $report['http_code'];
-                               $errorParams['location'] = $lastUrl;
-                               $response = FALSE;
-               }
-
-               if ($report['http_code'] >= 300 && $response) {
-                       $errorParams['errorType'] = $report['http_code'];
-                       $response = FALSE;
+               if (isset($response) && $response->getStatus() >= 300) {
+                       $isValidUrl = FALSE;
+                       $errorParams['errorType'] = $response->getStatus();
+                       $errorParams['message'] = $response->getReasonPhrase();
                }
 
-               if(!$response) {
+               if(!$isValidUrl) {
                        $this->setErrorParams($errorParams);
                }
 
-               $this->urlReports[$url] = $response;
+               $this->urlReports[$url] = $isValidUrl;
                $this->urlErrorParams[$url] = $errorParams;
 
-               return $response;
+               return $isValidUrl;
        }
 
        /**
         * Generate the localized error message from the error params saved from the parsing.
         *
-        * @param   array    all parameters needed for the rendering of the error message
-        * @return  string    validation error message
+        * @param array $errorParams All parameters needed for the rendering of the error message
+        * @return string Validation error message
         */
        public function getErrorMessage($errorParams) {
                $errorType = $errorParams['errorType'];
@@ -186,13 +144,6 @@ class tx_linkvalidator_linktype_External extends tx_linkvalidator_linktype_Abstr
                                $response = sprintf($GLOBALS['LANG']->getLL('list.report.externalerror'), $errorType);
                                break;
 
-                       case 301:
-                       case 302:
-                       case 303:
-                       case 307:
-                               $response = sprintf($GLOBALS['LANG']->getLL('list.report.redirectloop'), $errorType, $errorParams['location']);
-                               break;
-
                        case 403:
                                $response = $GLOBALS['LANG']->getLL('list.report.pageforbidden403');
                                break;
@@ -205,35 +156,31 @@ class tx_linkvalidator_linktype_External extends tx_linkvalidator_linktype_Abstr
                                $response = $GLOBALS['LANG']->getLL('list.report.internalerror500');
                                break;
 
-                       case 'cURL6':
-                               $response = $GLOBALS['LANG']->getLL('list.report.couldnotresolvehost');
-                               break;
-
-                       case 'cURL28':
-                               $response = $GLOBALS['LANG']->getLL('list.report.timeout');
+                       case 'loop':
+                               $response = sprintf($GLOBALS['LANG']->getLL('list.report.redirectloop'), $errorParams['errorCode'], $errorParams['location']);
                                break;
 
-                       case 'cURL56':
-                               $response = $GLOBALS['LANG']->getLL('list.report.errornetworkdata');
+                       case 'exception':
+                               $response = sprintf($GLOBALS['LANG']->getLL('list.report.httpexception'), $errorParams['message']);
                                break;
 
                        default:
-                               $response = $GLOBALS['LANG']->getLL('list.report.noresponse');
+                               $response = sprintf($GLOBALS['LANG']->getLL('list.report.otherhttpcode'), $errorType, $errorParams['message']);
                }
 
                return $response;
        }
 
        /**
-        * get the external type from the softRefParserObj result.
+        * Get the external type from the softRefParserObj result.
         *
-        * @param   array         $value: reference properties
-        * @param   string       $type: current type
-        * @param   string       $key: validator hook name
-        * @return  string       fetched type
+        * @param array $value Reference properties
+        * @param string $type Current type
+        * @param string $key Validator hook name
+        * @return string Fetched type
         */
-       public function fetchType($value, $type, $key) {
-               preg_match_all('/((?:http|https|ftp|ftps))(?::\/\/)(?:[^\s<>]+)/i', $value['tokenValue'], $urls, PREG_PATTERN_ORDER);
+       public function fetchType(array $value, $type, $key) {
+               preg_match_all('/((?:http|https))(?::\/\/)(?:[^\s<>]+)/i', $value['tokenValue'], $urls, PREG_PATTERN_ORDER);
 
                if (!empty($urls[0][0])) {
                        $type = "external";
@@ -248,4 +195,4 @@ if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLA
        include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/linkvalidator/classes/linktypes/class.tx_linkvalidator_linktypes_external.php']);
 }
 
-?>
+?>
\ No newline at end of file
index 84fcd87..108f5b3 100644 (file)
                                <source>Failure with receiving network data.</source>
                                <target approved="yes">Failure with receiving network data.</target>
                        </trans-unit>
+                       <trans-unit id="list.report.otherhttpcode">
+                               <source>An error occurred (%s): "%s".</source>
+                               <target approved="yes">An error occurred (%s): "%s".</target>
+                       </trans-unit>
+                       <trans-unit id="list.report.httpexception">
+                               <source>Exception: %s</source>
+                               <target approved="yes">Exception: %s</target>
+                       </trans-unit>
                        <trans-unit id="list.msg.ok">
                                <source>Ok</source>
                                <target approved="yes">Ok</target>