[BUGFIX] Better handling of requests in ExternalLinktype 56/57356/2
authorSybille Peters <sypets@gmx.de>
Fri, 18 May 2018 18:03:07 +0000 (20:03 +0200)
committerStefan Neufeind <typo3.neufeind@speedpartner.de>
Sun, 24 Jun 2018 22:06:13 +0000 (00:06 +0200)
- Always check for existing response
- If HEAD request fails, a GET request should always be triggered
- Restructured, moved some local variables to class variables
- In case of redirect loop, output exception message instead of
  location and status code

Resolves: #83611
Resolves: #85067
Releases: master, 8.7
Change-Id: I1cf6ef4e3dbaa5fbc683affc7cf96a0dbeea75cd
Reviewed-on: https://review.typo3.org/57356
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Stefan Neufeind <typo3.neufeind@speedpartner.de>
Tested-by: Stefan Neufeind <typo3.neufeind@speedpartner.de>
typo3/sysext/linkvalidator/Classes/Linktype/ExternalLinktype.php

index 7f93fd4..f4a691f 100644 (file)
@@ -15,7 +15,6 @@ namespace TYPO3\CMS\Linkvalidator\Linktype;
  */
 
 use GuzzleHttp\Cookie\CookieJar;
-use GuzzleHttp\Exception\TooManyRedirectsException;
 use Mso\IdnaConvert\IdnaConvert;
 use TYPO3\CMS\Core\Http\RequestFactory;
 use TYPO3\CMS\Core\Utility\GeneralUtility;
@@ -47,69 +46,98 @@ class ExternalLinktype extends AbstractLinktype
     protected $additionalHeaders = [];
 
     /**
+     * @var RequestFactory
+     */
+    protected $requestFactory;
+
+    /**
+     * @var array $this->errorParams
+     */
+    protected $errorParams = [];
+
+    public function __construct(RequestFactory $requestFactory = null)
+    {
+        $this->requestFactory = $requestFactory ?: GeneralUtility::makeInstance(RequestFactory::class);
+    }
+
+    /**
      * Checks a given URL for validity
      *
-     * @param string $url The URL to check
+     * @param string $origUrl The URL to check
      * @param array $softRefEntry The soft reference entry which builds the context of that URL
      * @param \TYPO3\CMS\Linkvalidator\LinkAnalyzer $reference Parent instance
      * @return bool TRUE on success or FALSE on error
+     * @throws \InvalidArgumentException
      */
-    public function checkLink($url, $softRefEntry, $reference)
+    public function checkLink($origUrl, $softRefEntry, $reference)
     {
-        $errorParams = [];
-        $isValidUrl = true;
-        if (isset($this->urlReports[$url])) {
-            if (!$this->urlReports[$url]) {
-                if (is_array($this->urlErrorParams[$url])) {
-                    $this->setErrorParams($this->urlErrorParams[$url]);
-                }
-            }
-            return $this->urlReports[$url];
+        // use URL from cache, if available
+        if (isset($this->urlReports[$origUrl])) {
+            $this->setErrorParams($this->urlErrorParams[$origUrl]);
+            return $this->urlReports[$origUrl];
         }
         $options = [
             'cookies' => GeneralUtility::makeInstance(CookieJar::class),
             'allow_redirects' => ['strict' => true]
         ];
-
-        $requestFactory = GeneralUtility::makeInstance(RequestFactory::class);
-        try {
-            $url = $this->preprocessUrl($url);
-            $response = $requestFactory->request($url, 'HEAD', $options);
-            // HEAD was not allowed or threw an error, now trying GET
-            if ($response->getStatusCode() >= 400) {
+        $url = $this->preprocessUrl($origUrl);
+        if (!empty($url)) {
+            $isValidUrl = $this->requestUrl($url, 'HEAD', $options);
+            if (!$isValidUrl) {
+                // HEAD was not allowed or threw an error, now trying GET
                 $options['headers']['Range'] = 'bytes = 0 - 4048';
-                $response = $requestFactory->request($url, 'GET', $options);
+                $isValidUrl = $this->requestUrl($url, 'GET', $options);
             }
-            if ($response->getStatusCode() >= 300) {
-                $isValidUrl = false;
-                $errorParams['errorType'] = $response->getStatusCode();
-                $errorParams['message'] = $this->getErrorMessage($errorParams);
+        }
+        $this->urlReports[$origUrl] = $isValidUrl;
+        $this->urlErrorParams[$origUrl] = $this->errorParams;
+        return $isValidUrl;
+    }
+
+    /**
+     * Check URL using the specified request methods
+     *
+     * @param string $url
+     * @param string $method
+     * @param array $options
+     * @return bool
+     */
+    protected function requestUrl(string $url, string $method, array $options): bool
+    {
+        $this->errorParams = [];
+        $isValidUrl = false;
+        try {
+            $response = $this->requestFactory->request($url, $method, $options);
+            if ($response->getStatusCode() < 300) {
+                $isValidUrl = true;
+            } else {
+                $this->errorParams['errorType'] = $response->getStatusCode();
+                $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
             }
-        } catch (TooManyRedirectsException $e) {
-            $lastRequest = $e->getRequest();
-            $response = $e->getResponse();
-            $errorParams['errorType'] = 'loop';
-            $errorParams['location'] = (string)$lastRequest->getUri();
-            $errorParams['errorCode'] = $response->getStatusCode();
+            $isValidUrl = true;
+        } catch (\GuzzleHttp\Exception\TooManyRedirectsException $e) {
+            // redirect loop or too many redirects
+            // todo: change errorType to 'redirect' (breaking change)
+            $this->errorParams['errorType'] = 'loop';
+            $this->errorParams['exception'] = $e->getMessage();
+            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
         } catch (\GuzzleHttp\Exception\ClientException $e) {
-            $isValidUrl = false;
-            $errorParams['errorType'] = $e->getResponse()->getStatusCode();
-            $errorParams['message'] = $this->getErrorMessage($errorParams);
+            if ($e->hasResponse()) {
+                $this->errorParams['errorType'] = $e->getResponse()->getStatusCode();
+            } else {
+                $this->errorParams['errorType'] = 'unknown';
+            }
+            $this->errorParams['exception'] = $e->getMessage();
+            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
         } catch (\GuzzleHttp\Exception\RequestException $e) {
-            $isValidUrl = false;
-            $errorParams['errorType'] = 'network';
-            $errorParams['message'] = $this->getErrorMessage($errorParams);
+            $this->errorParams['errorType'] = 'network';
+            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
         } catch (\Exception $e) {
             // Generic catch for anything else that may go wrong
-            $isValidUrl = false;
-            $errorParams['errorType'] = 'exception';
-            $errorParams['message'] = $e->getMessage();
-        }
-        if (!$isValidUrl) {
-            $this->setErrorParams($errorParams);
+            $this->errorParams['errorType'] = 'exception';
+            $this->errorParams['exception'] = $e->getMessage();
+            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
         }
-        $this->urlReports[$url] = $isValidUrl;
-        $this->urlErrorParams[$url] = $errorParams;
         return $isValidUrl;
     }
 
@@ -125,30 +153,34 @@ class ExternalLinktype extends AbstractLinktype
         $errorType = $errorParams['errorType'];
         switch ($errorType) {
             case 300:
-                $response = sprintf($lang->getLL('list.report.externalerror'), $errorType);
+                $message = sprintf($lang->getLL('list.report.externalerror'), $errorType);
                 break;
             case 403:
-                $response = $lang->getLL('list.report.pageforbidden403');
+                $message = $lang->getLL('list.report.pageforbidden403');
                 break;
             case 404:
-                $response = $lang->getLL('list.report.pagenotfound404');
+                $message = $lang->getLL('list.report.pagenotfound404');
                 break;
             case 500:
-                $response = $lang->getLL('list.report.internalerror500');
+                $message = $lang->getLL('list.report.internalerror500');
                 break;
             case 'loop':
-                $response = sprintf($lang->getLL('list.report.redirectloop'), $errorParams['errorCode'], $errorParams['location']);
+                $message = sprintf(
+                    $lang->getLL('list.report.redirectloop'),
+                    $errorParams['exception'],
+                    ''
+                );
                 break;
             case 'exception':
-                $response = sprintf($lang->getLL('list.report.httpexception'), $errorParams['message']);
+                $message = sprintf($lang->getLL('list.report.httpexception'), $errorParams['exception']);
                 break;
             case 'network':
-                $response = $lang->getLL('list.report.networkexception');
+                $message = $lang->getLL('list.report.networkexception');
                 break;
             default:
-                $response = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $errorParams['message']);
+                $message = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $errorParams['exception']);
         }
-        return $response;
+        return $message;
     }
 
     /**
@@ -176,6 +208,14 @@ class ExternalLinktype extends AbstractLinktype
      */
     protected function preprocessUrl(string $url): string
     {
-        return (new IdnaConvert())->encode($url);
+        try {
+            return (new IdnaConvert())->encode($url);
+        } catch (\Exception $e) {
+            // in case of any error, return empty url.
+            $this->errorParams['errorType'] = 'exception';
+            $this->errorParams['exception'] = $e->getMessage();
+            $this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
+            return '';
+        }
     }
 }