[BUGFIX] Better handling of requests in ExternalLinktype
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / Linktype / ExternalLinktype.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator\Linktype;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use GuzzleHttp\Cookie\CookieJar;
18 use Mso\IdnaConvert\IdnaConvert;
19 use TYPO3\CMS\Core\Http\RequestFactory;
20 use TYPO3\CMS\Core\Utility\GeneralUtility;
21
/**
 * Link type of the link validator that checks external (http/https) URLs.
 *
 * A URL is requested through the injected RequestFactory; the outcome and the
 * parameters describing a failure are cached per original URL for the
 * lifetime of this object.
 */
class ExternalLinktype extends AbstractLinktype
{
    /**
     * Cached check results (bool) of the URLs which were already checked for the
     * current processing, keyed by the original URL
     *
     * @var array
     */
    protected $urlReports = [];

    /**
     * Cached error parameters of the URLs which were already checked for the
     * current processing, keyed by the original URL
     *
     * @var array
     */
    protected $urlErrorParams = [];

    /**
     * List of headers to be used for matching an URL for the current processing
     *
     * @var array
     */
    protected $additionalHeaders = [];

    /**
     * Factory used to send the HTTP requests
     *
     * @var RequestFactory
     */
    protected $requestFactory;

    /**
     * Error parameters (errorType, optionally exception, message) of the most
     * recent check; empty if that check recorded no error
     *
     * @var array
     */
    protected $errorParams = [];

    /**
     * @param RequestFactory|null $requestFactory Factory to use; when omitted an instance
     *                                            is obtained via GeneralUtility::makeInstance()
     */
    public function __construct(RequestFactory $requestFactory = null)
    {
        $this->requestFactory = $requestFactory ?? GeneralUtility::makeInstance(RequestFactory::class);
    }

    /**
     * Checks a given URL for validity
     *
     * The URL is first requested with HEAD; if that does not succeed, a ranged
     * GET request is tried. Result and error parameters are cached per
     * original URL, so a URL is requested only once per instance.
     *
     * @param string $origUrl The URL to check
     * @param array $softRefEntry The soft reference entry which builds the context of that URL
     * @param \TYPO3\CMS\Linkvalidator\LinkAnalyzer $reference Parent instance
     * @return bool TRUE on success or FALSE on error
     * @throws \InvalidArgumentException
     */
    public function checkLink($origUrl, $softRefEntry, $reference)
    {
        // use URL from cache, if available
        if (isset($this->urlReports[$origUrl])) {
            $this->setErrorParams($this->urlErrorParams[$origUrl]);
            return $this->urlReports[$origUrl];
        }

        // Start from a clean state: without this, a URL that cannot be
        // preprocessed would leave the result undefined (null would be cached
        // and never hit again by the isset() above) and error parameters of a
        // previously checked URL could be stored for this one.
        $isValidUrl = false;
        $this->errorParams = [];

        $options = [
            'cookies' => GeneralUtility::makeInstance(CookieJar::class),
            'allow_redirects' => ['strict' => true]
        ];
        $url = $this->preprocessUrl($origUrl);
        if (!empty($url)) {
            $isValidUrl = $this->requestUrl($url, 'HEAD', $options);
            if (!$isValidUrl) {
                // HEAD was not allowed or threw an error, now trying GET.
                // Only the first bytes are requested; the byte-range
                // specifier must not contain any whitespace.
                $options['headers']['Range'] = 'bytes=0-4048';
                $isValidUrl = $this->requestUrl($url, 'GET', $options);
            }
        }
        $this->urlReports[$origUrl] = $isValidUrl;
        $this->urlErrorParams[$origUrl] = $this->errorParams;
        return $isValidUrl;
    }

    /**
     * Check URL using the specified request method
     *
     * Resets the error parameters, sends the request and treats every
     * response with a status code below 300 as valid. For any other outcome
     * the error parameters are filled and FALSE is returned.
     *
     * @param string $url
     * @param string $method
     * @param array $options
     * @return bool TRUE only if a response with a status code below 300 was received
     */
    protected function requestUrl(string $url, string $method, array $options): bool
    {
        $this->errorParams = [];
        try {
            $response = $this->requestFactory->request($url, $method, $options);
            $statusCode = $response->getStatusCode();
            if ($statusCode < 300) {
                return true;
            }
            $this->registerRequestError($statusCode);
        } catch (\GuzzleHttp\Exception\TooManyRedirectsException $e) {
            // redirect loop or too many redirects
            // todo: change errorType to 'redirect' (breaking change)
            $this->registerRequestError('loop', $e->getMessage());
        } catch (\GuzzleHttp\Exception\ClientException $e) {
            // NOTE(review): presumably raised for 4xx responses - confirm against the Guzzle documentation
            $errorType = $e->hasResponse() ? $e->getResponse()->getStatusCode() : 'unknown';
            $this->registerRequestError($errorType, $e->getMessage());
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            $this->registerRequestError('network', $e->getMessage());
        } catch (\Exception $e) {
            // Generic catch for anything else that may go wrong
            $this->registerRequestError('exception', $e->getMessage());
        }
        return false;
    }

    /**
     * Stores the error parameters of a failed check
     *
     * Replaces the current error parameters by errorType, the exception
     * message (if given) and the localized message generated from both.
     *
     * @param int|string $errorType HTTP status code or symbolic error type
     * @param string|null $exceptionMessage Message of the causing exception, if any
     */
    private function registerRequestError($errorType, $exceptionMessage = null)
    {
        $errorParams = ['errorType' => $errorType];
        if ($exceptionMessage !== null) {
            $errorParams['exception'] = $exceptionMessage;
        }
        $errorParams['message'] = $this->getErrorMessage($errorParams);
        $this->errorParams = $errorParams;
    }

    /**
     * Generate the localized error message from the error params saved from the parsing
     *
     * @param array $errorParams All parameters needed for the rendering of the error message
     * @return string Validation error message
     */
    public function getErrorMessage($errorParams)
    {
        $lang = $this->getLanguageService();
        // Both keys are optional: status code errors carry no exception message
        $errorType = $errorParams['errorType'] ?? '';
        $exception = $errorParams['exception'] ?? '';
        // switch compares loosely on purpose, so numeric strings match the status codes as well
        switch ($errorType) {
            case 300:
                return sprintf($lang->getLL('list.report.externalerror'), $errorType);
            case 403:
                return $lang->getLL('list.report.pageforbidden403');
            case 404:
                return $lang->getLL('list.report.pagenotfound404');
            case 500:
                return $lang->getLL('list.report.internalerror500');
            case 'loop':
                return sprintf(
                    $lang->getLL('list.report.redirectloop'),
                    $exception,
                    ''
                );
            case 'exception':
                return sprintf($lang->getLL('list.report.httpexception'), $exception);
            case 'network':
                return $lang->getLL('list.report.networkexception');
            default:
                return sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $exception);
        }
    }

    /**
     * Get the external type from the softRefParserObj result
     *
     * @param array $value Reference properties
     * @param string $type Current type
     * @param string $key Validator hook name
     * @return string 'external' if the token value contains an http(s) URL, otherwise the given type
     */
    public function fetchType($value, $type, $key)
    {
        // A single match is sufficient, there is no need to collect all URLs
        $containsUrl = preg_match(
            '/((?:http|https))(?::\\/\\/)(?:[^\\s<>]+)/i',
            $value['tokenValue'] ?? ''
        );
        if ($containsUrl === 1) {
            $type = 'external';
        }
        return $type;
    }

    /**
     * Convert given URL to punycode to handle domains with non-ASCII characters
     *
     * @param string $url
     * @return string The converted URL, or an empty string if the conversion failed
     *                (the error parameters are set in that case)
     */
    protected function preprocessUrl(string $url): string
    {
        try {
            return (new IdnaConvert())->encode($url);
        } catch (\Exception $e) {
            // in case of any error, return empty url.
            $this->registerRequestError('exception', $e->getMessage());
            return '';
        }
    }
}