7f93fd435e4b6454aa5cafbae12b7b0605ead96e
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / Linktype / ExternalLinktype.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator\Linktype;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use GuzzleHttp\Cookie\CookieJar;
18 use GuzzleHttp\Exception\TooManyRedirectsException;
19 use Mso\IdnaConvert\IdnaConvert;
20 use TYPO3\CMS\Core\Http\RequestFactory;
21 use TYPO3\CMS\Core\Utility\GeneralUtility;
22
/**
 * Link type implementation that validates external (http/https) links
 * by requesting them and inspecting the HTTP response status.
 */
class ExternalLinktype extends AbstractLinktype
{
    /**
     * Cached validation results of the URLs already checked during the current
     * processing, keyed by the URL as it was passed to checkLink().
     *
     * @var array $urlReports
     */
    protected $urlReports = [];

    /**
     * Cached error parameters of the URLs already checked during the current
     * processing, keyed by the URL as it was passed to checkLink().
     *
     * @var array $urlErrorParams
     */
    protected $urlErrorParams = [];

    /**
     * List of headers to be used for matching an URL for the current processing
     *
     * @var array $additionalHeaders
     */
    protected $additionalHeaders = [];

    /**
     * Checks a given URL for validity
     *
     * The URL is requested with HEAD first; if that yields an HTTP error status,
     * a ranged GET request is tried before the link is reported as broken.
     * Results are cached per URL, so a URL is requested only once per instance.
     *
     * @param string $url The URL to check
     * @param array $softRefEntry The soft reference entry which builds the context of that URL
     * @param \TYPO3\CMS\Linkvalidator\LinkAnalyzer $reference Parent instance
     * @return bool TRUE on success or FALSE on error
     */
    public function checkLink($url, $softRefEntry, $reference)
    {
        // Answer repeated checks of the same URL from the cache.
        if (isset($this->urlReports[$url])) {
            $cachedErrorParams = $this->urlErrorParams[$url] ?? null;
            if (!$this->urlReports[$url] && is_array($cachedErrorParams)) {
                $this->setErrorParams($cachedErrorParams);
            }
            return $this->urlReports[$url];
        }

        $errorParams = [];
        $isValidUrl = true;
        $options = [
            'cookies' => GeneralUtility::makeInstance(CookieJar::class),
            'allow_redirects' => ['strict' => true]
        ];

        $requestFactory = GeneralUtility::makeInstance(RequestFactory::class);
        try {
            // Keep $url untouched: it is the cache key used for the lookup above.
            $requestUrl = $this->preprocessUrl($url);
            try {
                $response = $requestFactory->request($requestUrl, 'HEAD', $options);
                $retryWithGet = $response->getStatusCode() >= 400;
            } catch (\GuzzleHttp\Exception\BadResponseException $e) {
                // The HTTP client may be configured to throw on 4xx/5xx responses
                // (Guzzle's default "http_errors" behaviour); treat that exactly
                // like an error status so the GET fallback still runs.
                $retryWithGet = true;
            }
            // HEAD was not allowed or returned an error, now trying GET
            if ($retryWithGet) {
                // Only a small part of the document is needed to judge the link.
                $options['headers']['Range'] = 'bytes=0-4048';
                $response = $requestFactory->request($requestUrl, 'GET', $options);
            }
            if ($response->getStatusCode() >= 300) {
                $isValidUrl = false;
                $errorParams['errorType'] = $response->getStatusCode();
                $errorParams['message'] = $this->getErrorMessage($errorParams);
            }
        } catch (TooManyRedirectsException $e) {
            // A redirect loop is a broken link.
            $isValidUrl = false;
            // getResponse() is nullable; the exception can be raised without a response.
            $response = $e->getResponse();
            $errorParams['errorType'] = 'loop';
            $errorParams['location'] = (string)$e->getRequest()->getUri();
            $errorParams['errorCode'] = $response !== null ? $response->getStatusCode() : '';
            $errorParams['message'] = $this->getErrorMessage($errorParams);
        } catch (\GuzzleHttp\Exception\ClientException $e) {
            $isValidUrl = false;
            $errorParams['errorType'] = $e->getResponse()->getStatusCode();
            $errorParams['message'] = $this->getErrorMessage($errorParams);
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            $isValidUrl = false;
            $errorParams['errorType'] = 'network';
            $errorParams['message'] = $this->getErrorMessage($errorParams);
        } catch (\Exception $e) {
            // Generic catch for anything else that may go wrong
            $isValidUrl = false;
            $errorParams['errorType'] = 'exception';
            $errorParams['message'] = $e->getMessage();
        }

        if (!$isValidUrl) {
            $this->setErrorParams($errorParams);
        }
        // Store under the original URL so the cache lookup above can find it.
        $this->urlReports[$url] = $isValidUrl;
        $this->urlErrorParams[$url] = $errorParams;
        return $isValidUrl;
    }

    /**
     * Generate the localized error message from the error params saved from the parsing
     *
     * Reads 'errorType' and, depending on it, 'errorCode', 'location' and 'message'.
     * Optional entries that are missing are rendered as empty strings.
     *
     * @param array $errorParams All parameters needed for the rendering of the error message
     * @return string Validation error message
     */
    public function getErrorMessage($errorParams)
    {
        $lang = $this->getLanguageService();
        $errorType = $errorParams['errorType'];
        // checkLink() calls this method while 'message' is still being built,
        // so the optional entries must not be assumed to exist.
        $message = $errorParams['message'] ?? '';
        switch ($errorType) {
            case 300:
                $response = sprintf($lang->getLL('list.report.externalerror'), $errorType);
                break;
            case 403:
                $response = $lang->getLL('list.report.pageforbidden403');
                break;
            case 404:
                $response = $lang->getLL('list.report.pagenotfound404');
                break;
            case 500:
                $response = $lang->getLL('list.report.internalerror500');
                break;
            case 'loop':
                $response = sprintf(
                    $lang->getLL('list.report.redirectloop'),
                    $errorParams['errorCode'] ?? '',
                    $errorParams['location'] ?? ''
                );
                break;
            case 'exception':
                $response = sprintf($lang->getLL('list.report.httpexception'), $message);
                break;
            case 'network':
                $response = $lang->getLL('list.report.networkexception');
                break;
            default:
                $response = sprintf($lang->getLL('list.report.otherhttpcode'), $errorType, $message);
        }
        return $response;
    }

    /**
     * Get the external type from the softRefParserObj result
     *
     * Returns 'external' when the token value contains an http:// or https://
     * URL, otherwise the given type is returned unchanged.
     *
     * @param array $value Reference properties
     * @param string $type Current type
     * @param string $key Validator hook name
     * @return string Fetched type
     */
    public function fetchType($value, $type, $key)
    {
        $tokenValue = (string)($value['tokenValue'] ?? '');
        // One match is sufficient to classify the reference as external.
        if (preg_match('/((?:http|https))(?::\\/\\/)(?:[^\\s<>]+)/i', $tokenValue) === 1) {
            $type = 'external';
        }
        return $type;
    }

    /**
     * Convert given URL to punycode to handle domains with non-ASCII characters
     *
     * @param string $url
     * @return string
     */
    protected function preprocessUrl(string $url): string
    {
        return (new IdnaConvert())->encode($url);
    }
}