[TASK] Centralize sys_domain resolving
[Packages/TYPO3.CMS.git] / typo3 / sysext / frontend / Classes / Compatibility / LegacyDomainResolver.php
1 <?php
2 declare(strict_types = 1);
3
4 namespace TYPO3\CMS\Frontend\Compatibility;
5
6 /*
7 * This file is part of the TYPO3 CMS project.
8 *
9 * It is free software; you can redistribute it and/or modify it under
10 * the terms of the GNU General Public License, either version 2
11 * of the License, or any later version.
12 *
13 * For the full copyright and license information, please read the
14 * LICENSE.txt file that was distributed with this source code.
15 *
16 * The TYPO3 project - inspiring people to share!
17 */
18
19 use Psr\Http\Message\ServerRequestInterface;
20 use Symfony\Component\Routing\Exception\NoConfigurationException;
21 use Symfony\Component\Routing\Matcher\UrlMatcher;
22 use Symfony\Component\Routing\RequestContext;
23 use Symfony\Component\Routing\Route;
24 use Symfony\Component\Routing\RouteCollection;
25 use TYPO3\CMS\Backend\Routing\Exception\ResourceNotFoundException;
26 use TYPO3\CMS\Core\Cache\CacheManager;
27 use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface;
28 use TYPO3\CMS\Core\Database\ConnectionPool;
29 use TYPO3\CMS\Core\Exception\Page\RootLineException;
30 use TYPO3\CMS\Core\Http\NormalizedParams;
31 use TYPO3\CMS\Core\SingletonInterface;
32 use TYPO3\CMS\Core\Utility\GeneralUtility;
33 use TYPO3\CMS\Core\Utility\RootlineUtility;
34
/**
 * Resolves sys_domain entries when a Request object is given,
 * or a pageId is given or a rootpage Id is given (= if there is a sys_domain record on that specific page).
 * Always keeps the sorting in line.
 *
 * All sys_domain rows are loaded once (from the "cache_core" cache or, if that is empty or unusable,
 * from the database) when the object is constructed.
 *
 * @todo: would be nice to flush caches if sys_domain has been touched in DataHandler
 * @internal as this should ideally be wrapped inside the "main" site router in the future.
 */
class LegacyDomainResolver implements SingletonInterface
{
    /**
     * Runtime cache of domains per processed page ids.
     * Keyed by page id only; a value of NULL means "already resolved, nothing found".
     *
     * @var array
     */
    protected $domainDataCache = [];

    /**
     * @var FrontendInterface
     */
    protected $cache;

    /**
     * Whether a sys_domain like example.com should also match for my.blog.example.com
     *
     * @var bool
     */
    protected $recursiveDomainSearch;

    /**
     * One route per non-hidden sys_domain record, named "domain_<uid>".
     *
     * @var RouteCollection
     */
    protected $routeCollection;

    /**
     * all entries in sys_domain, keyed by uid
     * @var array
     */
    protected $allDomainRecords = [];

    /**
     * all entries in sys_domain grouped by page (pid)
     * @var array
     */
    protected $groupedDomainsPerPage = [];

    public function __construct()
    {
        $this->cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('cache_core');
        $this->recursiveDomainSearch = (bool)($GLOBALS['TYPO3_CONF_VARS']['SYS']['recursiveDomainSearch'] ?? false);
        $this->routeCollection = new RouteCollection();
        $this->populate();
    }

    /**
     * Builds up all domain records from DB and all routes.
     *
     * A usable cache entry is preferred; otherwise everything is rebuilt from the
     * sys_domain table and written back to the cache.
     */
    protected function populate()
    {
        $cachedPayload = $this->cache->get('legacy-domains');
        if (is_string($cachedPayload) && $cachedPayload !== '' && $this->restoreFromCache($cachedPayload)) {
            return;
        }

        $this->loadFromDatabase();
        $data = [
            'routeCollection' => $this->routeCollection,
            'allDomainRecords' => $this->allDomainRecords,
            'groupedDomainsPerPage' => $this->groupedDomainsPerPage,
        ];
        $this->cache->set('legacy-domains', serialize($data), ['sys_domain'], 0);
    }

    /**
     * Tries to fill the internal state from a cache payload.
     *
     * @param string $payload raw cache entry content
     * @return bool false if the payload could not be decoded into the expected structure
     */
    private function restoreFromCache(string $payload): bool
    {
        // Due to the nature of PhpFrontend, the `<?php` and `#` wraps have to be removed
        $serialized = preg_replace('/^<\?php\s*|\s*#$/', '', $payload);
        if (!is_string($serialized)) {
            return false;
        }
        $data = unserialize($serialized, ['allowed_classes' => [Route::class, RouteCollection::class]]);
        // unserialize() yields false for a broken payload; do not trust a partial structure either
        if (!is_array($data) || !(($data['routeCollection'] ?? null) instanceof RouteCollection)) {
            return false;
        }
        $this->routeCollection = $data['routeCollection'];
        $this->allDomainRecords = (array)($data['allDomainRecords'] ?? []);
        $this->groupedDomainsPerPage = (array)($data['groupedDomainsPerPage'] ?? []);
        return true;
    }

    /**
     * Reads all sys_domain rows (without any query restrictions, ordered by "sorting")
     * and builds the record lists plus one route per non-hidden record.
     */
    private function loadFromDatabase()
    {
        $this->routeCollection = new RouteCollection();
        $this->allDomainRecords = [];
        $this->groupedDomainsPerPage = [];

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_domain');
        $queryBuilder->getRestrictions()->removeAll();
        $statement = $queryBuilder
            ->select('*')
            ->from('sys_domain')
            ->orderBy('sorting', 'ASC')
            ->execute();

        while ($row = $statement->fetch()) {
            $row['domainName'] = rtrim((string)$row['domainName'], '/');
            $this->allDomainRecords[(int)$row['uid']] = $row;
            $this->groupedDomainsPerPage[(int)$row['pid']][] = $row;
            if (!$row['hidden']) {
                $this->routeCollection->add('domain_' . $row['uid'], $this->buildRoute($row));
            }
        }
    }

    /**
     * Creates the route for one sys_domain row.
     *
     * @param array $row sys_domain row whose "domainName" has no trailing slash
     * @return Route
     */
    private function buildRoute(array $row): Route
    {
        $domainName = $row['domainName'];
        if (strpos($domainName, '/') === false) {
            $path = '';
            // array_pad() guarantees a port element for names given without ":port"
            [$host, $port] = array_pad(explode(':', $domainName), 2, '');
        } else {
            // parse_url() only detects a host if the string has a scheme or starts with "//"
            $parsableUrl = strpos($domainName, '://') === false ? '//' . $domainName : $domainName;
            $urlParts = parse_url($parsableUrl) ?: [];
            $path = trim($urlParts['path'] ?? '', '/');
            $host = $urlParts['host'] ?? '';
            $port = (string)($urlParts['port'] ?? '');
        }

        return new Route(
            $path . '/{next}',
            ['pageId' => $row['pid']],
            // array_filter() drops the port requirement when no port was given
            array_filter(['next' => '.*', 'port' => $port]),
            ['utf8' => true],
            $host
        );
    }

    /**
     * Return the page ID (pid) of a sys_domain record, based on a request object, does the infamous
     * "recursive domain search", to also detect if the domain is like "abc.def.example.com" even if the
     * sys_domain entry is "example.com".
     *
     * @param ServerRequestInterface $request
     * @return int page ID, 0 if no sys_domain record matches
     */
    public function matchRequest(ServerRequestInterface $request): int
    {
        if (empty($this->allDomainRecords) || count($this->routeCollection) === 0) {
            return 0;
        }
        $uri = $request->getUri();
        $path = $uri->getPath();
        if ($path === '') {
            // All routes start with "/", so an empty path could never match
            $path = '/';
        }
        $context = new RequestContext('/', $request->getMethod(), $uri->getHost());
        $matcher = new UrlMatcher($this->routeCollection, $context);

        // The full host is always tried first; with recursive search enabled the left-most
        // label is stripped after every miss until nothing is left.
        $hostParts = explode('.', $uri->getHost());
        do {
            $context->setHost(implode('.', $hostParts));
            try {
                $result = $matcher->match($path);
                return (int)$result['pageId'];
            } catch (NoConfigurationException | \Symfony\Component\Routing\Exception\ResourceNotFoundException $e) {
                // The matcher throws the Symfony exception (not a TYPO3 one) when nothing matches
                array_shift($hostParts);
            }
        } while ($this->recursiveDomainSearch && count($hostParts) > 0);

        // No domain record found
        return 0;
    }

    /**
     * Obtains a sys_domain record that fits for a given page ID by traversing the rootline up and finding
     * a suitable page with sys_domain records.
     * As all sys_domains have been fetched already, the internal grouped list of sys_domains can be used directly.
     *
     * Usually used in the Frontend to find out the domain of a page to link to.
     *
     * Includes a runtime cache if a frontend request links to the same page multiple times.
     *
     * @param int $pageId Target page id
     * @param ServerRequestInterface|null $currentRequest if given, the domain record is marked with "isCurrentDomain"
     * @return array|null the sys_domain record if found
     */
    public function matchPageId(int $pageId, ?ServerRequestInterface $currentRequest = null): ?array
    {
        // Using array_key_exists() here, since $result can be NULL
        // (happens, if there's no domain records defined)
        if (array_key_exists($pageId, $this->domainDataCache)) {
            return $this->domainDataCache[$pageId];
        }
        try {
            $domainEntry = $this->resolveDomainEntry($pageId, $currentRequest);
        } catch (RootLineException $e) {
            // A page without a resolvable rootline simply has no domain
            $domainEntry = null;
        }
        $this->domainDataCache[$pageId] = $domainEntry;
        return $domainEntry;
    }

    /**
     * Returns the full sys_domain record, based on a page record, which is assumed the "pid" of the sys_domain record.
     * Since ordering is taken into account, this is the first sys_domain record on that page Id.
     *
     * @param int $pageId
     * @return array|null
     */
    public function matchRootPageId(int $pageId): ?array
    {
        if (empty($this->groupedDomainsPerPage[$pageId])) {
            return null;
        }
        return reset($this->groupedDomainsPerPage[$pageId]);
    }

    /**
     * Finds the domain record for a page by looking at the first page in its rootline
     * that has at least one non-hidden sys_domain record.
     *
     * Without a request the first non-hidden record wins. With a request, a record matching
     * the current host is preferred (and flagged with "isCurrentDomain"); if none matches,
     * the first non-hidden record of that page is returned unflagged.
     *
     * @param int $pageId
     * @param ServerRequestInterface|null $currentRequest
     * @return array|null
     */
    protected function resolveDomainEntry(int $pageId, ?ServerRequestInterface $currentRequest): ?array
    {
        $rootLine = GeneralUtility::makeInstance(RootlineUtility::class, $pageId)->get();
        // walk the rootline downwards from the target page
        // to the root page, until a domain record is found
        foreach ($rootLine as $pageInRootline) {
            $pidInRootline = $pageInRootline['uid'];
            if (empty($this->groupedDomainsPerPage[$pidInRootline])) {
                continue;
            }

            $firstVisibleEntry = null;
            foreach ($this->groupedDomainsPerPage[$pidInRootline] as $domainEntry) {
                if ($domainEntry['hidden']) {
                    continue;
                }
                // When no currentRequest is given, let's take the first non-hidden sys_domain page
                if ($currentRequest === null) {
                    return $domainEntry;
                }
                // Otherwise the check should match against the current domain (and set "isCurrentDomain")
                if ($this->domainNameMatchesCurrentRequest($domainEntry['domainName'], $currentRequest)) {
                    $domainEntry['isCurrentDomain'] = true;
                    return $domainEntry;
                }
                // Current domain is "forced", however, otherwise the first one is fine
                if ($firstVisibleEntry === null) {
                    $firstVisibleEntry = $domainEntry;
                }
            }
            if ($firstVisibleEntry !== null) {
                return $firstVisibleEntry;
            }
        }
        return null;
    }

    /**
     * Whether the given domain name (potentially including a path segment) matches currently requested host or
     * the host including the path segment
     *
     * @param string $domainName
     * @param ServerRequestInterface $request needs a "normalizedParams" attribute, otherwise nothing matches
     * @return bool
     */
    protected function domainNameMatchesCurrentRequest(string $domainName, ServerRequestInterface $request): bool
    {
        /** @var NormalizedParams $normalizedParams */
        $normalizedParams = $request->getAttribute('normalizedParams');
        if (!($normalizedParams instanceof NormalizedParams)) {
            return false;
        }
        $currentDomain = $normalizedParams->getHttpHost();
        // remove the script filename from the path segment.
        $currentPathSegment = trim((string)preg_replace('|/[^/]*$|', '', $normalizedParams->getScriptName()));
        return $currentDomain === $domainName || $currentDomain . $currentPathSegment === $domainName;
    }
}