[TASK] Improved slug generation for translated pages
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / DataHandling / SlugHelper.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Core\DataHandling;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Doctrine\DBAL\Connection;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Charset\CharsetConverter;
21 use TYPO3\CMS\Core\Database\ConnectionPool;
22 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24 use TYPO3\CMS\Core\Routing\SiteMatcher;
25 use TYPO3\CMS\Core\Utility\GeneralUtility;
26 use TYPO3\CMS\Core\Utility\MathUtility;
27
28 /**
29 * Generates, sanitizes and validates slugs for a TCA field
30 */
31 class SlugHelper
32 {
33 /**
34 * @var string
35 */
36 protected $tableName;
37
38 /**
39 * @var string
40 */
41 protected $fieldName;
42
43 /**
44 * @var int
45 */
46 protected $workspaceId;
47
48 /**
49 * @var array
50 */
51 protected $configuration = [];
52
53 /**
54 * @var bool
55 */
56 protected $workspaceEnabled;
57
/**
 * Creates a helper bound to one slug field of one TCA table.
 *
 * @param string $tableName TCA table the slug field belongs to
 * @param string $fieldName name of the slug field within that table
 * @param array $configuration TCA configuration of the field
 * @param int $workspaceId ID of the workspace to operate in (defaults to 0)
 */
public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
{
    // Ask the backend once whether the table is workspace-enabled and keep the answer.
    $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);

    $this->workspaceId = $workspaceId;
    $this->configuration = $configuration;
    $this->fieldName = $fieldName;
    $this->tableName = $tableName;
}
75
/**
 * Cleans a slug value so it can be used directly in the path segment of a URL.
 *
 * The value is lower-cased, stripped of tags, separator-like tokens are
 * replaced by the configured fallback character ("-" when none is configured),
 * extended letters are converted to ASCII, every character that is neither a
 * letter, a digit, a slash nor the fallback character is dropped, and the
 * result is URL-encoded with slashes kept intact.
 *
 * @param string $slug
 * @return string the sanitized slug, always starting with "/"
 */
public function sanitize(string $slug): string
{
    // Convert to lowercase + remove tags
    $slug = mb_strtolower($slug, 'utf-8');
    $slug = strip_tags($slug);

    // Convert runs of separator-like tokens (space, tab, non-breaking space,
    // "-", "+" and "_") to the fallback character
    $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
    $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);

    // Convert extended letters to ascii equivalents
    $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);

    // Quote the fallback character for "/"-delimited patterns. Without the
    // delimiter argument a fallback character containing "/" would terminate
    // the pattern early and make preg_replace() fail.
    $quotedFallbackCharacter = preg_quote($fallbackCharacter, '/');

    // Get rid of all invalid characters, but allow slashes
    $slug = preg_replace('/[^\p{L}0-9\/' . $quotedFallbackCharacter . ']/u', '', $slug);

    // Convert multiple fallback characters to a single one. The fallback is
    // grouped so the quantifier applies to all of it (not only to its last
    // byte or character), and matched in UTF-8 mode like the patterns above.
    if ($fallbackCharacter !== '') {
        $slug = preg_replace('/(?:' . $quotedFallbackCharacter . '){2,}/u', $fallbackCharacter, $slug);
    }

    // Ensure slug is lower cased after all replacement was done:
    // The specCharsToASCII() above for example converts "€" to "EUR"
    $slug = mb_strtolower($slug, 'utf-8');
    // keep slashes: re-convert them after rawurlencode did everything
    $slug = rawurlencode($slug);
    // @todo: add a test and see if we need this
    $slug = str_replace('%2F', '/', $slug);
    // Remove trailing and beginning slashes, then prepend exactly one slash
    $slug = '/' . $this->extract($slug);
    return $slug;
}
114
/**
 * Strips the wrapping delimiters from a slug and returns its payload,
 * e.g. `/hello/world/` becomes `hello/world`.
 *
 * Slashes and the configured fallback character ("-" when none is
 * configured) are removed from both ends of the value.
 *
 * @param string $slug
 * @return string
 */
public function extract(string $slug): string
{
    // Every character of this list is trimmed from the beginning and the end.
    $wrappingCharacters = ($this->configuration['fallbackCharacter'] ?? '-') . '/';

    return trim($slug, $wrappingCharacters);
}
128
/**
 * Generates a slug for a record that has none yet.
 *
 * Returns "/" for records on pid 0 and for "pages" records flagged as site
 * root. Otherwise the non-empty values of the configured generator fields are
 * joined with the configured field separator ("/" by default). When
 * "prefixParentPageSlug" is enabled, the slug generated for the parent page
 * (its translation, if the record is translated and one exists) is prepended.
 *
 * @param array $recordData
 * @param int $pid
 * @return string the sanitized slug
 */
public function generate(array $recordData, int $pid): string
{
    if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
        return '/';
    }
    $prefix = '';
    // The table may not declare a language field and the given record array
    // (e.g. a parent record passed in by the recursion below) may not contain
    // it; fall back to language 0 instead of reading an undefined index.
    $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
    $languageId = is_string($languageFieldName) ? (int)($recordData[$languageFieldName] ?? 0) : 0;
    if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
        $rootLine = BackendUtility::BEgetRootLine($pid, '', true, ['nav_title']);
        $parentPageRecord = reset($rootLine);
        // reset() yields false for an empty root line, so only look up a
        // translation when there actually is a parent record.
        if ($languageId > 0 && is_array($parentPageRecord)) {
            $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
            if (!empty($localizedParentPageRecord)) {
                $parentPageRecord = reset($localizedParentPageRecord);
            }
        }
        if (is_array($parentPageRecord)) {
            // Build the parent's slug recursively and use it as prefix
            $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
            $rootLineItemSlug = trim($rootLineItemSlug, '/');
            if (!empty($rootLineItemSlug)) {
                $prefix = $rootLineItemSlug;
            }
        }
    }

    $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
    $slugParts = [];
    foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldName) {
        if (!empty($recordData[$fieldName])) {
            $slugParts[] = $recordData[$fieldName];
        }
    }
    $slug = implode($fieldSeparator, $slugParts);
    if (!empty($prefix)) {
        $slug = $prefix . '/' . $slug;
    }

    return $this->sanitize($slug);
}
175
/**
 * Checks if there are other records with the same slug that are located on the same PID.
 *
 * A negative page ID is replaced by the pid of the live version of the
 * given record before querying.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool true if no other record uses the slug
 * @throws \RuntimeException if the live page ID cannot be resolved
 */
public function isUniqueInPid(string $slug, $recordId, int $pageId, int $languageId): bool
{
    if ($pageId < 0) {
        $pageId = $this->resolveLivePageId($recordId);
    }

    $queryBuilder = $this->createPreparedQueryBuilder();
    $this->applySlugConstraint($queryBuilder, $slug);
    $this->applyPageIdConstraint($queryBuilder, $pageId);
    $this->applyRecordConstraint($queryBuilder, $recordId);
    $this->applyLanguageConstraint($queryBuilder, $languageId);
    $this->applyWorkspaceConstraint($queryBuilder);
    // One matching row is enough to know the slug is taken
    $queryBuilder->setMaxResults(1);
    $statement = $queryBuilder->execute();

    // Fetch the rows instead of asking for rowCount(): the row count of a
    // SELECT statement is not reported reliably by every database driver.
    return count($statement->fetchAll()) === 0;
}
200
/**
 * Check if there are other records with the same slug that are located on the same site.
 *
 * A negative page ID is replaced by the pid of the live version of the
 * given record. Records sharing the slug only count as conflicts when the
 * site they belong to has the same root page ID as the site of $pageId.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool true if no record of the same site uses the slug
 * @throws \RuntimeException if the live page ID cannot be resolved
 */
public function isUniqueInSite(string $slug, $recordId, int $pageId, int $languageId): bool
{
    if ($pageId < 0) {
        $pageId = $this->resolveLivePageId($recordId);
    }

    $queryBuilder = $this->createPreparedQueryBuilder();
    $this->applySlugConstraint($queryBuilder, $slug);
    $this->applyRecordConstraint($queryBuilder, $recordId);
    $this->applyLanguageConstraint($queryBuilder, $languageId);
    $this->applyWorkspaceConstraint($queryBuilder);
    $statement = $queryBuilder->execute();

    $records = $statement->fetchAll();
    if (count($records) === 0) {
        return true;
    }

    // The installation contains at least ONE other record with the same slug
    // Now find out if it is the same root page ID
    $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
    $siteOfCurrentRecord = $siteMatcher->matchByPageId($pageId);
    foreach ($records as $record) {
        // Only a "pages" record is a page itself; a record of any other table
        // is located on the page referenced by its pid, so that is the page
        // ID the site has to be resolved for.
        $pageIdOfExistingRecord = $this->tableName === 'pages' ? (int)$record['uid'] : (int)$record['pid'];
        $siteOfExistingRecord = $siteMatcher->matchByPageId($pageIdOfExistingRecord);
        if ($siteOfExistingRecord->getRootPageId() === $siteOfCurrentRecord->getRootPageId()) {
            return false;
        }
    }

    // Otherwise, everything is still fine
    return true;
}
242
/**
 * Generate a slug with a suffix "/mytitle-1" if that is in use already.
 *
 * The sanitized slug is tried first, then the suffixes "-1" to "-100".
 * If none of these candidates is unique within the site, a short MD5 hash
 * of the slug payload is appended instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInSite(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    $counter = 0;
    while (!$this->isUniqueInSite($newValue, $recordId, $realPid, $languageId)) {
        // Give up after "-100" was rejected as well. Deciding this inside the
        // loop keeps a unique "-100" candidate and applies the hash fallback
        // only when every numbered candidate is really taken.
        if (++$counter > 100) {
            return $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
        }
        $newValue = $this->sanitize($rawValue . '-' . $counter);
    }
    return $newValue;
}
272
/**
 * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
 *
 * The sanitized slug is tried first, then the suffixes "-1" to "-100".
 * If none of these candidates is unique on the page, a short MD5 hash
 * of the slug payload is appended instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInPid(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    $counter = 0;
    while (!$this->isUniqueInPid($newValue, $recordId, $realPid, $languageId)) {
        // Give up after "-100" was rejected as well. Deciding this inside the
        // loop keeps a unique "-100" candidate and applies the hash fallback
        // only when every numbered candidate is really taken.
        if (++$counter > 100) {
            return $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
        }
        $newValue = $this->sanitize($rawValue . '-' . $counter);
    }
    return $newValue;
}
302
/**
 * Creates a query builder for the configured table that selects uid, pid
 * and the slug field, with only the deleted restriction applied.
 *
 * @return QueryBuilder
 */
protected function createPreparedQueryBuilder(): QueryBuilder
{
    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
    $queryBuilder = $connectionPool->getQueryBuilderForTable($this->tableName);

    // Drop all restrictions, then re-add the deleted restriction only
    $deletedRestriction = GeneralUtility::makeInstance(DeletedRestriction::class);
    $queryBuilder->getRestrictions()->removeAll()->add($deletedRestriction);

    $queryBuilder->select('uid', 'pid', $this->fieldName);
    $queryBuilder->from($this->tableName);

    return $queryBuilder;
}
317
/**
 * Limits the query to records with t3ver_wsid 0 and, when working in a
 * workspace, to records of that workspace as well. Does nothing for tables
 * that are not workspace-enabled.
 *
 * @param QueryBuilder $queryBuilder
 */
protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
{
    if ($this->workspaceEnabled) {
        $workspaceIds = $this->workspaceId > 0 ? [0, $this->workspaceId] : [0];
        $workspaceCondition = $queryBuilder->expr()->in(
            't3ver_wsid',
            $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
        );
        $queryBuilder->andWhere($workspaceCondition);
    }
}
338
/**
 * Limits the query to records of the given language. Does nothing when
 * the table's TCA does not declare a language field.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $languageId
 */
protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
{
    $languageField = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
    if (is_string($languageField)) {
        // Only check records of the given language
        $languageParameter = $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT);
        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq($languageField, $languageParameter)
        );
    }
}
358
/**
 * Sets the condition that the slug field equals the given slug.
 *
 * NOTE(review): this uses where() rather than andWhere(); presumably that
 * replaces conditions added earlier, so it has to be applied first -
 * confirm against the query builder documentation.
 *
 * @param QueryBuilder $queryBuilder
 * @param string $slug
 */
protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
{
    $slugParameter = $queryBuilder->createNamedParameter($slug);
    $slugCondition = $queryBuilder->expr()->eq($this->fieldName, $slugParameter);
    $queryBuilder->where($slugCondition);
}
372
/**
 * Limits the query to records whose pid equals the given page ID.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $pageId
 * @throws \RuntimeException if the page ID is negative
 */
protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
{
    if ($pageId < 0) {
        $message = sprintf('Page id must be positive "%d"', $pageId);
        throw new \RuntimeException($message, 1534962573);
    }

    $pageIdParameter = $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT);
    $queryBuilder->andWhere(
        $queryBuilder->expr()->eq('pid', $pageIdParameter)
    );
}
396
/**
 * Excludes the given record from the query result. Identifiers that cannot
 * be interpreted as integer (new records) add no condition. When working
 * in a workspace on a workspace-enabled table, the live version of the
 * record is excluded as well.
 *
 * @param QueryBuilder $queryBuilder
 * @param string|int $recordId
 */
protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
{
    if (MathUtility::canBeInterpretedAsInteger($recordId)) {
        // Exclude the current record itself
        $recordParameter = $queryBuilder->createNamedParameter($recordId, \PDO::PARAM_INT);
        $queryBuilder->andWhere($queryBuilder->expr()->neq('uid', $recordParameter));

        if ($this->workspaceId > 0 && $this->workspaceEnabled) {
            // Exclude the live version too; fall back to the given ID if none is reported
            $liveId = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
            $liveParameter = $queryBuilder->createNamedParameter($liveId, \PDO::PARAM_INT);
            $queryBuilder->andWhere($queryBuilder->expr()->neq('uid', $liveParameter));
        }
    }
}
418
/**
 * Returns the pid of the live version of the given record.
 *
 * @param string|int $recordId identifier that must be interpretable as integer
 * @return int
 * @throws \RuntimeException if the identifier is not numeric or no live version is found
 */
protected function resolveLivePageId($recordId): int
{
    if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
        $message = sprintf('Cannot resolve live page id for non-numeric identifier "%s"', $recordId);
        throw new \RuntimeException($message, 1534951024);
    }

    // Only the pid of the live version is needed
    $liveVersion = BackendUtility::getLiveVersionOfRecord($this->tableName, $recordId, 'pid');
    if (!empty($liveVersion)) {
        return (int)$liveVersion['pid'];
    }

    $message = sprintf('Cannot resolve live page id for record "%s:%d"', $this->tableName, $recordId);
    throw new \RuntimeException($message, 1534951025);
}
455 }