[FEATURE] Allow replacing strings in slug
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / DataHandling / SlugHelper.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Core\DataHandling;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Doctrine\DBAL\Connection;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Charset\CharsetConverter;
21 use TYPO3\CMS\Core\Database\ConnectionPool;
22 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24 use TYPO3\CMS\Core\DataHandling\Model\RecordState;
25 use TYPO3\CMS\Core\DataHandling\Model\RecordStateFactory;
26 use TYPO3\CMS\Core\Exception\SiteNotFoundException;
27 use TYPO3\CMS\Core\Routing\SiteMatcher;
28 use TYPO3\CMS\Core\Utility\GeneralUtility;
29 use TYPO3\CMS\Core\Utility\MathUtility;
30 use TYPO3\CMS\Core\Versioning\VersionState;
31
/**
 * Generates, sanitizes and validates slugs for a TCA field.
 *
 * An instance is bound to one table/field combination, the TCA configuration of that
 * field and the workspace the caller is working in.
 */
class SlugHelper
{
    /**
     * Highest numeric suffix ("-1" ... "-100") that is tried when making a slug unique
     * before a hash based suffix is used instead.
     */
    private const MAX_NUMERIC_SUFFIX = 100;

    /**
     * @var string
     */
    protected $tableName;

    /**
     * @var string
     */
    protected $fieldName;

    /**
     * @var int
     */
    protected $workspaceId;

    /**
     * @var array
     */
    protected $configuration = [];

    /**
     * @var bool
     */
    protected $workspaceEnabled;

    /**
     * Defines whether the slug field should start with "/".
     * For pages (due to rootline functionality), this is a must have, otherwise the root level page
     * would have an empty value.
     *
     * @var bool
     */
    protected $prependSlashInSlug;

    /**
     * Slug constructor.
     *
     * @param string $tableName TCA table
     * @param string $fieldName TCA field
     * @param array $configuration TCA configuration of the field
     * @param int $workspaceId the workspace ID to be working on.
     */
    public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
    {
        $this->tableName = $tableName;
        $this->fieldName = $fieldName;
        $this->configuration = $configuration;
        $this->workspaceId = $workspaceId;

        if ($this->tableName === 'pages' && $this->fieldName === 'slug') {
            // The slug of pages always starts with a slash, regardless of the configuration
            $this->prependSlashInSlug = true;
        } else {
            $this->prependSlashInSlug = (bool)($this->configuration['prependSlash'] ?? false);
        }

        $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);
    }

    /**
     * Cleans a slug value so it is used directly in the path segment of a URL.
     *
     * The value is lower-cased, stripped of tags, whitespace-like tokens are replaced by the
     * configured "fallbackCharacter" (default "-"), extended letters are converted to ASCII and
     * everything except letters, digits, slashes and the fallback character is removed.
     *
     * @param string $slug
     * @return string
     */
    public function sanitize(string $slug): string
    {
        // Convert to lowercase + remove tags
        $slug = mb_strtolower($slug, 'utf-8');
        $slug = strip_tags($slug);

        // Convert some special tokens (space, tab, non-breaking space, "-", "+" and "_") to the fallback character
        $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
        $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);

        // Convert extended letters to ascii equivalents
        // The specCharsToASCII() converts "€" to "EUR"
        $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);

        // Quote with the pattern delimiter, otherwise a fallback character containing "/" ends the pattern early
        $quotedFallbackCharacter = preg_quote($fallbackCharacter, '/');

        // Get rid of all invalid characters, but allow slashes
        $slug = preg_replace('/[^\p{L}0-9\/' . $quotedFallbackCharacter . ']/u', '', $slug);

        // Convert multiple fallback characters to a single one. The group makes the quantifier
        // apply to the whole fallback string and not only to its last character (or byte).
        if ($fallbackCharacter !== '') {
            $slug = preg_replace('/(?:' . $quotedFallbackCharacter . '){2,}/u', $fallbackCharacter, $slug);
        }

        // Ensure slug is lower cased after all replacement was done
        $slug = mb_strtolower($slug, 'utf-8');
        // keep slashes: re-convert them after rawurlencode did everything
        $slug = rawurlencode($slug);
        // @todo: add a test and see if we need this
        $slug = str_replace('%2F', '/', $slug);
        // Extract slug, thus it does not have wrapping fallback and slash characters
        $extractedSlug = $this->extract($slug);
        // Remove trailing and beginning slashes, except if the trailing slash was added, then we'll re-add it
        $appendTrailingSlash = $extractedSlug !== '' && substr($slug, -1) === '/';
        $slug = $extractedSlug . ($appendTrailingSlash ? '/' : '');
        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
            $slug = '/' . $slug;
        }
        return $slug;
    }

    /**
     * Extracts payload of slug and removes wrapping delimiters,
     * e.g. `/hello/world/` will become `hello/world`.
     *
     * @param string $slug
     * @return string
     */
    public function extract(string $slug): string
    {
        // Strip leading and trailing slashes and fallback characters (default "-")
        $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
        return trim($slug, $fallbackCharacter . '/');
    }

    /**
     * Used when no slug exists for a record
     *
     * Builds the slug from the fields listed in "generatorOptions.fields", joined by
     * "generatorOptions.fieldSeparator" (default "/"), after applying the string replacements
     * of "generatorOptions.replacements". With "generatorOptions.prefixParentPageSlug" the slug
     * of the parent page is put in front.
     *
     * @param array $recordData
     * @param int $pid
     * @return string
     */
    public function generate(array $recordData, int $pid): string
    {
        if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
            return '/';
        }
        $prefix = '';
        if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
            $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
            // Tables without a language field are handled as default language
            $languageId = is_string($languageFieldName) ? (int)($recordData[$languageFieldName] ?? 0) : 0;
            $parentPageRecord = $this->resolveParentPageRecord($pid, $languageId);
            if (is_array($parentPageRecord)) {
                // If the parent page has a slug, use that instead of "re-generating" the slug from the parents' page title
                if (!empty($parentPageRecord['slug'])) {
                    $rootLineItemSlug = $parentPageRecord['slug'];
                } else {
                    $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
                }
                $rootLineItemSlug = trim($rootLineItemSlug, '/');
                if (!empty($rootLineItemSlug)) {
                    $prefix = $rootLineItemSlug;
                }
            }
        }

        $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
        $slugParts = [];

        $replaceConfiguration = $this->configuration['generatorOptions']['replacements'] ?? [];
        $searchStrings = array_keys($replaceConfiguration);
        $replaceStrings = array_values($replaceConfiguration);
        foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldName) {
            if (!empty($recordData[$fieldName])) {
                // Record values may be integers or floats, str_replace() needs a string subject
                $pieceOfSlug = (string)$recordData[$fieldName];
                $slugParts[] = str_replace($searchStrings, $replaceStrings, $pieceOfSlug);
            }
        }
        $slug = implode($fieldSeparator, $slugParts);
        $slug = $this->sanitize($slug);
        // No valid data found
        if ($slug === '' || $slug === '/') {
            // json_encode() returns false on failure, hash an empty string in that case
            $slug = 'default-' . GeneralUtility::shortMD5((string)json_encode($recordData));
        }
        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
            $slug = '/' . $slug;
        }
        if (!empty($prefix)) {
            $slug = $prefix . $slug;
        }

        return $this->sanitize($slug);
    }

    /**
     * Checks if there are other records with the same slug that are located on the same PID.
     *
     * @param string $slug
     * @param RecordState $state
     * @return bool
     */
    public function isUniqueInPid(string $slug, RecordState $state): bool
    {
        $pageId = $this->resolveEffectivePageId($state);
        $records = $this->findOtherRecordsWithSlug($slug, $state, $pageId);
        return count($records) === 0;
    }

    /**
     * Check if there are other records with the same slug that are located on the same site.
     *
     * @param string $slug
     * @param RecordState $state
     * @return bool
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     */
    public function isUniqueInSite(string $slug, RecordState $state): bool
    {
        $pageId = $this->resolveEffectivePageId($state);
        $records = $this->findOtherRecordsWithSlug($slug, $state, null);
        if (count($records) === 0) {
            return true;
        }

        // The installation contains at least ONE other record with the same slug
        // Now find out if it is the same root page ID
        $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
        $siteOfCurrentRecord = $siteMatcher->matchByPageId($pageId);
        foreach ($records as $record) {
            try {
                $recordState = RecordStateFactory::forName($this->tableName)->fromArray($record);
                $siteOfExistingRecord = $siteMatcher->matchByPageId(
                    (int)$recordState->resolveNodeAggregateIdentifier()
                );
            } catch (SiteNotFoundException $exception) {
                // In case no site is found, the record is not
                // organized in any site or pseudo-site
                continue;
            }
            if ($siteOfExistingRecord->getRootPageId() === $siteOfCurrentRecord->getRootPageId()) {
                return false;
            }
        }

        // Otherwise, everything is still fine
        return true;
    }

    /**
     * Generate a slug with a suffix "/mytitle-1" if that is in use already.
     *
     * @param string $slug proposed slug
     * @param RecordState $state
     * @return string
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     */
    public function buildSlugForUniqueInSite(string $slug, RecordState $state): string
    {
        return $this->buildUniqueSlug(
            $slug,
            function (string $candidate) use ($state): bool {
                return $this->isUniqueInSite($candidate, $state);
            }
        );
    }

    /**
     * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
     *
     * @param string $slug proposed slug
     * @param RecordState $state
     * @return string
     */
    public function buildSlugForUniqueInPid(string $slug, RecordState $state): string
    {
        return $this->buildUniqueSlug(
            $slug,
            function (string $candidate) use ($state): bool {
                return $this->isUniqueInPid($candidate, $state);
            }
        );
    }

    /**
     * Sanitizes the proposed slug and, as long as it is in use, appends "-1", "-2", ... up to
     * "-100". The first candidate accepted by $isUnique is returned. If every numeric suffix is
     * taken, a suffix built from the short MD5 hash of the slug payload is used; that last
     * candidate is not checked for uniqueness.
     *
     * @param string $slug proposed slug
     * @param callable $isUnique receives the candidate slug and returns true if it is not in use
     * @return string
     */
    private function buildUniqueSlug(string $slug, callable $isUnique): string
    {
        $candidate = $this->sanitize($slug);
        if ($isUnique($candidate)) {
            return $candidate;
        }

        $rawValue = $this->extract($candidate);
        for ($suffix = 1; $suffix <= self::MAX_NUMERIC_SUFFIX; $suffix++) {
            $candidate = $this->sanitize($rawValue . '-' . $suffix);
            if ($isUnique($candidate)) {
                return $candidate;
            }
        }

        // All numeric suffixes are in use
        return $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }

    /**
     * Returns the page id the record state points to. A negative id is replaced by the
     * pid of the live version of the record.
     *
     * @param RecordState $state
     * @return int
     * @throws \RuntimeException if the live page id cannot be resolved
     */
    private function resolveEffectivePageId(RecordState $state): int
    {
        $pageId = (int)$state->resolveNodeIdentifier();
        if ($pageId < 0) {
            $pageId = $this->resolveLivePageId($state->getSubject()->getIdentifier());
        }
        return $pageId;
    }

    /**
     * Fetches all records, except the one described by $state, which use the given slug in the
     * language of $state and pass the workspace constraints and the version overlay.
     *
     * @param string $slug
     * @param RecordState $state
     * @param int|null $pageId restricts the lookup to records on this page, null for no restriction
     * @return array
     */
    private function findOtherRecordsWithSlug(string $slug, RecordState $state, ?int $pageId): array
    {
        $recordId = $state->getSubject()->getIdentifier();
        $languageId = $state->getContext()->getLanguageId();

        $queryBuilder = $this->createPreparedQueryBuilder();
        // The slug constraint uses where() and therefore has to be applied first
        $this->applySlugConstraint($queryBuilder, $slug);
        if ($pageId !== null) {
            $this->applyPageIdConstraint($queryBuilder, $pageId);
        }
        $this->applyRecordConstraint($queryBuilder, $recordId);
        $this->applyLanguageConstraint($queryBuilder, $languageId);
        $this->applyWorkspaceConstraint($queryBuilder);
        $statement = $queryBuilder->execute();

        return $this->resolveVersionOverlays(
            $statement->fetchAll()
        );
    }

    /**
     * Creates a query builder selecting uid, pid, the slug field and, if available,
     * t3ver_state, the language field and the translation parent field of the table.
     * Only the "deleted" restriction is active.
     *
     * @return QueryBuilder
     */
    protected function createPreparedQueryBuilder(): QueryBuilder
    {
        $fieldNames = ['uid', 'pid', $this->fieldName];
        if ($this->workspaceEnabled) {
            $fieldNames[] = 't3ver_state';
        }
        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
        if (is_string($languageFieldName)) {
            $fieldNames[] = $languageFieldName;
        }
        $languageParentFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['transOrigPointerField'] ?? null;
        if (is_string($languageParentFieldName)) {
            $fieldNames[] = $languageParentFieldName;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
        $queryBuilder
            ->select(...$fieldNames)
            ->from($this->tableName);
        return $queryBuilder;
    }

    /**
     * Limits the query to records with t3ver_wsid 0 or the current workspace id and excludes
     * records with pid -1. Does nothing if the table is not workspace enabled.
     *
     * @param QueryBuilder $queryBuilder
     */
    protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
    {
        if (!$this->workspaceEnabled) {
            return;
        }

        $workspaceIds = [0];
        if ($this->workspaceId > 0) {
            $workspaceIds[] = $this->workspaceId;
        }
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                't3ver_wsid',
                $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
            ),
            $queryBuilder->expr()->neq(
                'pid',
                $queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Limits the query to records of the given language.
     * Does nothing if the table has no language field.
     *
     * @param QueryBuilder $queryBuilder
     * @param int $languageId
     */
    protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
    {
        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
        if (!is_string($languageFieldName)) {
            return;
        }

        // Only check records of the given language
        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq(
                $languageFieldName,
                $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Limits the query to records using the given slug. As this calls where() on the
     * query builder, it has to be applied before any of the other constraints.
     *
     * @param QueryBuilder $queryBuilder
     * @param string $slug
     */
    protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
    {
        $queryBuilder->where(
            $queryBuilder->expr()->eq(
                $this->fieldName,
                $queryBuilder->createNamedParameter($slug)
            )
        );
    }

    /**
     * Limits the query to records located on the given page.
     *
     * @param QueryBuilder $queryBuilder
     * @param int $pageId
     * @throws \RuntimeException if the page id is negative
     */
    protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
    {
        if ($pageId < 0) {
            throw new \RuntimeException(
                sprintf(
                    'Page id must be positive "%d"',
                    $pageId
                ),
                1534962573
            );
        }

        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq(
                'pid',
                $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Excludes the record itself (and, inside a workspace, its live version) from the query.
     * Identifiers which cannot be interpreted as integer are ignored.
     *
     * @param QueryBuilder $queryBuilder
     * @param string|int $recordId
     */
    protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
    {
        // Exclude the current record if it is an existing record
        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
            return;
        }

        $queryBuilder->andWhere(
            $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($recordId, \PDO::PARAM_INT))
        );
        if ($this->workspaceId > 0 && $this->workspaceEnabled) {
            $liveId = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
            $queryBuilder->andWhere(
                $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($liveId, \PDO::PARAM_INT))
            );
        }
    }

    /**
     * Returns the pid of the live version of the given record.
     *
     * @param string|int $recordId
     * @return int
     * @throws \RuntimeException if the identifier is not numeric or no live version was found
     */
    protected function resolveLivePageId($recordId): int
    {
        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
            throw new \RuntimeException(
                sprintf(
                    'Cannot resolve live page id for non-numeric identifier "%s"',
                    $recordId
                ),
                1534951024
            );
        }

        $liveVersion = BackendUtility::getLiveVersionOfRecord(
            $this->tableName,
            $recordId,
            'pid'
        );

        if (empty($liveVersion)) {
            throw new \RuntimeException(
                sprintf(
                    'Cannot resolve live page id for record "%s:%d"',
                    $this->tableName,
                    $recordId
                ),
                1534951025
            );
        }

        return (int)$liveVersion['pid'];
    }

    /**
     * Applies the workspace overlay to the given records and removes delete placeholders as well
     * as records that are empty after the overlay. Array keys of the remaining records are kept.
     * Returns the records untouched if the table is not workspace enabled.
     *
     * @param array $records
     * @return array
     */
    protected function resolveVersionOverlays(array $records): array
    {
        if (!$this->workspaceEnabled) {
            return $records;
        }

        return array_filter(
            array_map(
                function (array $record) {
                    // workspaceOL() modifies $record by reference
                    BackendUtility::workspaceOL(
                        $this->tableName,
                        $record,
                        $this->workspaceId,
                        true
                    );
                    if (VersionState::cast($record['t3ver_state'] ?? null)
                        ->equals(VersionState::DELETE_PLACEHOLDER)) {
                        return null;
                    }
                    return $record;
                },
                $records
            )
        );
    }

    /**
     * Fetch a parent page, but exclude spacers, recyclers and sys-folders and all doktypes > 200
     *
     * Rootline entries with a doktype of 199 or higher are skipped; if all entries have such a
     * doktype, the last entry of the rootline is used. For $languageId > 0 the localized page
     * record is returned if one exists.
     *
     * @param int $pid
     * @param int $languageId
     * @return array|null
     */
    protected function resolveParentPageRecord(int $pid, int $languageId): ?array
    {
        $parentPageRecord = null;
        $rootLine = BackendUtility::BEgetRootLine($pid, '', true, ['nav_title']);
        do {
            $parentPageRecord = array_shift($rootLine);
            // do not use spacers (199), recyclers and folders and everything else
        } while (!empty($rootLine) && (int)$parentPageRecord['doktype'] >= 199);
        if (!is_array($parentPageRecord)) {
            // Empty rootline: array_shift() returned null, there is no page to localize
            return null;
        }
        if ($languageId > 0) {
            $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
            if (!empty($localizedParentPageRecord)) {
                $parentPageRecord = reset($localizedParentPageRecord);
            }
        }
        return $parentPageRecord;
    }
}