86cba5b398f40ad59718ba13c8036dc2aef6dc8a
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / DataHandling / SlugHelper.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Core\DataHandling;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Doctrine\DBAL\Connection;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Charset\CharsetConverter;
21 use TYPO3\CMS\Core\Database\ConnectionPool;
22 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24 use TYPO3\CMS\Core\DataHandling\Model\RecordState;
25 use TYPO3\CMS\Core\Exception\SiteNotFoundException;
26 use TYPO3\CMS\Core\Routing\SiteMatcher;
27 use TYPO3\CMS\Core\Utility\GeneralUtility;
28 use TYPO3\CMS\Core\Utility\MathUtility;
29 use TYPO3\CMS\Core\Versioning\VersionState;
30
/**
 * Generates, sanitizes and validates slugs for a TCA field
 *
 * One instance is bound to a single table/field combination (plus the TCA
 * configuration of that field and a workspace ID) and offers
 * - sanitize() / extract() to normalize slug strings,
 * - generate() to build a slug from record data,
 * - isUniqueIn*() / buildSlugForUniqueIn*() to check for and resolve collisions.
 */
class SlugHelper
{
    /**
     * Name of the TCA table this helper works on
     *
     * @var string
     */
    protected $tableName;

    /**
     * Name of the slug field within the table
     *
     * @var string
     */
    protected $fieldName;

    /**
     * Workspace ID used for workspace constraints and version overlays
     *
     * @var int
     */
    protected $workspaceId;

    /**
     * TCA configuration of the slug field. Keys read by this class:
     * "fallbackCharacter", "prependSlash" and "generatorOptions"
     * ("fields", "fieldSeparator", "prefixParentPageSlug").
     *
     * @var array
     */
    protected $configuration = [];

    /**
     * Whether the table is workspace enabled (resolved once in the constructor)
     *
     * @var bool
     */
    protected $workspaceEnabled;

    /**
     * Defines whether the slug field should start with "/".
     * For pages (due to rootline functionality), this is a must have, otherwise the root level page
     * would have an empty value.
     *
     * @var bool
     */
    protected $prependSlashInSlug;

    /**
     * Slug constructor.
     *
     * @param string $tableName TCA table
     * @param string $fieldName TCA field
     * @param array $configuration TCA configuration of the field
     * @param int $workspaceId the workspace ID to be working on.
     */
    public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
    {
        $this->tableName = $tableName;
        $this->fieldName = $fieldName;
        $this->configuration = $configuration;
        $this->workspaceId = $workspaceId;

        if ($this->tableName === 'pages' && $this->fieldName === 'slug') {
            // The slug of pages always starts with a slash, regardless of the configuration
            $this->prependSlashInSlug = true;
        } else {
            $this->prependSlashInSlug = (bool)($this->configuration['prependSlash'] ?? false);
        }

        $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);
    }

    /**
     * Cleans a slug value so it is used directly in the path segment of a URL.
     *
     * The value is lower-cased, stripped of tags, separator tokens are replaced
     * by the configured fallback character, everything except letters, digits,
     * slashes and the fallback character is removed and the result is URL-encoded
     * (slashes are kept). Wrapping fallback characters and slashes are removed;
     * a trailing slash is kept and a leading slash is added if the field
     * requires one.
     *
     * @param string $slug
     * @return string
     */
    public function sanitize(string $slug): string
    {
        // Convert to lowercase + remove tags
        $slug = mb_strtolower($slug, 'utf-8');
        $slug = strip_tags($slug);

        // Convert runs of separator tokens (space, tab, non-breaking space, "-", "+" and "_")
        // to the fallback character
        $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
        // Quote including the pattern delimiter, otherwise a fallback character "/" breaks the patterns below
        $quotedFallbackCharacter = preg_quote($fallbackCharacter, '/');
        $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);

        // Convert extended letters to ascii equivalents
        // The specCharsToASCII() converts "€" to "EUR"
        $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);

        // Get rid of all invalid characters, but allow slashes
        $slug = preg_replace('/[^\p{L}0-9\/' . $quotedFallbackCharacter . ']/u', '', $slug);

        // Convert multiple fallback characters to a single one. The group makes sure that
        // the quantifier applies to the whole fallback string, not only to its last character.
        if ($fallbackCharacter !== '') {
            $slug = preg_replace('/(?:' . $quotedFallbackCharacter . '){2,}/', $fallbackCharacter, $slug);
        }

        // Ensure slug is lower cased after all replacement was done
        $slug = mb_strtolower($slug, 'utf-8');
        // keep slashes: re-convert them after rawurlencode did everything
        $slug = rawurlencode($slug);
        // @todo: add a test and see if we need this
        $slug = str_replace('%2F', '/', $slug);
        // Extract slug, thus it does not have wrapping fallback and slash characters
        $extractedSlug = $this->extract($slug);
        // Remove trailing and beginning slashes, except if the trailing slash was added, then we'll re-add it
        $appendTrailingSlash = $extractedSlug !== '' && substr($slug, -1) === '/';
        $slug = $extractedSlug . ($appendTrailingSlash ? '/' : '');
        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
            $slug = '/' . $slug;
        }
        return $slug;
    }

    /**
     * Extracts payload of slug and removes wrapping delimiters,
     * e.g. `/hello/world/` will become `hello/world`.
     *
     * Both slashes and the configured fallback character count as delimiters.
     *
     * @param string $slug
     * @return string
     */
    public function extract(string $slug): string
    {
        $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
        return trim($slug, $fallbackCharacter . '/');
    }

    /**
     * Used when no slug exists for a record
     *
     * Joins the values of the configured generator fields, sanitizes the result
     * and - if "prefixParentPageSlug" is set - prefixes it with the slug of the
     * parent page. Records on pid 0 and site root pages always get "/".
     *
     * @param array $recordData
     * @param int $pid
     * @return string
     */
    public function generate(array $recordData, int $pid): string
    {
        if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
            return '/';
        }
        $prefix = '';
        if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
            $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
            $languageId = (int)($recordData[$languageFieldName] ?? 0);
            $parentPageRecord = $this->resolveParentPageRecord($pid, $languageId);
            if (is_array($parentPageRecord)) {
                // If the parent page has a slug, use that instead of "re-generating" the slug from the parents' page title
                if (!empty($parentPageRecord['slug'])) {
                    $rootLineItemSlug = $parentPageRecord['slug'];
                } else {
                    $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
                }
                $rootLineItemSlug = trim($rootLineItemSlug, '/');
                if (!empty($rootLineItemSlug)) {
                    $prefix = $rootLineItemSlug;
                }
            }
        }

        $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
        $slugParts = [];
        foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldName) {
            if (!empty($recordData[$fieldName])) {
                $slugParts[] = $recordData[$fieldName];
            }
        }
        $slug = implode($fieldSeparator, $slugParts);
        $slug = $this->sanitize($slug);
        // No valid data found
        if ($slug === '' || $slug === '/') {
            $slug = 'default-' . GeneralUtility::shortMD5(json_encode($recordData));
        }
        if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
            $slug = '/' . $slug;
        }
        if (!empty($prefix)) {
            $slug = $prefix . $slug;
        }

        return $this->sanitize($slug);
    }

    /**
     * Checks if there are other records with the same slug that are located on the same PID.
     *
     * @param string $slug
     * @param RecordState $state
     * @return bool
     */
    public function isUniqueInPid(string $slug, RecordState $state): bool
    {
        $pageId = (int)$state->resolveAggregateNodeIdentifier();
        $recordId = $state->getSubject()->getIdentifier();
        $languageId = $state->getContext()->getLanguageId();

        // A negative page id is resolved through the live version of the record
        if ($pageId < 0) {
            $pageId = $this->resolveLivePageId($recordId);
        }

        $queryBuilder = $this->createPreparedQueryBuilder();
        $this->applySlugConstraint($queryBuilder, $slug);
        $this->applyPageIdConstraint($queryBuilder, $pageId);
        $this->applyRecordConstraint($queryBuilder, $recordId);
        $this->applyLanguageConstraint($queryBuilder, $languageId);
        $this->applyWorkspaceConstraint($queryBuilder);
        $statement = $queryBuilder->execute();

        $records = $this->resolveVersionOverlays(
            $statement->fetchAll()
        );
        return count($records) === 0;
    }

    /**
     * Check if there are other records with the same slug that are located on the same site.
     *
     * @param string $slug
     * @param RecordState $state
     * @return bool
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     */
    public function isUniqueInSite(string $slug, RecordState $state): bool
    {
        $pageId = (int)$state->resolveAggregateNodeIdentifier();
        $recordId = $state->getSubject()->getIdentifier();
        $languageId = $state->getContext()->getLanguageId();

        // A negative page id is resolved through the live version of the record
        if ($pageId < 0) {
            $pageId = $this->resolveLivePageId($recordId);
        }

        $queryBuilder = $this->createPreparedQueryBuilder();
        $this->applySlugConstraint($queryBuilder, $slug);
        $this->applyRecordConstraint($queryBuilder, $recordId);
        $this->applyLanguageConstraint($queryBuilder, $languageId);
        $this->applyWorkspaceConstraint($queryBuilder);
        $statement = $queryBuilder->execute();

        $records = $this->resolveVersionOverlays(
            $statement->fetchAll()
        );
        if (count($records) === 0) {
            return true;
        }

        // The installation contains at least ONE other record with the same slug
        // Now find out if it is the same root page ID
        $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
        $siteOfCurrentRecord = $siteMatcher->matchByPageId($pageId);
        foreach ($records as $record) {
            // Only a record of table "pages" is a page itself; the uid of a record of
            // any other table is no page id, such a record is located via its pid.
            $pageIdOfExistingRecord = (int)($this->tableName === 'pages' ? $record['uid'] : $record['pid']);
            try {
                $siteOfExistingRecord = $siteMatcher->matchByPageId($pageIdOfExistingRecord);
            } catch (SiteNotFoundException $exception) {
                // In case no site is found, the record is not
                // organized in any site or pseudo-site
                continue;
            }
            if ($siteOfExistingRecord->getRootPageId() === $siteOfCurrentRecord->getRootPageId()) {
                return false;
            }
        }

        // Otherwise, everything is still fine
        return true;
    }

    /**
     * Generate a slug with a suffix "/mytitle-1" if that is in use already.
     *
     * @param string $slug proposed slug
     * @param RecordState $state
     * @return string
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     */
    public function buildSlugForUniqueInSite(string $slug, RecordState $state): string
    {
        return $this->buildUniqueSlug(
            $slug,
            function (string $candidate) use ($state): bool {
                return $this->isUniqueInSite($candidate, $state);
            }
        );
    }

    /**
     * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
     *
     * @param string $slug proposed slug
     * @param RecordState $state
     * @return string
     */
    public function buildSlugForUniqueInPid(string $slug, RecordState $state): string
    {
        return $this->buildUniqueSlug(
            $slug,
            function (string $candidate) use ($state): bool {
                return $this->isUniqueInPid($candidate, $state);
            }
        );
    }

    /**
     * Shared implementation of the buildSlugForUniqueIn*() methods.
     *
     * Returns the sanitized slug if it is unique. Otherwise the suffixes "-1" to
     * "-100" are tried in order and the first unique candidate is returned. If
     * all of them are taken, a hash of the slug payload is used as suffix; that
     * last value is not checked for uniqueness anymore.
     *
     * @param string $slug proposed slug
     * @param callable $isUnique receives a slug candidate (string) and returns bool
     * @return string
     */
    protected function buildUniqueSlug(string $slug, callable $isUnique): string
    {
        $slug = $this->sanitize($slug);
        if ($isUnique($slug)) {
            return $slug;
        }

        $rawValue = $this->extract($slug);
        for ($counter = 1; $counter <= 100; $counter++) {
            $candidate = $this->sanitize($rawValue . '-' . $counter);
            if ($isUnique($candidate)) {
                return $candidate;
            }
        }

        // All numbered candidates are in use
        return $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }

    /**
     * Creates a query builder selecting uid, pid and the slug field (plus
     * t3ver_state for workspace enabled tables) from the table. All default
     * restrictions are removed, only the DeletedRestriction is added.
     *
     * @return QueryBuilder
     */
    protected function createPreparedQueryBuilder(): QueryBuilder
    {
        $fieldNames = ['uid', 'pid', $this->fieldName];
        if ($this->workspaceEnabled) {
            $fieldNames[] = 't3ver_state';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
        $queryBuilder
            ->select(...$fieldNames)
            ->from($this->tableName);
        return $queryBuilder;
    }

    /**
     * Limits the query to records of workspace 0 and - if set - of the current
     * workspace, and excludes rows having pid = -1.
     * Does nothing for tables that are not workspace enabled.
     *
     * @param QueryBuilder $queryBuilder
     */
    protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
    {
        if (!$this->workspaceEnabled) {
            return;
        }

        $workspaceIds = [0];
        if ($this->workspaceId > 0) {
            $workspaceIds[] = $this->workspaceId;
        }
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                't3ver_wsid',
                $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
            ),
            $queryBuilder->expr()->neq(
                'pid',
                $queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Limits the query to records of the given language.
     * Does nothing if the table has no language field configured in TCA.
     *
     * @param QueryBuilder $queryBuilder
     * @param int $languageId
     */
    protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
    {
        $languageField = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
        if (!is_string($languageField)) {
            return;
        }

        // Only check records of the given language
        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq(
                $languageField,
                $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Limits the query to records having exactly the given slug.
     *
     * This uses where() instead of andWhere(), thus it has to be applied
     * before the other apply*Constraint() methods.
     *
     * @param QueryBuilder $queryBuilder
     * @param string $slug
     */
    protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
    {
        $queryBuilder->where(
            $queryBuilder->expr()->eq(
                $this->fieldName,
                $queryBuilder->createNamedParameter($slug)
            )
        );
    }

    /**
     * Limits the query to records located on the given page.
     *
     * @param QueryBuilder $queryBuilder
     * @param int $pageId
     * @throws \RuntimeException if the page id is negative
     */
    protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
    {
        if ($pageId < 0) {
            throw new \RuntimeException(
                sprintf(
                    'Page id must be positive "%d"',
                    $pageId
                ),
                1534962573
            );
        }

        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq(
                'pid',
                $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT)
            )
        );
    }

    /**
     * Excludes the current record (and, when working in a workspace, its live
     * version) from the query. Non-numeric identifiers are ignored.
     *
     * @param QueryBuilder $queryBuilder
     * @param string|int $recordId
     */
    protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
    {
        // Exclude the current record if it is an existing record
        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
            return;
        }

        $queryBuilder->andWhere(
            $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($recordId, \PDO::PARAM_INT))
        );
        if ($this->workspaceId > 0 && $this->workspaceEnabled) {
            $liveId = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
            $queryBuilder->andWhere(
                $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($liveId, \PDO::PARAM_INT))
            );
        }
    }

    /**
     * Resolves the pid of the live version of the given record.
     *
     * @param string|int $recordId
     * @return int
     * @throws \RuntimeException if the identifier is not numeric or no live version is found
     */
    protected function resolveLivePageId($recordId): int
    {
        if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
            throw new \RuntimeException(
                sprintf(
                    'Cannot resolve live page id for non-numeric identifier "%s"',
                    $recordId
                ),
                1534951024
            );
        }

        $liveVersion = BackendUtility::getLiveVersionOfRecord(
            $this->tableName,
            $recordId,
            'pid'
        );

        if (empty($liveVersion)) {
            throw new \RuntimeException(
                sprintf(
                    'Cannot resolve live page id for record "%s:%d"',
                    $this->tableName,
                    $recordId
                ),
                1534951025
            );
        }

        return (int)$liveVersion['pid'];
    }

    /**
     * Applies the workspace overlay to each record and drops records that are
     * delete placeholders (or empty after the overlay). Array keys are kept.
     * Records of tables that are not workspace enabled are returned as is.
     *
     * @param array $records
     * @return array
     */
    protected function resolveVersionOverlays(array $records): array
    {
        if (!$this->workspaceEnabled) {
            return $records;
        }

        return array_filter(
            array_map(
                function (array $record) {
                    // NOTE(review): workspaceOL() presumably modifies $record by reference
                    // (its return value is not used here) - confirm against BackendUtility
                    BackendUtility::workspaceOL(
                        $this->tableName,
                        $record,
                        $this->workspaceId,
                        true
                    );
                    if (VersionState::cast($record['t3ver_state'] ?? null)
                        ->equals(VersionState::DELETE_PLACEHOLDER)) {
                        return null;
                    }
                    return $record;
                },
                $records
            )
        );
    }

    /**
     * Fetch a parent page from the rootline of the given page, skipping all
     * pages with doktype >= 199 (spacers, recyclers, sys-folders, ...) as long as
     * further rootline entries exist. For $languageId > 0 the localized page
     * record is returned if one exists.
     *
     * @param int $pid
     * @param int $languageId
     * @return array|null
     */
    protected function resolveParentPageRecord(int $pid, int $languageId): ?array
    {
        $parentPageRecord = null;
        $rootLine = BackendUtility::BEgetRootLine($pid, '', true, ['nav_title']);
        do {
            $parentPageRecord = array_shift($rootLine);
            // do not use spacers (199), recyclers and folders and everything else
        } while (!empty($rootLine) && (int)$parentPageRecord['doktype'] >= 199);
        // array_shift() yields null for an empty rootline, there is nothing to localize then
        if ($languageId > 0 && is_array($parentPageRecord)) {
            $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
            if (!empty($localizedParentPageRecord)) {
                $parentPageRecord = reset($localizedParentPageRecord);
            }
        }
        return $parentPageRecord;
    }
}