6b313a7e3963253e62e29fb84658a5832ce972fb
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / DataHandling / SlugHelper.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Core\DataHandling;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Doctrine\DBAL\Connection;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Charset\CharsetConverter;
21 use TYPO3\CMS\Core\Database\ConnectionPool;
22 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24 use TYPO3\CMS\Core\Routing\SiteMatcher;
25 use TYPO3\CMS\Core\Utility\GeneralUtility;
26 use TYPO3\CMS\Core\Utility\MathUtility;
27 use TYPO3\CMS\Core\Versioning\VersionState;
28
29 /**
30 * Generates, sanitizes and validates slugs for a TCA field
31 */
32 class SlugHelper
33 {
34 /**
35 * @var string
36 */
37 protected $tableName;
38
39 /**
40 * @var string
41 */
42 protected $fieldName;
43
44 /**
45 * @var int
46 */
47 protected $workspaceId;
48
49 /**
50 * @var array
51 */
52 protected $configuration = [];
53
54 /**
55 * @var bool
56 */
57 protected $workspaceEnabled;
58
59 /**
60 * Defines whether the slug field should start with "/".
61 * For pages (due to rootline functionality), this is a must have, otherwise the root level page
62 * would have an empty value.
63 *
64 * @var bool
65 */
66 protected $prependSlashInSlug;
67
/**
 * Creates a slug helper bound to a single TCA table field.
 *
 * @param string $tableName TCA table the slug field belongs to
 * @param string $fieldName TCA field holding the slug
 * @param array $configuration TCA configuration of the field
 * @param int $workspaceId ID of the workspace to operate in
 */
public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
{
    $isPagesSlugField = $tableName === 'pages' && $fieldName === 'slug';

    $this->tableName = $tableName;
    $this->fieldName = $fieldName;
    $this->configuration = $configuration;
    $this->workspaceId = $workspaceId;
    // The "slug" field of "pages" always gets a leading slash,
    // every other field follows its "prependSlash" setting (off by default)
    $this->prependSlashInSlug = $isPagesSlugField ? true : ($configuration['prependSlash'] ?? false);
    $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);
}
91
/**
 * Cleans a slug value so it can be used directly in the path segment of a URL.
 *
 * The value is lower-cased, stripped of tags, transliterated to ASCII and reduced
 * to letters, digits, slashes and the configured fallback character (default "-").
 * Wrapping slashes and fallback characters are removed; a trailing slash is kept
 * when the remaining slug is not empty, and a leading slash is added when this
 * helper is set up to prepend one.
 *
 * @param string $slug
 * @return string
 */
public function sanitize(string $slug): string
{
    // Convert to lowercase + remove tags
    $slug = mb_strtolower($slug, 'utf-8');
    $slug = strip_tags($slug);

    // Convert runs of separator tokens (space, tab, non-breaking space, "-", "+" and "_")
    // to the fallback character
    $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
    $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);

    // Convert extended letters to ascii equivalents
    // The specCharsToASCII() converts "€" to "EUR"
    $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);

    // Quote the fallback character including the pattern delimiter "/", otherwise a
    // fallback character containing a slash would terminate the patterns below early
    $quotedFallbackCharacter = preg_quote($fallbackCharacter, '/');

    // Get rid of all invalid characters, but allow slashes
    $slug = preg_replace('/[^\p{L}0-9\/' . $quotedFallbackCharacter . ']/u', '', $slug);

    // Convert multiple fallback characters to a single one. The group makes the
    // quantifier apply to the whole fallback character and not only to its last
    // character (or byte) when it is longer than one byte
    if ($fallbackCharacter !== '') {
        $slug = preg_replace('/(?:' . $quotedFallbackCharacter . '){2,}/', $fallbackCharacter, $slug);
    }

    // Ensure slug is lower cased after all replacement was done
    $slug = mb_strtolower($slug, 'utf-8');
    // keep slashes: re-convert them after rawurlencode did everything
    $slug = rawurlencode($slug);
    // @todo: add a test and see if we need this
    $slug = str_replace('%2F', '/', $slug);
    // Extract slug, thus it does not have wrapping fallback and slash characters
    $extractedSlug = $this->extract($slug);
    // Remove trailing and beginning slashes, except if the trailing slash was added, then we'll re-add it
    $appendTrailingSlash = $extractedSlug !== '' && substr($slug, -1) === '/';
    $slug = $extractedSlug . ($appendTrailingSlash ? '/' : '');
    if ($this->prependSlashInSlug) {
        $slug = '/' . $slug;
    }
    return $slug;
}
136
/**
 * Returns the payload of a slug without its wrapping delimiters,
 * e.g. `/hello/world/` becomes `hello/world`.
 *
 * @param string $slug
 * @return string
 */
public function extract(string $slug): string
{
    // Characters to strip from both ends: the configured fallback character (default "-") and the slash
    $wrappingCharacters = ($this->configuration['fallbackCharacter'] ?? '-') . '/';
    return trim($slug, $wrappingCharacters);
}
150
/**
 * Generates a slug for a record that does not have one yet.
 *
 * The slug is built from the non-empty record fields listed in
 * "generatorOptions.fields", joined by "generatorOptions.fieldSeparator"
 * (default "/"). With "generatorOptions.prefixParentPageSlug" enabled, the slug
 * of the first rootline entry of $pid (or of its localization when the record
 * has a language > 0) is put in front. If the fields yield no usable value, a
 * "default-<hash>" slug derived from the record data is used.
 *
 * @param array $recordData
 * @param int $pid
 * @return string
 */
public function generate(array $recordData, int $pid): string
{
    if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
        return '/';
    }
    $prefix = '';
    if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
        // Tables without a language field (or records lacking the value) are treated as language 0
        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
        $languageId = is_string($languageFieldName) ? (int)($recordData[$languageFieldName] ?? 0) : 0;
        $rootLine = BackendUtility::BEgetRootLine($pid, '', true, ['nav_title']);
        // reset() yields false for an empty rootline, hence the is_array() guards below
        $parentPageRecord = reset($rootLine);
        if ($languageId > 0 && is_array($parentPageRecord)) {
            $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
            if (!empty($localizedParentPageRecord)) {
                $parentPageRecord = reset($localizedParentPageRecord);
            }
        }
        if (is_array($parentPageRecord)) {
            // If the parent page has a slug, use that instead of "re-generating" the slug from the parents' page title
            if (!empty($parentPageRecord['slug'])) {
                $rootLineItemSlug = $parentPageRecord['slug'];
            } else {
                $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
            }
            $rootLineItemSlug = trim($rootLineItemSlug, '/');
            if (!empty($rootLineItemSlug)) {
                $prefix = $rootLineItemSlug;
            }
        }
    }

    $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
    $slugParts = [];
    foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldName) {
        if (!empty($recordData[$fieldName])) {
            $slugParts[] = $recordData[$fieldName];
        }
    }
    $slug = implode($fieldSeparator, $slugParts);
    $slug = $this->sanitize($slug);
    // No valid data found
    if ($slug === '' || $slug === '/') {
        $slug = 'default-' . GeneralUtility::shortMD5(json_encode($recordData));
    }
    // sanitize() already prepends the slash when configured to do so; adding a second
    // one here would end up as "prefix//slug", which the final sanitize() does not repair
    if ($this->prependSlashInSlug && strpos($slug, '/') !== 0) {
        $slug = '/' . $slug;
    }
    if (!empty($prefix)) {
        $slug = $prefix . $slug;
    }

    return $this->sanitize($slug);
}
210
/**
 * Tells whether no other record located on the same PID uses the given slug.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool
 */
public function isUniqueInPid(string $slug, $recordId, int $pageId, int $languageId): bool
{
    // A negative page id cannot be queried directly; fall back to the pid of the record's live version
    $targetPageId = $pageId < 0 ? $this->resolveLivePageId($recordId) : $pageId;

    $queryBuilder = $this->createPreparedQueryBuilder();
    // The slug constraint has to come first, the remaining constraints are appended to it
    $this->applySlugConstraint($queryBuilder, $slug);
    $this->applyPageIdConstraint($queryBuilder, $targetPageId);
    $this->applyRecordConstraint($queryBuilder, $recordId);
    $this->applyLanguageConstraint($queryBuilder, $languageId);
    $this->applyWorkspaceConstraint($queryBuilder);

    $conflictingRecords = $this->resolveVersionOverlays(
        $queryBuilder->execute()->fetchAll()
    );
    return count($conflictingRecords) === 0;
}
239
/**
 * Checks if there are other records with the same slug that are located on the same site.
 *
 * All records using the slug (in the given language and workspace) are fetched
 * first; the slug only counts as taken when one of them resolves to the same
 * site root page as $pageId.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool
 */
public function isUniqueInSite(string $slug, $recordId, int $pageId, int $languageId): bool
{
    if ($pageId < 0) {
        $pageId = $this->resolveLivePageId($recordId);
    }

    $queryBuilder = $this->createPreparedQueryBuilder();
    $this->applySlugConstraint($queryBuilder, $slug);
    $this->applyRecordConstraint($queryBuilder, $recordId);
    $this->applyLanguageConstraint($queryBuilder, $languageId);
    $this->applyWorkspaceConstraint($queryBuilder);
    $statement = $queryBuilder->execute();

    $records = $this->resolveVersionOverlays(
        $statement->fetchAll()
    );
    if (count($records) === 0) {
        return true;
    }

    // The installation contains at least ONE other record with the same slug
    // Now find out if it is the same root page ID
    $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
    $rootPageIdOfCurrentRecord = $siteMatcher->matchByPageId($pageId)->getRootPageId();
    foreach ($records as $record) {
        // Only records of "pages" are pages themselves; the uid of any other record
        // is not a page id, such a record lives on the page referenced by its pid
        $pageIdOfExistingRecord = (int)($this->tableName === 'pages' ? $record['uid'] : $record['pid']);
        $siteOfExistingRecord = $siteMatcher->matchByPageId($pageIdOfExistingRecord);
        if ($siteOfExistingRecord->getRootPageId() === $rootPageIdOfCurrentRecord) {
            return false;
        }
    }

    // Otherwise, everything is still fine
    return true;
}
283
/**
 * Generate a slug with a suffix "/mytitle-1" if that is in use already.
 *
 * The suffixes "-1" to "-99" are tried in order. If none of them is unique
 * within the site, a hash of the slug is appended instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInSite(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    $counter = 0;
    // Pre-increment on purpose: the counter is only raised after a failed uniqueness
    // check and reaches exactly 100 when all attempts are exhausted. A post-increment
    // would leave it at 101 in that case (fallback skipped) and at 100 when "-100"
    // turned out to be unique (valid slug overwritten by the fallback).
    while (
        !$this->isUniqueInSite($newValue, $recordId, $realPid, $languageId)
        && ++$counter < 100
    ) {
        $newValue = $this->sanitize($rawValue . '-' . $counter);
    }
    if ($counter === 100) {
        $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }
    return $newValue;
}
313
/**
 * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
 *
 * The suffixes "-1" to "-99" are tried in order. If none of them is unique
 * on the page, a hash of the slug is appended instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInPid(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    $counter = 0;
    // Pre-increment on purpose: the counter is only raised after a failed uniqueness
    // check and reaches exactly 100 when all attempts are exhausted. A post-increment
    // would leave it at 101 in that case (fallback skipped) and at 100 when "-100"
    // turned out to be unique (valid slug overwritten by the fallback).
    while (
        !$this->isUniqueInPid($newValue, $recordId, $realPid, $languageId)
        && ++$counter < 100
    ) {
        $newValue = $this->sanitize($rawValue . '-' . $counter);
    }
    if ($counter === 100) {
        $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }
    return $newValue;
}
343
/**
 * Creates a query builder selecting uid, pid and the slug field (plus
 * t3ver_state for workspace-enabled tables) from the table of this helper.
 * All default restrictions except the deleted restriction are removed.
 *
 * @return QueryBuilder
 */
protected function createPreparedQueryBuilder(): QueryBuilder
{
    $selectFields = $this->workspaceEnabled
        ? ['uid', 'pid', $this->fieldName, 't3ver_state']
        : ['uid', 'pid', $this->fieldName];

    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
    $queryBuilder = $connectionPool->getQueryBuilderForTable($this->tableName);
    $queryBuilder->getRestrictions()
        ->removeAll()
        ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
    $queryBuilder->select(...$selectFields);
    $queryBuilder->from($this->tableName);

    return $queryBuilder;
}
363
/**
 * Limits the query to records of the live workspace (0) and, if one is set,
 * the current workspace. Does nothing for tables without workspace support.
 *
 * @param QueryBuilder $queryBuilder
 */
protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
{
    if ($this->workspaceEnabled) {
        $workspaceIds = $this->workspaceId > 0 ? [0, $this->workspaceId] : [0];
        $expressionBuilder = $queryBuilder->expr();

        $inRelevantWorkspace = $expressionBuilder->in(
            't3ver_wsid',
            $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
        );
        // NOTE(review): presumably skips offline version records, which TYPO3 keeps at pid -1 - confirm
        $notOnPidMinusOne = $expressionBuilder->neq(
            'pid',
            $queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT)
        );

        $queryBuilder->andWhere($inRelevantWorkspace, $notOnPidMinusOne);
    }
}
388
/**
 * Limits the query to records of the given language. Does nothing when the
 * table has no language field configured in TCA.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $languageId
 */
protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
{
    $languageField = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
    if (is_string($languageField)) {
        // Only check records of the given language
        $languageParameter = $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT);
        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq($languageField, $languageParameter)
        );
    }
}
408
/**
 * Restricts the query to records whose slug field equals the given slug.
 *
 * This uses where() and not andWhere(); the callers in this class apply it
 * before any of the other constraints.
 *
 * @param QueryBuilder $queryBuilder
 * @param string $slug
 */
protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
{
    $slugParameter = $queryBuilder->createNamedParameter($slug);
    $queryBuilder->where(
        $queryBuilder->expr()->eq($this->fieldName, $slugParameter)
    );
}
422
/**
 * Restricts the query to records located on the given page.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $pageId page id, must not be negative
 * @throws \RuntimeException if the page id is negative
 */
protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
{
    if ($pageId < 0) {
        $message = sprintf('Page id must be positive "%d"', $pageId);
        throw new \RuntimeException($message, 1534962573);
    }

    $pageIdParameter = $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT);
    $queryBuilder->andWhere(
        $queryBuilder->expr()->eq('pid', $pageIdParameter)
    );
}
446
/**
 * Excludes the record itself from the query so it does not count as a conflict.
 * Identifiers that are not integers (new records) add no constraint.
 *
 * @param QueryBuilder $queryBuilder
 * @param string|int $recordId
 */
protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
{
    // Nothing to exclude for records that do not exist yet
    if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
        return;
    }

    $recordParameter = $queryBuilder->createNamedParameter($recordId, \PDO::PARAM_INT);
    $queryBuilder->andWhere(
        $queryBuilder->expr()->neq('uid', $recordParameter)
    );

    if ($this->workspaceId <= 0 || !$this->workspaceEnabled) {
        return;
    }

    // Inside a workspace the live version of the record has to be excluded as well;
    // the record id itself is used when no live version id is returned
    $liveId = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
    $liveParameter = $queryBuilder->createNamedParameter($liveId, \PDO::PARAM_INT);
    $queryBuilder->andWhere(
        $queryBuilder->expr()->neq('uid', $liveParameter)
    );
}
468
/**
 * Returns the pid of the live version of the given record.
 *
 * @param string|int $recordId identifier of an existing record
 * @return int
 * @throws \RuntimeException if the identifier is not numeric or no live version is found
 */
protected function resolveLivePageId($recordId): int
{
    $isExistingRecord = MathUtility::canBeInterpretedAsInteger($recordId);
    if (!$isExistingRecord) {
        $message = sprintf('Cannot resolve live page id for non-numeric identifier "%s"', $recordId);
        throw new \RuntimeException($message, 1534951024);
    }

    // Only the "pid" field of the live version is requested
    $liveVersion = BackendUtility::getLiveVersionOfRecord($this->tableName, $recordId, 'pid');
    if (empty($liveVersion)) {
        $message = sprintf('Cannot resolve live page id for record "%s:%d"', $this->tableName, $recordId);
        throw new \RuntimeException($message, 1534951025);
    }

    return (int)$liveVersion['pid'];
}
505
/**
 * Applies the workspace overlay to each record and drops records that are
 * empty afterwards or marked as delete placeholder. Records of tables
 * without workspace support are returned unchanged. Array keys are kept.
 *
 * @param array $records
 * @return array
 */
protected function resolveVersionOverlays(array $records): array
{
    if ($this->workspaceEnabled) {
        $overlayRecord = function (array $record) {
            // workspaceOL() receives the record variable and may replace its contents
            BackendUtility::workspaceOL($this->tableName, $record, $this->workspaceId, true);
            $versionState = VersionState::cast($record['t3ver_state'] ?? null);
            // Records flagged as delete placeholder must not count as conflicts
            return $versionState->equals(VersionState::DELETE_PLACEHOLDER) ? null : $record;
        };
        // array_filter() without callback removes the null (and any other empty) results
        $records = array_filter(array_map($overlayRecord, $records));
    }

    return $records;
}
535 }