[BUGFIX] Avoid double slashes in slug generation
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / DataHandling / SlugHelper.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Core\DataHandling;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Doctrine\DBAL\Connection;
19 use TYPO3\CMS\Backend\Utility\BackendUtility;
20 use TYPO3\CMS\Core\Charset\CharsetConverter;
21 use TYPO3\CMS\Core\Database\ConnectionPool;
22 use TYPO3\CMS\Core\Database\Query\QueryBuilder;
23 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24 use TYPO3\CMS\Core\Routing\SiteMatcher;
25 use TYPO3\CMS\Core\Utility\GeneralUtility;
26 use TYPO3\CMS\Core\Utility\MathUtility;
27 use TYPO3\CMS\Core\Versioning\VersionState;
28
29 /**
30 * Generates, sanitizes and validates slugs for a TCA field
31 */
32 class SlugHelper
33 {
/**
 * Name of the TCA table this helper works on, as passed to the constructor.
 *
 * @var string
 */
protected $tableName;

/**
 * Name of the slug field within the table, as passed to the constructor.
 *
 * @var string
 */
protected $fieldName;

/**
 * ID of the workspace to be working on; values greater than 0 add that
 * workspace to the workspace constraint of the uniqueness queries.
 *
 * @var int
 */
protected $workspaceId;

/**
 * TCA configuration of the field. The keys read by this class are
 * "prependSlash", "fallbackCharacter" and "generatorOptions".
 *
 * @var array
 */
protected $configuration = [];

/**
 * Result of BackendUtility::isTableWorkspaceEnabled() for the table,
 * determined once in the constructor.
 *
 * @var bool
 */
protected $workspaceEnabled;

/**
 * Defines whether the slug field should start with "/".
 * For pages (due to rootline functionality), this is a must have, otherwise the root level page
 * would have an empty value.
 *
 * @var bool
 */
protected $prependSlashInSlug;
67
/**
 * Sets up the helper for a single slug field of a single TCA table.
 *
 * @param string $tableName TCA table
 * @param string $fieldName TCA field
 * @param array $configuration TCA configuration of the field
 * @param int $workspaceId the workspace ID to be working on.
 */
public function __construct(string $tableName, string $fieldName, array $configuration, int $workspaceId = 0)
{
    $this->tableName = $tableName;
    $this->fieldName = $fieldName;
    $this->configuration = $configuration;
    $this->workspaceId = $workspaceId;
    $this->workspaceEnabled = BackendUtility::isTableWorkspaceEnabled($tableName);

    // The "slug" field of "pages" always gets a leading slash; every other
    // field follows its "prependSlash" configuration (off when not set).
    $isPageSlugField = $tableName === 'pages' && $fieldName === 'slug';
    $this->prependSlashInSlug = $isPageSlugField ? true : ($configuration['prependSlash'] ?? false);
}
91
/**
 * Cleans a slug value so it can be used directly in the path segment of a URL.
 *
 * The value is lower-cased, stripped of tags, transliterated to ASCII and reduced
 * to letters, digits, slashes and the configured fallback character (default "-").
 * Wrapping slashes and fallback characters are removed; a trailing slash is kept
 * if the input ended with one, and a leading slash is added when the field is
 * configured to start with "/".
 *
 * @param string $slug
 * @return string
 */
public function sanitize(string $slug): string
{
    // Convert to lowercase + remove tags
    $slug = mb_strtolower($slug, 'utf-8');
    $slug = strip_tags($slug);

    // Convert some special tokens (whitespace, "+", "_" and "-") to the fallback character
    $fallbackCharacter = (string)($this->configuration['fallbackCharacter'] ?? '-');
    // Quote for use inside "/"-delimited patterns, so a fallback character
    // containing "/" cannot terminate the pattern early
    $quotedFallbackCharacter = preg_quote($fallbackCharacter, '/');
    $slug = preg_replace('/[ \t\x{00A0}\-+_]+/u', $fallbackCharacter, $slug);

    // Convert extended letters to ascii equivalents
    // The specCharsToASCII() converts "€" to "EUR"
    $slug = GeneralUtility::makeInstance(CharsetConverter::class)->specCharsToASCII('utf-8', $slug);

    // Get rid of all invalid characters, but allow slashes
    $slug = preg_replace('/[^\p{L}0-9\/' . $quotedFallbackCharacter . ']/u', '', $slug);

    // Convert multiple fallback characters to a single one. The group makes the
    // quantifier apply to the whole fallback string, not just its last character.
    if ($fallbackCharacter !== '') {
        $slug = preg_replace('/(?:' . $quotedFallbackCharacter . '){2,}/', $fallbackCharacter, $slug);
    }

    // Ensure slug is lower cased after all replacement was done
    $slug = mb_strtolower($slug, 'utf-8');
    // keep slashes: re-convert them after rawurlencode did everything
    $slug = rawurlencode($slug);
    // @todo: add a test and see if we need this
    $slug = str_replace('%2F', '/', $slug);
    // Extract slug, thus it does not have wrapping fallback and slash characters
    $extractedSlug = $this->extract($slug);
    // Remove trailing and beginning slashes, except if the trailing slash was added, then we'll re-add it
    $appendTrailingSlash = $extractedSlug !== '' && substr($slug, -1) === '/';
    $slug = $extractedSlug . ($appendTrailingSlash ? '/' : '');
    // Square-bracket offset access: the curly-brace form is deprecated in
    // PHP 7.4 and a parse error in PHP 8
    if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
        $slug = '/' . $slug;
    }
    return $slug;
}
136
/**
 * Returns the payload of a slug without its wrapping delimiters,
 * e.g. `/hello/world/` will become `hello/world`.
 *
 * Both slashes and the configured fallback character (default "-") are
 * stripped from the beginning and the end of the value.
 *
 * @param string $slug
 * @return string
 */
public function extract(string $slug): string
{
    $fallbackCharacter = $this->configuration['fallbackCharacter'] ?? '-';
    // Every character listed here is treated as a wrapping delimiter by trim()
    $wrappingCharacters = $fallbackCharacter . '/';
    return trim($slug, $wrappingCharacters);
}
150
/**
 * Used when no slug exists for a record.
 *
 * Builds the slug from the record fields listed in
 * generatorOptions.fields, joined by generatorOptions.fieldSeparator
 * (default "/"). When generatorOptions.prefixParentPageSlug is set, the
 * slug of the first root line entry of $pid is prepended. If the fields
 * yield no usable value, a "default-<hash>" slug is produced instead.
 *
 * @param array $recordData
 * @param int $pid
 * @return string
 */
public function generate(array $recordData, int $pid): string
{
    if ($pid === 0 || (!empty($recordData['is_siteroot']) && $this->tableName === 'pages')) {
        return '/';
    }
    $prefix = '';
    if ($this->configuration['generatorOptions']['prefixParentPageSlug'] ?? false) {
        $languageFieldName = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
        $languageId = (int)($recordData[$languageFieldName] ?? 0);
        $rootLine = $this->resolveRootLine($pid);
        // reset() yields false for an empty root line, so the record must be
        // verified to be an array before any of its keys are read
        $parentPageRecord = reset($rootLine);
        if (is_array($parentPageRecord) && $languageId > 0) {
            $localizedParentPageRecord = BackendUtility::getRecordLocalization('pages', $parentPageRecord['uid'], $languageId);
            if (!empty($localizedParentPageRecord)) {
                $parentPageRecord = reset($localizedParentPageRecord);
            }
        }
        if (is_array($parentPageRecord)) {
            // If the parent page has a slug, use that instead of "re-generating" the slug from the parents' page title
            if (!empty($parentPageRecord['slug'])) {
                $rootLineItemSlug = $parentPageRecord['slug'];
            } else {
                $rootLineItemSlug = $this->generate($parentPageRecord, (int)$parentPageRecord['pid']);
            }
            // Strip wrapping slashes so that concatenation below cannot produce "//"
            $rootLineItemSlug = trim($rootLineItemSlug, '/');
            // Compare against '' explicitly: empty() would also discard a parent slug of "0"
            if ($rootLineItemSlug !== '') {
                $prefix = $rootLineItemSlug;
            }
        }
    }

    $fieldSeparator = $this->configuration['generatorOptions']['fieldSeparator'] ?? '/';
    $slugParts = [];
    foreach ($this->configuration['generatorOptions']['fields'] ?? [] as $fieldName) {
        if (!empty($recordData[$fieldName])) {
            $slugParts[] = $recordData[$fieldName];
        }
    }
    $slug = implode($fieldSeparator, $slugParts);
    $slug = $this->sanitize($slug);
    // No valid data found
    if ($slug === '' || $slug === '/') {
        $slug = 'default-' . GeneralUtility::shortMD5(json_encode($recordData));
    }
    // Square-bracket offset access: the curly-brace form is deprecated in
    // PHP 7.4 and a parse error in PHP 8
    if ($this->prependSlashInSlug && ($slug[0] ?? '') !== '/') {
        $slug = '/' . $slug;
    }
    if ($prefix !== '') {
        $slug = $prefix . $slug;
    }

    return $this->sanitize($slug);
}
211
/**
 * Tells whether no other record on the same PID uses the given slug.
 *
 * A negative $pageId is replaced by the pid of the live version of $recordId.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool
 */
public function isUniqueInPid(string $slug, $recordId, int $pageId, int $languageId): bool
{
    $targetPageId = $pageId < 0 ? $this->resolveLivePageId($recordId) : $pageId;

    $query = $this->createPreparedQueryBuilder();
    $this->applySlugConstraint($query, $slug);
    $this->applyPageIdConstraint($query, $targetPageId);
    $this->applyRecordConstraint($query, $recordId);
    $this->applyLanguageConstraint($query, $languageId);
    $this->applyWorkspaceConstraint($query);

    $rows = $query->execute()->fetchAll();
    $conflictingRecords = $this->resolveVersionOverlays($rows);

    return $conflictingRecords === [];
}
240
/**
 * Check if there are other records with the same slug that are located on the same site.
 *
 * A negative $pageId is replaced by the pid of the live version of $recordId.
 * Records with the same slug only count as a conflict when the site matched
 * for their page has the same root page ID as the site matched for $pageId.
 *
 * @param string $slug
 * @param string|int $recordId
 * @param int $pageId
 * @param int $languageId
 * @return bool
 */
public function isUniqueInSite(string $slug, $recordId, int $pageId, int $languageId): bool
{
    if ($pageId < 0) {
        $pageId = $this->resolveLivePageId($recordId);
    }

    $queryBuilder = $this->createPreparedQueryBuilder();
    $this->applySlugConstraint($queryBuilder, $slug);
    $this->applyRecordConstraint($queryBuilder, $recordId);
    $this->applyLanguageConstraint($queryBuilder, $languageId);
    $this->applyWorkspaceConstraint($queryBuilder);
    $statement = $queryBuilder->execute();

    $records = $this->resolveVersionOverlays(
        $statement->fetchAll()
    );
    if (count($records) === 0) {
        return true;
    }

    // The installation contains at least ONE other record with the same slug
    // Now find out if it is the same root page ID
    $siteMatcher = GeneralUtility::makeInstance(SiteMatcher::class);
    $rootPageIdOfCurrentRecord = $siteMatcher->matchByPageId($pageId)->getRootPageId();
    // Only for the "pages" table is a record's uid a page ID; records of any
    // other table are located through the page they are stored on (pid)
    $pageIdField = $this->tableName === 'pages' ? 'uid' : 'pid';
    foreach ($records as $record) {
        $siteOfExistingRecord = $siteMatcher->matchByPageId((int)$record[$pageIdField]);
        if ($siteOfExistingRecord->getRootPageId() === $rootPageIdOfCurrentRecord) {
            return false;
        }
    }

    // Otherwise, everything is still fine
    return true;
}
284
/**
 * Generate a slug with a suffix "/mytitle-1" if that is in use already.
 *
 * Tries the sanitized slug first, then the suffixes "-1" to "-100". If none
 * of these is unique within the site, a suffix built from a hash of the raw
 * slug value is used instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInSite(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    // Track uniqueness explicitly instead of inferring it from the counter:
    // a counter check cannot tell "candidate 100 is unique" apart from
    // "all 100 candidates are taken".
    $isUnique = $this->isUniqueInSite($newValue, $recordId, $realPid, $languageId);
    for ($counter = 1; !$isUnique && $counter <= 100; $counter++) {
        $newValue = $this->sanitize($rawValue . '-' . $counter);
        $isUnique = $this->isUniqueInSite($newValue, $recordId, $realPid, $languageId);
    }
    if (!$isUnique) {
        // All numbered candidates are in use, fall back to a hash suffix
        $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }
    return $newValue;
}
314
/**
 * Generate a slug with a suffix "/mytitle-1" if the suggested slug is in use already.
 *
 * Tries the sanitized slug first, then the suffixes "-1" to "-100". If none
 * of these is unique on the given PID, a suffix built from a hash of the raw
 * slug value is used instead.
 *
 * @param string $slug proposed slug
 * @param mixed $recordId can be a new record (non-int) or an existing record ID
 * @param int $realPid pageID (already workspace-resolved)
 * @param int $languageId the language ID realm to be searched for
 * @return string
 */
public function buildSlugForUniqueInPid(string $slug, $recordId, int $realPid, int $languageId): string
{
    $slug = $this->sanitize($slug);
    $rawValue = $this->extract($slug);
    $newValue = $slug;
    // Track uniqueness explicitly instead of inferring it from the counter:
    // a counter check cannot tell "candidate 100 is unique" apart from
    // "all 100 candidates are taken".
    $isUnique = $this->isUniqueInPid($newValue, $recordId, $realPid, $languageId);
    for ($counter = 1; !$isUnique && $counter <= 100; $counter++) {
        $newValue = $this->sanitize($rawValue . '-' . $counter);
        $isUnique = $this->isUniqueInPid($newValue, $recordId, $realPid, $languageId);
    }
    if (!$isUnique) {
        // All numbered candidates are in use, fall back to a hash suffix
        $newValue = $this->sanitize($rawValue . '-' . GeneralUtility::shortMD5($rawValue));
    }
    return $newValue;
}
344
/**
 * Creates a query builder for the table that selects uid, pid and the slug
 * field (plus t3ver_state for workspace-enabled tables). All default
 * restrictions are removed and only the DeletedRestriction is added back.
 *
 * @return QueryBuilder
 */
protected function createPreparedQueryBuilder(): QueryBuilder
{
    $selectFields = array_merge(
        ['uid', 'pid', $this->fieldName],
        $this->workspaceEnabled ? ['t3ver_state'] : []
    );

    $builder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
    $builder->getRestrictions()
        ->removeAll()
        ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
    $builder
        ->select(...$selectFields)
        ->from($this->tableName);

    return $builder;
}
364
/**
 * Limits the query to records of workspace 0 and, if set, the current
 * workspace, and excludes records with pid -1. Does nothing for tables
 * that are not workspace-enabled.
 *
 * @param QueryBuilder $queryBuilder
 */
protected function applyWorkspaceConstraint(QueryBuilder $queryBuilder)
{
    if ($this->workspaceEnabled) {
        $workspaceIds = $this->workspaceId > 0 ? [0, $this->workspaceId] : [0];

        $inRelevantWorkspaces = $queryBuilder->expr()->in(
            't3ver_wsid',
            $queryBuilder->createNamedParameter($workspaceIds, Connection::PARAM_INT_ARRAY)
        );
        $notOnVersionPid = $queryBuilder->expr()->neq(
            'pid',
            $queryBuilder->createNamedParameter(-1, \PDO::PARAM_INT)
        );
        $queryBuilder->andWhere($inRelevantWorkspaces, $notOnVersionPid);
    }
}
389
/**
 * Limits the query to records of the given language. Does nothing when the
 * table has no string languageField configured in its TCA ctrl section.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $languageId
 */
protected function applyLanguageConstraint(QueryBuilder $queryBuilder, int $languageId)
{
    $languageField = $GLOBALS['TCA'][$this->tableName]['ctrl']['languageField'] ?? null;
    if (is_string($languageField)) {
        // Only check records of the given language
        $languageParameter = $queryBuilder->createNamedParameter($languageId, \PDO::PARAM_INT);
        $queryBuilder->andWhere(
            $queryBuilder->expr()->eq($languageField, $languageParameter)
        );
    }
}
409
/**
 * Sets the WHERE clause of the query to match records whose slug field
 * equals the given slug.
 *
 * @param QueryBuilder $queryBuilder
 * @param string $slug
 */
protected function applySlugConstraint(QueryBuilder $queryBuilder, string $slug)
{
    $slugParameter = $queryBuilder->createNamedParameter($slug);
    $slugMatches = $queryBuilder->expr()->eq($this->fieldName, $slugParameter);
    $queryBuilder->where($slugMatches);
}
423
/**
 * Limits the query to records stored on the given page.
 *
 * @param QueryBuilder $queryBuilder
 * @param int $pageId
 * @throws \RuntimeException if $pageId is negative
 */
protected function applyPageIdConstraint(QueryBuilder $queryBuilder, int $pageId)
{
    if ($pageId < 0) {
        $message = sprintf('Page id must be positive "%d"', $pageId);
        throw new \RuntimeException($message, 1534962573);
    }

    $pageIdParameter = $queryBuilder->createNamedParameter($pageId, \PDO::PARAM_INT);
    $queryBuilder->andWhere(
        $queryBuilder->expr()->eq('pid', $pageIdParameter)
    );
}
447
/**
 * Excludes the record itself from the query, provided $recordId can be
 * interpreted as an integer. When working in a workspace on a
 * workspace-enabled table, the live version ID of the record (or the
 * record ID itself if none is returned) is excluded as well.
 *
 * @param QueryBuilder $queryBuilder
 * @param string|int $recordId
 */
protected function applyRecordConstraint(QueryBuilder $queryBuilder, $recordId)
{
    // Exclude the current record if it is an existing record
    if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
        return;
    }

    $excludedUids = [$recordId];
    if ($this->workspaceId > 0 && $this->workspaceEnabled) {
        $excludedUids[] = BackendUtility::getLiveVersionIdOfRecord($this->tableName, $recordId) ?? $recordId;
    }

    foreach ($excludedUids as $excludedUid) {
        $queryBuilder->andWhere(
            $queryBuilder->expr()->neq('uid', $queryBuilder->createNamedParameter($excludedUid, \PDO::PARAM_INT))
        );
    }
}
469
/**
 * Returns the pid of the live version of the given record.
 *
 * @param string|int $recordId
 * @return int
 * @throws \RuntimeException if $recordId is not numeric or no live version is found
 */
protected function resolveLivePageId($recordId): int
{
    if (!MathUtility::canBeInterpretedAsInteger($recordId)) {
        $message = sprintf('Cannot resolve live page id for non-numeric identifier "%s"', $recordId);
        throw new \RuntimeException($message, 1534951024);
    }

    $liveVersion = BackendUtility::getLiveVersionOfRecord($this->tableName, $recordId, 'pid');
    if (empty($liveVersion)) {
        $message = sprintf('Cannot resolve live page id for record "%s:%d"', $this->tableName, $recordId);
        throw new \RuntimeException($message, 1534951025);
    }

    return (int)$liveVersion['pid'];
}
506
/**
 * Applies the workspace overlay to each record and drops records whose
 * version state is a delete placeholder, as well as any entry that is
 * empty after overlaying. Records of tables that are not
 * workspace-enabled are returned untouched.
 *
 * @param array $records
 * @return array
 */
protected function resolveVersionOverlays(array $records): array
{
    if (!$this->workspaceEnabled) {
        return $records;
    }

    $overlayRecord = function (array $record) {
        BackendUtility::workspaceOL($this->tableName, $record, $this->workspaceId, true);
        $versionState = VersionState::cast($record['t3ver_state'] ?? null);
        // null entries are removed by array_filter() below
        return $versionState->equals(VersionState::DELETE_PLACEHOLDER) ? null : $record;
    };

    $overlaidRecords = array_map($overlayRecord, $records);
    return array_filter($overlaidRecords);
}
536
/**
 * Fetches the root line of the given page via BackendUtility::BEgetRootLine(),
 * requesting "nav_title" as an additional field.
 *
 * @param int $pid
 * @return array
 */
protected function resolveRootLine(int $pid): array
{
    $additionalFields = ['nav_title'];
    $rootLine = BackendUtility::BEgetRootLine($pid, '', true, $additionalFields);
    return $rootLine;
}
545 }