[CLEANUP] Introduce early returns in class LinkAnalyzer
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Database\Connection;
19 use TYPO3\CMS\Core\Database\ConnectionPool;
20 use TYPO3\CMS\Core\Database\Query\QueryHelper;
21 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
22 use TYPO3\CMS\Core\Html\HtmlParser;
23 use TYPO3\CMS\Core\Localization\LanguageService;
24 use TYPO3\CMS\Core\Utility\GeneralUtility;
25
/**
 * Collects links from the configured table fields of a set of pages, runs them
 * through the registered link type checkers and stores the outcome in the
 * table tx_linkvalidator_link.
 */
class LinkAnalyzer
{
    /**
     * Array of tables and fields to search for broken links
     *
     * @var array
     */
    protected $searchFields = [];

    /**
     * List of page uids (rootline downwards)
     *
     * @var array
     */
    protected $pids = [];

    /**
     * Array of tables and the number of external links they contain
     *
     * @var array
     */
    protected $linkCounts = [];

    /**
     * Array of tables and the number of broken external links they contain
     *
     * @var array
     */
    protected $brokenLinkCounts = [];

    /**
     * Array of tables and records containing broken links
     *
     * @var array
     */
    protected $recordsWithBrokenLinks = [];

    /**
     * Link type checkers registered through the 'checkLinks' hook, keyed by link type
     *
     * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = [];

    /**
     * Array with information about the pages collected by extGetTreeList()
     *
     * @var array
     */
    protected $extPageInTreeInfo = [];

    /**
     * Reference to the current element with table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * Linked page together with a possible anchor, e.g. 85#c105
     *
     * @var string
     */
    protected $pageWithAnchor = '';

    /**
     * The currently active TSConfig. Will be passed to the init function.
     *
     * @var array
     */
    protected $tsConfig = [];

    /**
     * Loads the module labels and instantiates every link type checker
     * registered in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'].
     */
    public function __construct()
    {
        $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');

        // Hook to handle own checks
        $registeredCheckers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredCheckers as $linkType => $className) {
            $this->hookObjectsArr[$linkType] = GeneralUtility::makeInstance($className);
        }
    }

    /**
     * Store all the needed configuration values in class variables
     *
     * @param array $searchField List of fields in which to search for links
     * @param string $pidList List of comma separated page uids in which to search for links
     * @param array $tsConfig The currently active TSConfig.
     */
    public function init(array $searchField, $pidList, $tsConfig)
    {
        $this->searchFields = $searchField;
        $this->pids = GeneralUtility::intExplode(',', $pidList, true);
        $this->tsConfig = $tsConfig;
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Does nothing when no link type is requested or no page uids were
     * configured through init().
     *
     * @param array $checkOptions List of hook object to activate
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $this->purgeStoredLinks(array_keys($checkOptions));
        $results = $this->collectLinksFromSearchFields($considerHidden);
        $this->checkCollectedLinks($results, $checkOptions);
    }

    /**
     * Removes the stored entries of the given link types that belong to the
     * configured pages (records on those pages and the page records themselves).
     *
     * @param array $linkTypes Link type keys whose entries are removed
     */
    private function purgeStoredLinks(array $linkTypes)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_linkvalidator_link');
        $expr = $queryBuilder->expr();

        $onConfiguredPages = $expr->in(
            'record_pid',
            $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
        );
        $isConfiguredPageRecord = $expr->andX(
            $expr->in(
                'record_uid',
                $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
            ),
            $expr->eq(
                'table_name',
                $queryBuilder->createNamedParameter('pages', \PDO::PARAM_STR)
            )
        );

        $queryBuilder->delete('tx_linkvalidator_link')
            ->where(
                $expr->orX($onConfiguredPages, $isConfiguredPageRecord),
                $expr->in(
                    'link_type',
                    $queryBuilder->createNamedParameter($linkTypes, Connection::PARAM_STR_ARRAY)
                )
            )
            ->execute();
    }

    /**
     * Reads the records of every configured table from the configured pages
     * and lets analyzeRecord() extract their links.
     *
     * @param bool $considerHidden Defines whether to look into hidden fields
     * @return array Collected links, grouped by link type
     */
    private function collectLinksFromSearchFields($considerHidden)
    {
        $results = [];

        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);
            if ($considerHidden) {
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            // Page records are matched by their own uid, everything else by its pid
            $pageColumn = $table === 'pages' ? 'uid' : 'pid';

            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        $pageColumn,
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }

        return $results;
    }

    /**
     * Runs every collected link through the checker of its link type and
     * stores broken links (or all links if the request parameter
     * "showalllinks" is set) in tx_linkvalidator_link.
     *
     * @param array $results Collected links, grouped by link type
     * @param array $checkOptions List of hook object to activate
     */
    private function checkCollectedLinks(array $results, array $checkOptions)
    {
        foreach ($this->hookObjectsArr as $linkType => $hookObj) {
            if (!is_array($results[$linkType] ?? null) || empty($checkOptions[$linkType])) {
                continue;
            }

            foreach ($results[$linkType] as $entryValue) {
                $table = $entryValue['table'];
                $record = [
                    'headline' => BackendUtility::getRecordTitle($table, $entryValue['row']),
                    'record_pid' => $entryValue['row']['pid'],
                    'record_uid' => $entryValue['uid'],
                    'table_name' => $table,
                    // Only entries created by analyseTypoLinks() carry a link title
                    'link_title' => $entryValue['link_title'] ?? null,
                    'field' => $entryValue['field'],
                    'last_check' => time(),
                ];
                $this->recordReference = $entryValue['substr']['recordRef'] ?? '';
                $this->pageWithAnchor = $entryValue['pageAndAnchor'] ?? '';

                // Page with anchor, e.g. 18#1580, takes precedence over the plain token value
                $url = !empty($this->pageWithAnchor)
                    ? $this->pageWithAnchor
                    : $entryValue['substr']['tokenValue'];

                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;

                if (!$hookObj->checkLink($url, $entryValue, $this)) {
                    // Broken link found
                    $response = [
                        'valid' => false,
                        'errorParams' => $hookObj->getErrorParams(),
                    ];
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    $response = ['valid' => true];
                } else {
                    continue;
                }

                // NOTE(review): valid links stored via "showalllinks" are counted as broken
                // here as well; kept as is because the counter is not evaluated in this class.
                $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                $record['link_type'] = $linkType;
                $record['url'] = $url;
                $record['url_response'] = serialize($response);
                GeneralUtility::makeInstance(ConnectionPool::class)
                    ->getConnectionForTable('tx_linkvalidator_link')
                    ->insert('tx_linkvalidator_link', $record);
            }
        }
    }

    /**
     * Find all supported broken links for a specific record
     *
     * Every field with a "softref" configuration in TCA and a non-empty value
     * is parsed by its soft reference parsers; the found elements are added
     * to $results.
     *
     * @param array $results Array of broken links
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyse
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        list($results, $record) = $this->emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields);

        /** @var HtmlParser $htmlParser */
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];

        foreach ($fields as $field) {
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? null;

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);
                if (!is_object($softRefObj)) {
                    continue;
                }

                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyseLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }

    /**
     * Returns the TSConfig that was passed to the init() method.
     *
     * This can be used by link checkers that get a reference of this
     * object passed to the checkLink() method.
     *
     * @return array
     */
    public function getTSConfig()
    {
        return $this->tsConfig;
    }

    /**
     * Find all supported broken links for a specific link list
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            $softRefEntry = $element['subst'] ?? null;
            if (empty($softRefEntry)) {
                continue;
            }

            $type = $this->detectLinkType($softRefEntry, '');
            $this->storeLinkResult($results, $type, $table, $field, $idRecord, $softRefEntry, $record);
        }
    }

    /**
     * Find all supported broken links for a specific typoLink
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $currentR = [];
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $type = '';
        $title = '';
        // Remembers whether the previously matched element referenced a page,
        // so that a following tt_content reference can be appended as anchor
        $wasPage = false;
        $countLinkTags = count($linkTags);

        // Only the odd entries of the split result are processed as link tags
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            $referencedRecordType = '';
            foreach ($resultArray['elements'] as $element) {
                $type = '';
                $r = $element['subst'] ?? null;
                // Skip elements that do not belong to the current link tag
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif ($wasPage === true && strpos($recordRef, 'tt_content') !== false) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }

            $type = $this->detectLinkType($currentR, $type);
            $this->storeLinkResult(
                $results,
                $type,
                $table,
                $field,
                $idRecord,
                $currentR,
                $record,
                [
                    'link_title' => $title,
                    'pageAndAnchor' => $referencedRecordType,
                ]
            );
        }
    }

    /**
     * Asks every registered link type checker for the type of a soft
     * reference entry. Each checker receives the type found so far; a
     * non-empty answer is stored in the entry as 'type'.
     *
     * @param array $softRefEntry Soft reference entry, gets 'type' set if one was found
     * @param string $type Type to start with
     * @return string Type returned by the last checker
     */
    private function detectLinkType(&$softRefEntry, $type)
    {
        /** @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype $hookObj */
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($softRefEntry, $type, $keyArr);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($type)) {
                $softRefEntry['type'] = $type;
            }
        }
        return $type;
    }

    /**
     * Writes one found link into $results below its type, keyed by
     * "table:field:uid:tokenID". Data of an already existing entry with the
     * same key is kept unless it is overwritten by the new values.
     *
     * @param array $results Array of broken links
     * @param string $type Link type the entry is stored under
     * @param string $table The current table
     * @param string $field The current field
     * @param int $idRecord Uid of the current record
     * @param array $softRefEntry Soft reference entry of the link
     * @param array $record The current record
     * @param array $additionalData Further values to store with the entry
     */
    private function storeLinkResult(array &$results, $type, $table, $field, $idRecord, $softRefEntry, array $record, array $additionalData = [])
    {
        $entryKey = $table . ':' . $field . ':' . $idRecord . ':' . ($softRefEntry['tokenID'] ?? '');
        $results[$type][$entryKey] = array_merge(
            $results[$type][$entryKey] ?? [],
            [
                'substr' => $softRefEntry,
                'row' => $record,
                'table' => $table,
                'field' => $field,
                'uid' => $idRecord,
            ],
            $additionalData
        );
    }

    /**
     * Fill a marker array with the number of links found in a list of pages
     *
     * The stored entries are counted per link type for the page uids passed
     * to init(); the key 'brokenlinkCount' holds the sum over all types and
     * is only present if at least one entry was found.
     *
     * @param string $curPage Unused, the page uids passed to init() are evaluated. Kept for compatibility.
     * @return array Marker array with the number of links found
     */
    public function getLinkCounts($curPage)
    {
        $markerArray = [];

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_linkvalidator_link');
        $queryBuilder->getRestrictions()->removeAll();

        $result = $queryBuilder->select('link_type')
            ->addSelectLiteral($queryBuilder->expr()->count('uid', 'nbBrokenLinks'))
            ->from('tx_linkvalidator_link')
            ->where(
                $queryBuilder->expr()->in(
                    'record_pid',
                    $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                )
            )
            ->groupBy('link_type')
            ->execute();

        while ($row = $result->fetch()) {
            $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
            // Start the sum at zero instead of adding to an undefined key
            $markerArray['brokenlinkCount'] = ($markerArray['brokenlinkCount'] ?? 0) + $row['nbBrokenLinks'];
        }
        return $markerArray;
    }

    /**
     * Generates a list of page uids below $id. List does not include $id itself.
     * Deleted pages are never part of the list; hidden pages only if
     * $considerHidden is set. Subpages of a hidden page with "extendToSubpages"
     * are not traversed unless $considerHidden is set.
     *
     * Every collected page is also recorded in $this->extPageInTreeInfo as
     * [uid, escaped title, remaining depth].
     *
     * NOTE(review): the optional $begin precedes the required $permsClause, so
     * its default can never take effect; the signature is kept for compatibility.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin Number of tree levels to skip before uids are collected. Zero means "start right away".
     * @param string $permsClause Perms clause
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin = 0, $permsClause, $considerHidden = false)
    {
        $depth = (int)$depth;
        $begin = (int)$begin;
        $id = (int)$id;
        $theList = '';
        if ($depth === 0) {
            return $theList;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $result = $queryBuilder
            ->select('uid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'pid',
                    $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
                ),
                QueryHelper::stripLogicalOperatorPrefix($permsClause)
            )
            ->execute();

        while ($row = $result->fetch()) {
            $isVisible = $row['hidden'] == 0 || $considerHidden;
            if ($begin <= 0 && $isVisible) {
                $theList .= $row['uid'] . ',';
                // The depth is stored next to the title; it must not be handed to
                // htmlspecialchars(), whose second argument is a flags bitmask
                $this->extPageInTreeInfo[] = [$row['uid'], htmlspecialchars($row['title']), $depth];
            }

            $hidesSubpages = $row['hidden'] == 1 && $row['extendToSubpages'] == 1;
            if ($depth > 1 && (!$hidesSubpages || $considerHidden)) {
                $theList .= $this->extGetTreeList(
                    $row['uid'],
                    $depth - 1,
                    $begin - 1,
                    $permsClause,
                    $considerHidden
                );
            }
        }
        return $theList;
    }

    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hiding its rootline if it is hidden and has
     * "extendToSubpages" set. The check walks up the parent pages until such
     * a page is found, the root level (pid 0) is reached or a parent cannot
     * be loaded.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        if (($pageInfo['extendToSubpages'] ?? 0) == 1 && ($pageInfo['hidden'] ?? 0) == 1) {
            return true;
        }

        // Database rows deliver the pid as string, so compare after casting
        $parentUid = (int)($pageInfo['pid'] ?? 0);
        if ($parentUid === 0) {
            return false;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()->removeAll();

        // 'pid' has to be selected as well, otherwise the recursion ends at the direct parent
        $row = $queryBuilder
            ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($parentUid, \PDO::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        return is_array($row) ? $this->getRootLineIsHidden($row) : false;
    }

    /**
     * Emits a signal before the record is analyzed
     *
     * @param array $results Array of broken links
     * @param array $record Record to analyse
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @return array Signal arguments as returned by the dispatcher
     */
    protected function emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields)
    {
        $signalArguments = [$results, $record, $table, $fields, $this];
        return $this->getSignalSlotDispatcher()->dispatch(self::class, 'beforeAnalyzeRecord', $signalArguments);
    }

    /**
     * @return \TYPO3\CMS\Extbase\SignalSlot\Dispatcher
     */
    protected function getSignalSlotDispatcher()
    {
        return $this->getObjectManager()->get(\TYPO3\CMS\Extbase\SignalSlot\Dispatcher::class);
    }

    /**
     * @return \TYPO3\CMS\Extbase\Object\ObjectManager
     */
    protected function getObjectManager()
    {
        return GeneralUtility::makeInstance(\TYPO3\CMS\Extbase\Object\ObjectManager::class);
    }

    /**
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        return $GLOBALS['LANG'];
    }
}