4c37b1f743bfdd31d4d07d8ba93907769c8f5901
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Database\Connection;
19 use TYPO3\CMS\Core\Database\ConnectionPool;
20 use TYPO3\CMS\Core\Database\Query\QueryHelper;
21 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
22 use TYPO3\CMS\Core\Html\HtmlParser;
23 use TYPO3\CMS\Core\Localization\LanguageService;
24 use TYPO3\CMS\Core\Utility\GeneralUtility;
25
/**
 * Collects links from the configured record fields (via the soft reference
 * parsers configured in TCA), hands them to the registered link type checkers
 * and stores the outcome in the table tx_linkvalidator_link.
 */
class LinkAnalyzer
{
    /**
     * Array of tables and fields to search for broken links
     *
     * @var array
     */
    protected $searchFields = [];

    /**
     * List of page uids (rootline downwards)
     *
     * @var array
     */
    protected $pids = [];

    /**
     * Number of checked links, keyed by table name
     *
     * @var array
     */
    protected $linkCounts = [];

    /**
     * Number of links written to tx_linkvalidator_link, keyed by table name
     *
     * @var array
     */
    protected $brokenLinkCounts = [];

    /**
     * Array of tables and records containing broken links
     *
     * @var array
     */
    protected $recordsWithBrokenLinks = [];

    /**
     * Link type checkers, keyed by the link type they were registered for
     *
     * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = [];

    /**
     * Pages collected by extGetTreeList(); each entry is
     * [page uid, HTML-escaped page title, remaining depth]
     *
     * @var array
     */
    protected $extPageInTreeInfo = [];

    /**
     * Reference to the current element with table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * Linked page together with a possible anchor, e.g. 85#c105
     *
     * @var string
     */
    protected $pageWithAnchor = '';

    /**
     * The currently active TSConfig. Will be passed to the init function.
     *
     * @var array
     */
    protected $tsConfig = [];

    /**
     * Loads the module labels and instantiates every link type checker
     * registered in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'].
     */
    public function __construct()
    {
        $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
        // Hook to handle own checks
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [] as $key => $className) {
            $this->hookObjectsArr[$key] = GeneralUtility::makeInstance($className);
        }
    }

    /**
     * Store all the needed configuration values in class variables
     *
     * @param array $searchField List of fields in which to search for links (table name => field names)
     * @param string $pidList List of comma separated page uids in which to search for links
     * @param array $tsConfig The currently active TSConfig.
     */
    public function init(array $searchField, $pidList, $tsConfig)
    {
        $this->searchFields = $searchField;
        $this->pids = GeneralUtility::intExplode(',', $pidList, true);
        $this->tsConfig = $tsConfig;
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Does nothing when no check option is given or no page uids were set
     * through init(). Otherwise the stored entries of the affected pages and
     * link types are removed, all configured tables are scanned again and every
     * collected link is passed to the checker registered for its type.
     *
     * @param array $checkOptions Link types to check (link type => truthy value to activate)
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $results = [];
        $checkKeys = array_keys($checkOptions);
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        // Remove the stored entries of the selected pages for the link types that are about to be re-checked
        $queryBuilder = $connectionPool->getQueryBuilderForTable('tx_linkvalidator_link');
        $queryBuilder->delete('tx_linkvalidator_link')
            ->where(
                $queryBuilder->expr()->orX(
                    $queryBuilder->expr()->in(
                        'record_pid',
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    ),
                    $queryBuilder->expr()->andX(
                        $queryBuilder->expr()->in(
                            'record_uid',
                            $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                        ),
                        $queryBuilder->expr()->eq(
                            'table_name',
                            $queryBuilder->createNamedParameter('pages', \PDO::PARAM_STR)
                        )
                    )
                ),
                $queryBuilder->expr()->in(
                    'link_type',
                    $queryBuilder->createNamedParameter($checkKeys, Connection::PARAM_STR_ARRAY)
                )
            )
            ->execute();

        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it
            if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
                continue;
            }
            $queryBuilder = $connectionPool->getQueryBuilderForTable($table);

            if ($considerHidden) {
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);

            // Page records are matched by their own uid, all other records by the page they are stored on
            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }

        $linkConnection = $connectionPool->getConnectionForTable('tx_linkvalidator_link');
        // When set, links that passed the check are stored as well
        $showAllLinks = (bool)GeneralUtility::_GP('showalllinks');

        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // Only check link types that were requested and for which links were collected
            if (empty($checkOptions[$key]) || !isset($results[$key]) || !is_array($results[$key])) {
                continue;
            }
            foreach ($results[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = [
                    'headline' => BackendUtility::getRecordTitle($table, $entryValue['row']),
                    'record_pid' => $entryValue['row']['pid'],
                    'record_uid' => $entryValue['uid'],
                    'table_name' => $table,
                    // Only entries created by analyseTypoLinks() carry a link title
                    'link_title' => $entryValue['link_title'] ?? '',
                    'field' => $entryValue['field'],
                    'last_check' => time(),
                ];
                $this->recordReference = $entryValue['substr']['recordRef'] ?? '';
                // Only entries created by analyseTypoLinks() carry a page/anchor reference
                $this->pageWithAnchor = $entryValue['pageAndAnchor'] ?? '';
                // Page with anchor, e.g. 18#1580, takes precedence over the plain token value
                $url = !empty($this->pageWithAnchor)
                    ? $this->pageWithAnchor
                    : $entryValue['substr']['tokenValue'];

                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;

                $isValid = (bool)$hookObj->checkLink($url, $entryValue, $this);
                if ($isValid && !$showAllLinks) {
                    continue;
                }

                $response = ['valid' => $isValid];
                if (!$isValid) {
                    // Broken link found
                    $response['errorParams'] = $hookObj->getErrorParams();
                }
                // Counts every stored entry, i.e. also valid links when "showalllinks" is set
                $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                $record['link_type'] = $key;
                $record['url'] = $url;
                $record['url_response'] = serialize($response);
                $linkConnection->insert('tx_linkvalidator_link', $record);
            }
        }
    }

    /**
     * Collect all links of a specific record
     *
     * Emits the "beforeAnalyzeRecord" signal, then runs every soft reference
     * parser configured in TCA for the given fields and adds the references
     * that were found to $results.
     *
     * @param array $results Collected links, grouped by link type (passed by reference)
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyse
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        list($results, $record) = $this->emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields);

        /** @var HtmlParser $htmlParser */
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';
            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }
            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }
            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);
                if (!is_object($softRefObj)) {
                    continue;
                }
                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }
                if ($spKey === 'typolink_tag') {
                    $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyseLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }

    /**
     * Returns the TSConfig that was passed to the init() method.
     *
     * This can be used by link checkers that get a reference of this
     * object passed to the checkLink() method.
     *
     * @return array
     */
    public function getTSConfig()
    {
        return $this->tsConfig;
    }

    /**
     * Add the references of a soft reference parser result to $results
     *
     * Every element with a non-empty "subst" part is offered to all registered
     * link type checkers and stored under the resulting type with the key
     * "table:field:uid:tokenID".
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type (passed by reference)
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            $r = $element['subst'] ?? null;
            if (empty($r)) {
                continue;
            }
            $type = '';
            /** @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }
            $entryKey = $table . ':' . $field . ':' . $idRecord . ':' . ($r['tokenID'] ?? '');
            $results[$type][$entryKey]['substr'] = $r;
            $results[$type][$entryKey]['row'] = $record;
            $results[$type][$entryKey]['table'] = $table;
            $results[$type][$entryKey]['field'] = $field;
            $results[$type][$entryKey]['uid'] = $idRecord;
        }
    }

    /**
     * Add the references found inside <a> and <link> tags to $results
     *
     * The parsed content is split into link tags. For each tag the elements
     * whose token is contained in that tag are evaluated: a reference to
     * "pages" provides the page uid, a directly following reference to
     * "tt_content" is appended as anchor ("#c<uid>"). The resulting entry also
     * carries the tag content stripped of markup as link title.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type (passed by reference)
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $currentR = [];
        $title = '';
        // Remembers, also across link tags, whether the last matched reference pointed to a page
        $wasPage = false;
        $idRecord = $record['uid'];
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $countLinkTags = count($linkTags);
        // splitIntoBlock() output is read at every odd index, which is where the tag blocks are expected
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            $referencedRecordType = '';
            foreach ($resultArray['elements'] as $element) {
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }
                // Type of referenced record
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($recordRef, 'tt_content') !== false && $wasPage) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }
            $type = '';
            /** @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }
            $entryKey = $table . ':' . $field . ':' . $idRecord . ':' . ($currentR['tokenID'] ?? '');
            $results[$type][$entryKey]['substr'] = $currentR;
            $results[$type][$entryKey]['row'] = $record;
            $results[$type][$entryKey]['table'] = $table;
            $results[$type][$entryKey]['field'] = $field;
            $results[$type][$entryKey]['uid'] = $idRecord;
            $results[$type][$entryKey]['link_title'] = $title;
            $results[$type][$entryKey]['pageAndAnchor'] = $referencedRecordType;
        }
    }

    /**
     * Fill a marker array with the number of links found in a list of pages
     *
     * The page uids set through init() are used. $curPage only serves as a
     * fallback when init() did not provide any page uid.
     *
     * @param string $curPage Comma separated list of page uids
     * @return array Number of stored links per link type, plus their sum in the key "brokenlinkCount"
     */
    public function getLinkCounts($curPage)
    {
        // Always provide the total, also when no entry is stored at all
        $markerArray = ['brokenlinkCount' => 0];

        $pageIds = $this->pids;
        if (empty($pageIds)) {
            $pageIds = GeneralUtility::intExplode(',', (string)$curPage, true);
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_linkvalidator_link');
        $queryBuilder->getRestrictions()->removeAll();

        $result = $queryBuilder->select('link_type')
            ->addSelectLiteral($queryBuilder->expr()->count('uid', 'nbBrokenLinks'))
            ->from('tx_linkvalidator_link')
            ->where(
                $queryBuilder->expr()->in(
                    'record_pid',
                    $queryBuilder->createNamedParameter($pageIds, Connection::PARAM_INT_ARRAY)
                )
            )
            ->groupBy('link_type')
            ->execute();

        while ($row = $result->fetch()) {
            $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
            $markerArray['brokenlinkCount'] += (int)$row['nbBrokenLinks'];
        }
        return $markerArray;
    }

    /**
     * Generates a list of page uids below $id. List does not include $id itself.
     * Deleted pages are always excluded; hidden pages are excluded unless
     * $considerHidden is set. Sub pages of a hidden page with "extendToSubpages"
     * are not traversed unless $considerHidden is set.
     *
     * Every collected page is also appended to $this->extPageInTreeInfo.
     *
     * $begin has no default value: it precedes the required $permsClause, so a
     * default could never take effect.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin Number of levels below $id to skip before pages are added to the list
     * @param string $permsClause Perms clause
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin, $permsClause, $considerHidden = false)
    {
        $depth = (int)$depth;
        $begin = (int)$begin;
        $id = (int)$id;
        $theList = '';
        if ($depth <= 0) {
            return $theList;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $result = $queryBuilder
            ->select('uid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'pid',
                    $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
                ),
                QueryHelper::stripLogicalOperatorPrefix($permsClause)
            )
            ->execute();

        while ($row = $result->fetch()) {
            if ($begin <= 0 && ($row['hidden'] == 0 || $considerHidden)) {
                $theList .= $row['uid'] . ',';
                // The depth is stored as its own element; it must not be passed as flags to htmlspecialchars()
                $this->extPageInTreeInfo[] = [$row['uid'], htmlspecialchars($row['title']), $depth];
            }
            if ($depth > 1 && (!($row['hidden'] == 1 && $row['extendToSubpages'] == 1) || $considerHidden)) {
                $theList .= $this->extGetTreeList(
                    $row['uid'],
                    $depth - 1,
                    $begin - 1,
                    $permsClause,
                    $considerHidden
                );
            }
        }
        return $theList;
    }

    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hidden for its rootline when it is hidden and has
     * "extendToSubpages" set. The check walks up the parent pages until such a
     * page is found, a parent cannot be loaded, or pid 0 is reached.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        if (($pageInfo['extendToSubpages'] ?? 0) == 1 && ($pageInfo['hidden'] ?? 0) == 1) {
            return true;
        }

        $parentId = (int)($pageInfo['pid'] ?? 0);
        if ($parentId <= 0) {
            return false;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()->removeAll();

        // "pid" has to be selected as well, otherwise the recursion ends after the direct parent
        $row = $queryBuilder
            ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($parentId, \PDO::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        return is_array($row) ? $this->getRootLineIsHidden($row) : false;
    }

    /**
     * Emits a signal before the record is analyzed
     *
     * @param array $results Collected links, grouped by link type
     * @param array $record Record to analyse
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @return array The dispatched arguments; analyzeRecord() reads $results and $record from the first two entries
     */
    protected function emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields)
    {
        return $this->getSignalSlotDispatcher()->dispatch(
            self::class,
            'beforeAnalyzeRecord',
            [$results, $record, $table, $fields, $this]
        );
    }

    /**
     * @return \TYPO3\CMS\Extbase\SignalSlot\Dispatcher
     */
    protected function getSignalSlotDispatcher()
    {
        return $this->getObjectManager()->get(\TYPO3\CMS\Extbase\SignalSlot\Dispatcher::class);
    }

    /**
     * @return \TYPO3\CMS\Extbase\Object\ObjectManager
     */
    protected function getObjectManager()
    {
        return GeneralUtility::makeInstance(\TYPO3\CMS\Extbase\Object\ObjectManager::class);
    }

    /**
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        return $GLOBALS['LANG'];
    }
}