[TASK] Use strict comparison for strings
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Database\Connection;
19 use TYPO3\CMS\Core\Database\ConnectionPool;
20 use TYPO3\CMS\Core\Database\Query\QueryHelper;
21 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
22 use TYPO3\CMS\Core\Html\HtmlParser;
23 use TYPO3\CMS\Core\Utility\GeneralUtility;
24 use TYPO3\CMS\Lang\LanguageService;
25
26 /**
27 * Collects links from the configured record fields, checks them with the registered link type handlers and stores broken links in tx_linkvalidator_link
28 */
29 class LinkAnalyzer
30 {
31
32 /**
33 * Array of tables and fields to search for broken links
34 *
35 * @var array
36 */
37 protected $searchFields = [];
38
39 /**
40 * List of comma separated page uids (rootline downwards)
41 *
42 * @var string
43 */
44 protected $pidList = '';
45
46 /**
47 * Array of tables and the number of external links they contain
48 *
49 * @var array
50 */
51 protected $linkCounts = [];
52
53 /**
54 * Array of tables and the number of broken external links they contain
55 *
56 * @var array
57 */
58 protected $brokenLinkCounts = [];
59
60 /**
61 * Array of tables and records containing broken links
62 *
63 * @var array
64 */
65 protected $recordsWithBrokenLinks = [];
66
67 /**
68 * Link type handlers registered through the 'checkLinks' hook, keyed by link type
69 *
70 * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
71 */
72 protected $hookObjectsArr = [];
73
74 /**
75 * Array with information about the current page
76 *
77 * @var array
78 */
79 protected $extPageInTreeInfo = [];
80
81 /**
82 * Reference to the current element with table:uid, e.g. pages:85
83 *
84 * @var string
85 */
86 protected $recordReference = '';
87
88 /**
89 * Linked page together with a possible anchor, e.g. 85#c105
90 *
91 * @var string
92 */
93 protected $pageWithAnchor = '';
94
95 /**
96 * The currently active TSConfig. Will be passed to the init function.
97 *
98 * @var array
99 */
100 protected $tsConfig = [];
101
/**
 * Loads the module language labels and instantiates one handler object per
 * link type registered in the 'checkLinks' hook configuration.
 */
public function __construct()
{
    $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');

    // Registered link type classes; each one becomes a handler in hookObjectsArr
    $registeredLinkTypes = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'];
    if (!is_array($registeredLinkTypes)) {
        return;
    }
    foreach ($registeredLinkTypes as $linkType => $classReference) {
        $this->hookObjectsArr[$linkType] = GeneralUtility::getUserObj($classReference);
    }
}
115
/**
 * Remembers the configuration used by the subsequent analysis calls.
 *
 * @param array $searchField Tables (keys) and their fields (values) in which to search for links
 * @param string $pid Comma separated list of page uids in which to search for links
 * @param array $tsConfig The currently active TSConfig
 * @return void
 */
public function init(array $searchField, $pid, $tsConfig)
{
    $this->tsConfig = $tsConfig;
    $this->pidList = $pid;
    $this->searchFields = $searchField;
}
130
/**
 * Find all supported broken links and store them in tx_linkvalidator_link
 *
 * Previously stored entries for the configured pages and the given link types
 * are deleted first. Then every configured table is scanned for links and each
 * link is passed to the handler registered for its link type.
 *
 * Nothing happens when $checkOptions or the configured page list is empty.
 *
 * @param array $checkOptions Link types to check: keys are link type identifiers, a truthy value activates the type
 * @param bool $considerHidden If TRUE, records are selected with only the "deleted" restriction applied
 * @return void
 */
public function getLinkStatistics($checkOptions = [], $considerHidden = false)
{
    $results = [];
    $pidList = GeneralUtility::intExplode(',', $this->pidList, true);
    if (empty($checkOptions) || empty($pidList)) {
        return;
    }
    $checkKeys = array_keys($checkOptions);

    // Remove stale entries: everything stored for records on the given pages
    // (or for the pages themselves) that belongs to one of the given link types
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_linkvalidator_link');
    $queryBuilder->delete('tx_linkvalidator_link')
        ->where(
            $queryBuilder->expr()->orX(
                $queryBuilder->expr()->in(
                    'record_pid',
                    $queryBuilder->createNamedParameter($pidList, Connection::PARAM_INT_ARRAY)
                ),
                $queryBuilder->expr()->andX(
                    $queryBuilder->expr()->in(
                        'record_uid',
                        $queryBuilder->createNamedParameter($pidList, Connection::PARAM_INT_ARRAY)
                    ),
                    $queryBuilder->expr()->eq(
                        'table_name',
                        $queryBuilder->createNamedParameter('pages', \PDO::PARAM_STR)
                    )
                )
            ),
            $queryBuilder->expr()->in(
                'link_type',
                $queryBuilder->createNamedParameter($checkKeys, Connection::PARAM_STR_ARRAY)
            )
        )
        ->execute();

    // Traverse all configured tables and collect the links they contain
    foreach ($this->searchFields as $table => $fields) {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
            continue;
        }
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);

        if ($considerHidden) {
            $queryBuilder->getRestrictions()
                ->removeAll()
                ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
        }

        $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);

        // Pages are matched by their own uid, all other records by their pid
        $result = $queryBuilder->select(...$selectFields)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in(
                    ($table === 'pages' ? 'uid' : 'pid'),
                    $queryBuilder->createNamedParameter($pidList, Connection::PARAM_INT_ARRAY)
                )
            )
            ->execute();

        // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
        while ($row = $result->fetch()) {
            $this->analyzeRecord($results, $table, $fields, $row);
        }
    }

    $linkConnection = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getConnectionForTable('tx_linkvalidator_link');

    foreach ($this->hookObjectsArr as $key => $hookObj) {
        // Only link types that produced results and were activated by the caller are checked.
        // $checkOptions is known to be non-empty here, so its entry for the type decides.
        if (!isset($results[$key]) || !is_array($results[$key]) || empty($checkOptions[$key])) {
            continue;
        }
        foreach ($results[$key] as $entryValue) {
            $table = $entryValue['table'];
            $record = [
                'headline' => BackendUtility::getRecordTitle($table, $entryValue['row']),
                'record_pid' => $entryValue['row']['pid'],
                'record_uid' => $entryValue['uid'],
                'table_name' => $table,
                // Only entries collected from typolink tags carry a link title
                'link_title' => isset($entryValue['link_title']) ? $entryValue['link_title'] : null,
                'field' => $entryValue['field'],
                'last_check' => time(),
            ];
            $this->recordReference = $entryValue['substr']['recordRef'];
            $this->pageWithAnchor = isset($entryValue['pageAndAnchor']) ? $entryValue['pageAndAnchor'] : '';

            // A page with anchor (e.g. 18#1580) takes precedence over the plain token value
            $url = !empty($this->pageWithAnchor)
                ? $this->pageWithAnchor
                : $entryValue['substr']['tokenValue'];

            if (!isset($this->linkCounts[$table])) {
                $this->linkCounts[$table] = 0;
            }
            $this->linkCounts[$table]++;

            if (!$hookObj->checkLink($url, $entryValue, $this)) {
                // Broken link found
                $response = [
                    'valid' => false,
                    'errorParams' => $hookObj->getErrorParams(),
                ];
            } elseif (GeneralUtility::_GP('showalllinks')) {
                // Valid links are stored as well when 'showalllinks' is requested
                $response = ['valid' => true];
            } else {
                continue;
            }

            // NOTE(review): this counter is also raised for valid links stored via
            // 'showalllinks'; kept as in the original implementation.
            if (!isset($this->brokenLinkCounts[$table])) {
                $this->brokenLinkCounts[$table] = 0;
            }
            $this->brokenLinkCounts[$table]++;

            $record['link_type'] = $key;
            $record['url'] = $url;
            $record['url_response'] = serialize($response);
            $linkConnection->insert('tx_linkvalidator_link', $record);
        }
    }
}
259
/**
 * Find all supported links of a specific record and add them to $results
 *
 * The 'beforeAnalyzeRecord' signal is emitted first; its return value replaces
 * $results and $record. Afterwards every given field that has soft references
 * configured in TCA and a non-empty value is run through its soft reference
 * parsers, and the references found are sorted into $results.
 *
 * @param array $results Collected links, grouped by link type (modified by reference)
 * @param string $table Table name of the record
 * @param array $fields Array of fields to analyze
 * @param array $record Record to analyse
 * @return void
 */
public function analyzeRecord(array &$results, $table, array $fields, array $record)
{
    list($results, $record) = $this->emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields);

    /** @var $htmlParser HtmlParser */
    $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
    $idRecord = $record['uid'];

    foreach ($fields as $field) {
        $conf = isset($GLOBALS['TCA'][$table]['columns'][$field]['config'])
            ? $GLOBALS['TCA'][$table]['columns'][$field]['config']
            : [];
        $valueField = isset($record[$field]) ? $record[$field] : null;

        // Only fields with soft references configured in TCA (see TYPO3 Core API document)
        // and with actual content can contain links
        if (empty($conf['softref']) || (string)$valueField === '') {
            continue;
        }

        // Explode the list of soft references/parameters
        $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
        if ($softRefs === false) {
            continue;
        }

        foreach ($softRefs as $spKey => $spParams) {
            /** @var $softRefObj \TYPO3\CMS\Core\Database\SoftReferenceIndex */
            $softRefObj = BackendUtility::softRefParserObj($spKey);
            if (!is_object($softRefObj)) {
                continue;
            }
            $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
            if (empty($resultArray['elements'])) {
                continue;
            }
            // typolink tags need the HTML parser to assign link titles and anchors
            if ($spKey === 'typolink_tag') {
                $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
            } else {
                $this->analyseLinks($resultArray, $results, $record, $field, $table);
            }
        }
    }
}
309
/**
 * Gives access to the TSConfig handed over in init().
 *
 * Link checkers receive this object as an argument of checkLink() and can
 * read their configuration through this getter.
 *
 * @return array
 */
public function getTSConfig()
{
    return $this->tsConfig;
}
322
/**
 * Sorts the references of a parsed field into $results, grouped by link type
 *
 * Each non-empty reference is offered to every registered link type handler
 * to determine its type and is then stored under the key
 * "table:field:uid:tokenID".
 *
 * @param array $resultArray findRef parsed records
 * @param array $results Collected links, grouped by link type (modified by reference)
 * @param array $record The current record
 * @param string $field The current field
 * @param string $table The current table
 * @return void
 */
protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table)
{
    foreach ($resultArray['elements'] as $element) {
        $r = $element['subst'];
        $idRecord = $record['uid'];
        if (empty($r)) {
            continue;
        }

        $type = '';
        /** @var $hookObj \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype */
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($r, $type, $keyArr);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($type)) {
                $r['type'] = $type;
            }
        }

        $entryKey = $table . ':' . $field . ':' . $idRecord . ':' . $r['tokenID'];
        $entryData = [
            'substr' => $r,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $idRecord,
        ];
        // Assign key by key so that other keys of an already existing entry survive
        foreach ($entryData as $name => $value) {
            $results[$type][$entryKey][$name] = $value;
        }
    }
}
357
/**
 * Sorts the references found in typolink tags into $results, grouped by link type
 *
 * The parsed content is split into link blocks. For every link block the
 * matching references are looked up by their token id; a page reference
 * followed by a tt_content reference is combined into "page#cUID".
 *
 * @param array $resultArray findRef parsed records
 * @param array $results Collected links, grouped by link type (modified by reference)
 * @param HtmlParser $htmlParser Instance of html parser
 * @param array $record The current record
 * @param string $field The current field
 * @param string $table The current table
 * @return void
 */
protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
{
    // These values deliberately live across link blocks, as in the original flow
    $currentR = [];
    $type = '';
    $title = '';
    $wasPage = false;

    $linkTags = $htmlParser->splitIntoBlock('link', $resultArray['content']);
    $idRecord = $record['uid'];
    $countLinkTags = count($linkTags);

    // Odd indexes are the ones inspected as link blocks
    for ($i = 1; $i < $countLinkTags; $i += 2) {
        $linkTag = $linkTags[$i];
        $referencedRecordType = '';

        foreach ($resultArray['elements'] as $element) {
            $type = '';
            $r = $element['subst'];
            if (empty($r['tokenID']) || !substr_count($linkTag, $r['tokenID'])) {
                continue;
            }
            // Type of referenced record
            if (strpos($r['recordRef'], 'pages') !== false) {
                $currentR = $r;
                // Contains number of the page
                $referencedRecordType = $r['tokenValue'];
                $wasPage = true;
            } elseif (strpos($r['recordRef'], 'tt_content') !== false && $wasPage === true) {
                // Content element directly after a page reference: append it as anchor
                $referencedRecordType .= '#c' . $r['tokenValue'];
                $wasPage = false;
            } else {
                $currentR = $r;
            }
            $title = strip_tags($linkTag);
        }

        /** @var $hookObj \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype */
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($currentR, $type, $keyArr);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($type)) {
                $currentR['type'] = $type;
            }
        }

        $entryKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
        $entryData = [
            'substr' => $currentR,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $idRecord,
            'link_title' => $title,
            'pageAndAnchor' => $referencedRecordType,
        ];
        // Assign key by key so that other keys of an already existing entry survive
        foreach ($entryData as $name => $value) {
            $results[$type][$entryKey][$name] = $value;
        }
    }
}
418
/**
 * Fill a marker array with the number of links found in a list of pages
 *
 * The page list passed to init() is used; $curPage is only used (and then
 * remembered) when no page list has been set. The stored page list stays a
 * comma separated string so that it can be exploded again by later calls.
 *
 * @param string $curPage Comma separated list of page uids, used as fallback
 * @return array Number of stored links per link type, plus their sum in 'brokenlinkCount'; empty if nothing is stored
 */
public function getLinkCounts($curPage)
{
    $markerArray = [];
    if (empty($this->pidList)) {
        $this->pidList = $curPage;
    }
    // Work on a local array: the property has to remain a string
    $pidList = GeneralUtility::intExplode(',', $this->pidList, true);
    if (empty($pidList)) {
        // Without pages there is nothing to count, and an empty IN() list is not valid SQL
        return $markerArray;
    }

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_linkvalidator_link');
    $queryBuilder->getRestrictions()->removeAll();

    $result = $queryBuilder->select('link_type')
        ->addSelectLiteral($queryBuilder->expr()->count('uid', 'nbBrokenLinks'))
        ->from('tx_linkvalidator_link')
        ->where(
            $queryBuilder->expr()->in(
                'record_pid',
                $queryBuilder->createNamedParameter($pidList, Connection::PARAM_INT_ARRAY)
            )
        )
        ->groupBy('link_type')
        ->execute();

    while ($row = $result->fetch()) {
        // The total is only added once there is at least one row, so an empty
        // result still yields an empty array
        if (!isset($markerArray['brokenlinkCount'])) {
            $markerArray['brokenlinkCount'] = 0;
        }
        $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
        $markerArray['brokenlinkCount'] += $row['nbBrokenLinks'];
    }
    return $markerArray;
}
452
/**
 * Calls TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
 * Although this duplicates the function TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList
 * this is necessary to create the object that is used recursively by the original function.
 *
 * Generates a list of page uids from $id. List does not include $id itself.
 * The only pages excluded by the query itself are deleted pages; hidden pages
 * are skipped afterwards unless $considerHidden is set. Every collected page is
 * also recorded in extPageInTreeInfo as [uid, HTML-escaped title].
 *
 * @param int $id Start page id
 * @param int $depth Depth to traverse down the page tree.
 * @param int $begin Number of levels to skip before pages are added to the list; decremented on every recursion level
 * @param string $permsClause Perms clause
 * @param bool $considerHidden Whether to consider hidden pages or not
 * @return string Returns the list with a comma in the end (if any pages selected!)
 */
public function extGetTreeList($id, $depth, $begin = 0, $permsClause, $considerHidden = false)
{
    $depth = (int)$depth;
    $begin = (int)$begin;
    $id = (int)$id;
    $theList = '';
    if ($depth <= 0) {
        return $theList;
    }

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
    $queryBuilder->getRestrictions()
        ->removeAll()
        ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

    $result = $queryBuilder
        ->select('uid', 'title', 'hidden', 'extendToSubpages')
        ->from('pages')
        ->where(
            $queryBuilder->expr()->eq(
                'pid',
                $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
            ),
            QueryHelper::stripLogicalOperatorPrefix($permsClause)
        )
        ->execute();

    while ($row = $result->fetch()) {
        $isHidden = (int)$row['hidden'];
        $hidesSubpages = $isHidden === 1 && (int)$row['extendToSubpages'] === 1;

        if ($begin <= 0 && ($isHidden === 0 || $considerHidden)) {
            $theList .= $row['uid'] . ',';
            // The depth must not be passed to htmlspecialchars(): its second
            // argument is the ENT_* flags bitmask, not a length or level
            $this->extPageInTreeInfo[] = [$row['uid'], htmlspecialchars($row['title'])];
        }
        // Descend unless the page is hidden and extends that state to its subpages
        if ($depth > 1 && (!$hidesSubpages || $considerHidden)) {
            $theList .= $this->extGetTreeList(
                $row['uid'],
                $depth - 1,
                $begin - 1,
                $permsClause,
                $considerHidden
            );
        }
    }
    return $theList;
}
510
/**
 * Check if rootline contains a hidden page
 *
 * A page counts as hiding its rootline when it is hidden and has
 * "extendToSubpages" set. If the given page does not, its parent is loaded
 * and checked the same way, up to the root of the tree.
 *
 * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
 * @return bool TRUE if rootline contains a hidden page, FALSE if not
 */
public function getRootLineIsHidden(array $pageInfo)
{
    if ((int)$pageInfo['extendToSubpages'] === 1 && (int)$pageInfo['hidden'] === 1) {
        return true;
    }
    if (!isset($pageInfo['pid']) || (int)$pageInfo['pid'] <= 0) {
        // Root of the tree reached without finding a hidden page
        return false;
    }

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
    $queryBuilder->getRestrictions()->removeAll();

    // 'pid' has to be selected as well: the recursive call reads it to continue
    // upwards, without it the walk would end at the direct parent
    $row = $queryBuilder
        ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
        ->from('pages')
        ->where(
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($pageInfo['pid'], \PDO::PARAM_INT)
            )
        )
        ->execute()
        ->fetch();

    if ($row === false) {
        return false;
    }
    return $this->getRootLineIsHidden($row);
}
547
/**
 * Dispatches the 'beforeAnalyzeRecord' signal of this class
 *
 * The signal arguments are the results, the record, the table name, the
 * fields and this object, in that order.
 *
 * @param array $results Array of broken links
 * @param array $record Record to analyse
 * @param string $table Table name of the record
 * @param array $fields Array of fields to analyze
 * @return array Whatever the signal slot dispatcher returns for the signal
 */
protected function emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields)
{
    $signalArguments = [$results, $record, $table, $fields, $this];
    $dispatcher = $this->getSignalSlotDispatcher();

    return $dispatcher->dispatch(self::class, 'beforeAnalyzeRecord', $signalArguments);
}
565
/**
 * Fetches the signal slot dispatcher from the object manager.
 *
 * @return \TYPO3\CMS\Extbase\SignalSlot\Dispatcher
 */
protected function getSignalSlotDispatcher()
{
    $objectManager = $this->getObjectManager();

    return $objectManager->get(\TYPO3\CMS\Extbase\SignalSlot\Dispatcher::class);
}
573
/**
 * Obtains the Extbase object manager via GeneralUtility::makeInstance().
 *
 * @return \TYPO3\CMS\Extbase\Object\ObjectManager
 */
protected function getObjectManager()
{
    $objectManager = GeneralUtility::makeInstance(\TYPO3\CMS\Extbase\Object\ObjectManager::class);

    return $objectManager;
}
581
/**
 * Returns the language service stored in $GLOBALS['LANG'].
 *
 * @return LanguageService
 */
protected function getLanguageService()
{
    return $GLOBALS['LANG'];
}
589 }