[BUGFIX] Use BackendUtility::deleteClause in LinkAnalyzer
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Html\HtmlParser;
19 use TYPO3\CMS\Core\Utility\GeneralUtility;
20 use TYPO3\CMS\Lang\LanguageService;
21
/**
 * Collects the links found in configured record fields, hands them to the
 * registered link type checkers and stores the outcome in tx_linkvalidator_link.
 */
class LinkAnalyzer
{
    /**
     * Tables (array keys) and their fields (array values) to search for broken links
     *
     * @var array
     */
    protected $searchFields = array();

    /**
     * List of comma separated page uids (rootline downwards)
     *
     * @var string
     */
    protected $pidList = '';

    /**
     * Array of tables and the number of links that were checked in them
     *
     * @var array
     */
    protected $linkCounts = array();

    /**
     * Array of tables and the number of link results stored for them
     *
     * @var array
     */
    protected $brokenLinkCounts = array();

    /**
     * Array of tables and records containing broken links
     *
     * @var array
     */
    protected $recordsWithBrokenLinks = array();

    /**
     * Link type checkers, indexed by their link type key
     *
     * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = array();

    /**
     * Pages collected by extGetTreeList(); each entry is array(uid, escaped title, depth)
     *
     * @var array
     */
    protected $extPageInTreeInfo = array();

    /**
     * Reference to the current element with table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * Linked page together with a possible anchor, e.g. 85#c105
     *
     * @var string
     */
    protected $pageWithAnchor = '';

    /**
     * The currently active TSConfig. Will be passed to the init function.
     *
     * @var array
     */
    protected $tsConfig = array();

    /**
     * Fill hookObjectsArr with different link types and possible XClasses.
     */
    public function __construct()
    {
        $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
        // Hook to handle own checks. Guarded with isset() so that a missing
        // configuration entry does not raise a notice.
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] as $key => $classRef) {
                $this->hookObjectsArr[$key] = GeneralUtility::getUserObj($classRef);
            }
        }
    }

    /**
     * Store all the needed configuration values in class variables
     *
     * @param array $searchField List of fields in which to search for links
     * @param string $pid List of comma separated page uids in which to search for links
     * @param array $tsConfig The currently active TSConfig.
     * @return void
     */
    public function init(array $searchField, $pid, $tsConfig)
    {
        $this->searchFields = $searchField;
        $this->pidList = $pid;
        $this->tsConfig = $tsConfig;
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Nothing happens when $checkOptions is empty or no page list has been set.
     * Otherwise the stored results of the selected link types are deleted for
     * the configured pages, all configured tables are scanned for links and
     * every link of an activated type is checked by its link type checker.
     *
     * @param array $checkOptions Link type keys as array keys; a truthy value activates the checker
     * @param bool $considerHidden Defines whether to look into hidden fields
     * @return void
     */
    public function getLinkStatistics($checkOptions = array(), $considerHidden = false)
    {
        // An empty page list would end up as "IN ()" which is not valid SQL
        if (empty($checkOptions) || (string)$this->pidList === '') {
            return;
        }
        $results = array();
        $database = $this->getDatabaseConnection();

        // Remove the stored results of the link types that are checked again.
        // The link type keys are quoted instead of being concatenated as they are.
        $quotedLinkTypes = $database->fullQuoteArray(array_keys($checkOptions), 'tx_linkvalidator_link');
        $database->exec_DELETEquery(
            'tx_linkvalidator_link',
            '(record_pid IN (' . $this->pidList . ')' .
            ' OR ( record_uid IN (' . $this->pidList . ') AND table_name like \'pages\'))' .
            ' AND link_type IN (' . implode(',', $quotedLinkTypes) . ')'
        );

        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. This is tested first so that no
            // TCA based clause is built for an unknown table.
            if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
                continue;
            }
            $where = ($table === 'pages' ? 'uid' : 'pid') . ' IN (' . $this->pidList . ')';
            $where .= BackendUtility::deleteClause($table);
            if (!$considerHidden) {
                $where .= BackendUtility::BEenableFields($table);
            }
            // uid and pid are always selected, followed by the label field and
            // the fields to analyze. Building the list from an array avoids a
            // dangling comma when one of the parts is empty.
            $selectFields = array('uid', 'pid');
            if (!empty($GLOBALS['TCA'][$table]['ctrl']['label'])) {
                $selectFields[] = $GLOBALS['TCA'][$table]['ctrl']['label'];
            }
            $selectFields = array_unique(array_merge($selectFields, (array)$fields));

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            $rows = $database->exec_SELECTgetRows(implode(', ', $selectFields), $table, $where);
            if (!empty($rows)) {
                foreach ($rows as $row) {
                    $this->analyzeRecord($results, $table, $fields, $row);
                }
            }
        }

        // The request parameter does not change while checking, read it once
        $showAllLinks = (bool)GeneralUtility::_GP('showalllinks');
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // Only check link types that were activated and for which links were found
            if (empty($checkOptions[$key]) || !isset($results[$key]) || !is_array($results[$key])) {
                continue;
            }
            foreach ($results[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = array(
                    'headline' => BackendUtility::getRecordTitle($table, $entryValue['row']),
                    'record_pid' => $entryValue['row']['pid'],
                    'record_uid' => $entryValue['uid'],
                    'table_name' => $table,
                    // Only entries created by analyseTypoLinks() carry a link title
                    'link_title' => isset($entryValue['link_title']) ? $entryValue['link_title'] : null,
                    'field' => $entryValue['field'],
                    'last_check' => time(),
                );
                $this->recordReference = isset($entryValue['substr']['recordRef'])
                    ? $entryValue['substr']['recordRef']
                    : '';
                $this->pageWithAnchor = isset($entryValue['pageAndAnchor']) ? $entryValue['pageAndAnchor'] : '';
                // Page with anchor, e.g. 18#1580, takes precedence over the plain token value
                $url = !empty($this->pageWithAnchor)
                    ? $this->pageWithAnchor
                    : $entryValue['substr']['tokenValue'];

                $this->linkCounts[$table] = (isset($this->linkCounts[$table]) ? $this->linkCounts[$table] : 0) + 1;
                $isValid = (bool)$hookObj->checkLink($url, $entryValue, $this);
                // Valid links are only stored when all links are requested
                if ($isValid && !$showAllLinks) {
                    continue;
                }
                $response = array('valid' => $isValid);
                if (!$isValid) {
                    $response['errorParams'] = $hookObj->getErrorParams();
                }
                // NOTE(review): valid links stored in "showalllinks" mode are counted
                // here as well; this is kept as it was, confirm that it is intended.
                $this->brokenLinkCounts[$table] =
                    (isset($this->brokenLinkCounts[$table]) ? $this->brokenLinkCounts[$table] : 0) + 1;
                $record['link_type'] = $key;
                $record['url'] = $url;
                $record['url_response'] = serialize($response);
                $database->exec_INSERTquery('tx_linkvalidator_link', $record);
            }
        }
    }

    /**
     * Find all supported broken links for a specific record
     *
     * Every field that has soft references configured in TCA and is not empty
     * is run through its soft reference parsers; the found links are added to
     * $results.
     *
     * @param array $results Array of broken links
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyse
     * @return void
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        list($results, $record) = $this->emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields);

        /** @var $htmlParser HtmlParser */
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            $valueField = isset($record[$field]) ? $record[$field] : '';
            $softRefList = isset($GLOBALS['TCA'][$table]['columns'][$field]['config']['softref'])
                ? $GLOBALS['TCA'][$table]['columns'][$field]['config']['softref']
                : '';
            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($softRefList) || (string)$valueField === '') {
                continue;
            }
            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($softRefList);
            if (!is_array($softRefs)) {
                continue;
            }
            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var $softRefObj \TYPO3\CMS\Core\Database\SoftReferenceIndex */
                $softRefObj = BackendUtility::softRefParserObj($spKey);
                // Without a parser object there is nothing to process
                if (!is_object($softRefObj)) {
                    continue;
                }
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }
                if ($spKey === 'typolink_tag') {
                    $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyseLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }

    /**
     * Returns the TSConfig that was passed to the init() method.
     *
     * This can be used by link checkers that get a reference of this
     * object passed to the checkLink() method.
     *
     * @return array
     */
    public function getTSConfig()
    {
        return $this->tsConfig;
    }

    /**
     * Find all supported broken links for a specific link list
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     * @return void
     */
    protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            if (empty($element['subst'])) {
                continue;
            }
            $r = $element['subst'];
            $type = $this->detectLinkType($r);
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . (isset($r['tokenID']) ? $r['tokenID'] : '');
            $entry = array(
                'substr' => $r,
                'row' => $record,
                'table' => $table,
                'field' => $field,
                'uid' => $idRecord,
            );
            // Assigned key by key so that other keys of an existing entry stay untouched
            foreach ($entry as $name => $value) {
                $results[$type][$resultKey][$name] = $value;
            }
        }
    }

    /**
     * Find all supported broken links for a specific typoLink
     *
     * The tokenized content is split into link tags. For every link tag the
     * soft reference elements whose token occurs in the tag are combined into
     * one result entry (a page reference followed by a tt_content reference
     * becomes "page#ccontent"). All state is reset for each link tag and tags
     * without a matching token are skipped, so that one link can never
     * overwrite the entry of a previous one.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     * @return void
     */
    protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $linkTags = $htmlParser->splitIntoBlock('link', $resultArray['content']);
        $idRecord = $record['uid'];
        $countLinkTags = count($linkTags);
        // Only every second block, starting with index 1, is looked at
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            $currentR = array();
            $referencedRecordType = '';
            $title = '';
            $wasPage = false;
            foreach ($resultArray['elements'] as $element) {
                if (empty($element['subst']['tokenID'])) {
                    continue;
                }
                $r = $element['subst'];
                // The element belongs to this link tag if its token occurs in the tag
                if (strpos($linkTags[$i], (string)$r['tokenID']) === false) {
                    continue;
                }
                // Type of referenced record
                $recordRef = isset($r['recordRef']) ? (string)$r['recordRef'] : '';
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif ($wasPage && strpos($recordRef, 'tt_content') !== false) {
                    // Content element anchor on the page that was found just before
                    $referencedRecordType .= '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }
            // No soft reference belongs to this link tag, nothing to store
            if (empty($currentR)) {
                continue;
            }
            $type = $this->detectLinkType($currentR);
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
            $entry = array(
                'substr' => $currentR,
                'row' => $record,
                'table' => $table,
                'field' => $field,
                'uid' => $idRecord,
                'link_title' => $title,
                'pageAndAnchor' => $referencedRecordType,
            );
            foreach ($entry as $name => $value) {
                $results[$type][$resultKey][$name] = $value;
            }
        }
    }

    /**
     * Asks every registered link type checker for the type of a soft reference.
     *
     * Each checker receives the type found so far; a non-empty answer is also
     * written to the 'type' key of the reference before the next checker is asked.
     *
     * @param array $reference Soft reference data, passed by reference because its 'type' key is updated
     * @return string The type returned by the last checker, empty string if no checker is registered
     */
    protected function detectLinkType(&$reference)
    {
        $type = '';
        /** @var $hookObj \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype */
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($reference, $type, $keyArr);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($type)) {
                $reference['type'] = $type;
            }
        }
        return $type;
    }

    /**
     * Fill a marker array with the number of links found in a list of pages
     *
     * The result always contains the key 'brokenlinkCount' with the total
     * number, plus one key per link type with the number stored for that type.
     *
     * @param string $curPage Comma separated list of page uids, used if no page list has been set yet
     * @return array Marker array with the number of links found
     */
    public function getLinkCounts($curPage)
    {
        $markerArray = array('brokenlinkCount' => 0);
        if (empty($this->pidList)) {
            $this->pidList = $curPage;
        }
        $this->pidList = rtrim((string)$this->pidList, ',');
        // An empty page list would end up as "in ()" which is not valid SQL
        if ($this->pidList === '') {
            return $markerArray;
        }

        $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
            'count(uid) as nbBrokenLinks,link_type',
            'tx_linkvalidator_link',
            'record_pid in (' . $this->pidList . ')',
            'link_type'
        );
        if (!empty($rows)) {
            foreach ($rows as $row) {
                $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
                $markerArray['brokenlinkCount'] += $row['nbBrokenLinks'];
            }
        }
        return $markerArray;
    }

    /**
     * Generates a list of page uids below $id. The list does not include $id itself.
     *
     * Deleted pages are never part of the list. Hidden pages are only part of
     * it if $considerHidden is set; the subpages of a hidden page with
     * "extendToSubpages" set are skipped as well in that case. Every page added
     * to the list is also recorded in $this->extPageInTreeInfo.
     *
     * According to the original description this duplicates
     * TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin Number of levels to skip: pages are only added once this value,
     *                   which is decreased by one per level, is 0 or lower
     * @param string $permsClause Perms clause
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin, $permsClause, $considerHidden = false)
    {
        $depth = (int)$depth;
        $begin = (int)$begin;
        $id = (int)$id;
        $theList = '';
        if ($depth <= 0) {
            return $theList;
        }
        $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
            'uid,title,hidden,extendToSubpages',
            'pages',
            'pid=' . $id . ' AND deleted=0 AND ' . $permsClause
        );
        if (empty($rows)) {
            return $theList;
        }
        foreach ($rows as $row) {
            if ($begin <= 0 && ($row['hidden'] == 0 || $considerHidden)) {
                $theList .= $row['uid'] . ',';
                // The depth is stored next to the escaped title; it must not be
                // passed to htmlspecialchars() where it would be taken as flags.
                $this->extPageInTreeInfo[] = array($row['uid'], htmlspecialchars($row['title']), $depth);
            }
            $hidesSubpages = $row['hidden'] == 1 && $row['extendToSubpages'] == 1;
            if ($depth > 1 && (!$hidesSubpages || $considerHidden)) {
                $theList .= $this->extGetTreeList($row['uid'], $depth - 1, $begin - 1, $permsClause, $considerHidden);
            }
        }
        return $theList;
    }

    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hidden here if it is hidden and has "extendToSubpages"
     * set. The parent pages are looked up recursively until such a page is
     * found or a page without a parent (pid 0) is reached.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        $extendsToSubpages = isset($pageInfo['extendToSubpages']) && $pageInfo['extendToSubpages'] == 1;
        if ($extendsToSubpages && isset($pageInfo['hidden']) && $pageInfo['hidden'] == 1) {
            return true;
        }
        $parentUid = isset($pageInfo['pid']) ? (int)$pageInfo['pid'] : 0;
        if ($parentUid <= 0) {
            return false;
        }
        // pid has to be selected as well, otherwise the lookup ends at the direct parent
        $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
            'uid,pid,title,hidden,extendToSubpages',
            'pages',
            'uid=' . $parentUid
        );
        $hidden = false;
        if (!empty($rows)) {
            foreach ($rows as $row) {
                $hidden = $this->getRootLineIsHidden($row);
            }
        }
        return $hidden;
    }

    /**
     * Emits a signal before the record is analyzed
     *
     * @param array $results Array of broken links
     * @param array $record Record to analyse
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @return array The dispatched arguments; the first two entries are used as new $results and $record
     */
    protected function emitBeforeAnalyzeRecordSignal($results, $record, $table, $fields)
    {
        return $this->getSignalSlotDispatcher()->dispatch(
            self::class,
            'beforeAnalyzeRecord',
            array($results, $record, $table, $fields, $this)
        );
    }

    /**
     * @return \TYPO3\CMS\Extbase\SignalSlot\Dispatcher
     */
    protected function getSignalSlotDispatcher()
    {
        return $this->getObjectManager()->get(\TYPO3\CMS\Extbase\SignalSlot\Dispatcher::class);
    }

    /**
     * @return \TYPO3\CMS\Extbase\Object\ObjectManager
     */
    protected function getObjectManager()
    {
        return GeneralUtility::makeInstance(\TYPO3\CMS\Extbase\Object\ObjectManager::class);
    }

    /**
     * @return \TYPO3\CMS\Core\Database\DatabaseConnection
     */
    protected function getDatabaseConnection()
    {
        return $GLOBALS['TYPO3_DB'];
    }

    /**
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        return $GLOBALS['LANG'];
    }
}