[BUGFIX] Cache results of explodeSoftRefParserList()
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use TYPO3\CMS\Backend\Utility\BackendUtility;
18 use TYPO3\CMS\Core\Html\HtmlParser;
19 use TYPO3\CMS\Core\Utility\GeneralUtility;
20 use TYPO3\CMS\Lang\LanguageService;
21
22 /**
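23 * Analyzes the records of the configured tables for links, checks them with the registered link type checkers and stores broken links in tx_linkvalidator_link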
24 *
25 * @author Michael Miousse <michael.miousse@infoglobe.ca>
26 * @author Jochen Rieger <j.rieger@connecta.ag>
27 */
28 class LinkAnalyzer {
29
30 /**
31 * Array of tables and fields to search for broken links
32 *
33 * @var array
34 */
35 protected $searchFields = array();
36
37 /**
38 * List of comma separated page uids (rootline downwards)
39 *
40 * @var string
41 */
42 protected $pidList = '';
43
44 /**
45 * Array of tables and the number of external links they contain
46 *
47 * @var array
48 */
49 protected $linkCounts = array();
50
51 /**
52 * Array of tables and the number of broken external links they contain
53 *
54 * @var array
55 */
56 protected $brokenLinkCounts = array();
57
58 /**
59 * Array of tables and records containing broken links
60 *
61 * @var array
62 */
63 protected $recordsWithBrokenLinks = array();
64
65 /**
66 * Array for hooks for own checks
67 *
68 * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
69 */
70 protected $hookObjectsArr = array();
71
72 /**
73 * Array with information about the current page
74 *
75 * @var array
76 */
77 protected $extPageInTreeInfo = array();
78
79 /**
80 * Reference to the current element with table:uid, e.g. pages:85
81 *
82 * @var string
83 */
84 protected $recordReference = '';
85
86 /**
87 * Linked page together with a possible anchor, e.g. 85#c105
88 *
89 * @var string
90 */
91 protected $pageWithAnchor = '';
92
93 /**
94 * The currently active TSConfig. Will be passed to the init function.
95 *
96 * @var array
97 */
98 protected $tsConfig = array();
99
/**
 * Load the module language labels and instantiate all registered link type checkers.
 *
 * Every class reference found in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks']
 * is turned into an object and stored in $this->hookObjectsArr under its
 * registration key.
 */
public function __construct() {
    $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    $registeredCheckers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'];
    if (!is_array($registeredCheckers)) {
        // No checkers registered, nothing to instantiate
        return;
    }
    foreach ($registeredCheckers as $checkerKey => $classReference) {
        $this->hookObjectsArr[$checkerKey] = GeneralUtility::getUserObj($classReference);
    }
}
112
/**
 * Remember the configuration that the analysis methods work with.
 *
 * @param array $searchField Fields to search for links, grouped by table (table name => list of field names)
 * @param string $pid Comma separated list of page uids in which to search for links
 * @param array $tsConfig The currently active TSConfig
 * @return void
 */
public function init(array $searchField, $pid, $tsConfig) {
    $this->tsConfig = $tsConfig;
    $this->pidList = $pid;
    $this->searchFields = $searchField;
}
126
/**
 * Find all supported broken links and store them in tx_linkvalidator_link
 *
 * Steps:
 * 1. Remove the existing entries of the selected link types for the
 *    configured pages.
 * 2. Analyze all records of the configured tables and collect their links.
 * 3. Hand every collected link to the checker registered for its link type.
 *    Broken links are inserted into tx_linkvalidator_link. Valid links are
 *    inserted as well when the request parameter "showalllinks" is set.
 *
 * Nothing happens when $checkOptions is empty.
 *
 * @param array $checkOptions Link types to check (link type key => flag); only types with a truthy flag are checked
 * @param bool $considerHidden When FALSE, the restrictions of BackendUtility::BEenableFields() are added to the record query
 * @return void
 */
public function getLinkStatistics($checkOptions = array(), $considerHidden = FALSE) {
    if (!is_array($checkOptions) || empty($checkOptions)) {
        return;
    }
    $db = $this->getDatabaseConnection();
    $linkTable = 'tx_linkvalidator_link';
    $results = array();

    // Quote every link type key before it becomes part of the SQL condition
    $quotedLinkTypes = array();
    foreach (array_keys($checkOptions) as $linkType) {
        $quotedLinkTypes[] = $db->fullQuoteStr($linkType, $linkTable);
    }
    // NOTE(review): $this->pidList is used verbatim; it is assumed to be a clean,
    // comma separated list of integers without trailing comma - confirm against callers.
    $db->exec_DELETEquery(
        $linkTable,
        '(record_pid IN (' . $this->pidList . ')' .
        ' OR ( record_uid IN (' . $this->pidList . ') AND table_name like \'pages\'))' .
        ' AND link_type IN (' . implode(',', $quotedLinkTypes) . ')'
    );

    // Traverse all configured tables
    foreach ($this->searchFields as $table => $fields) {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
            continue;
        }
        // Pages are matched by their own uid, all other records by their pid
        $pageField = $table === 'pages' ? 'uid' : 'pid';
        $where = 'deleted = 0 AND ' . $pageField . ' IN (' . $this->pidList . ')';
        if (!$considerHidden) {
            $where .= BackendUtility::BEenableFields($table);
        }
        $selectFields = 'uid, pid, ' . $GLOBALS['TCA'][$table]['ctrl']['label'] . ', ' . implode(', ', $fields);

        // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
        $rows = $db->exec_SELECTgetRows($selectFields, $table, $where);
        if (empty($rows)) {
            continue;
        }
        foreach ($rows as $row) {
            $this->analyzeRecord($results, $table, $fields, $row);
        }
    }

    // The request parameter does not change while checking, so read it once
    $showAllLinks = (bool)GeneralUtility::_GP('showalllinks');

    foreach ($this->hookObjectsArr as $key => $hookObj) {
        // Only check link types that were requested and for which links were found
        if (empty($checkOptions[$key]) || empty($results[$key]) || !is_array($results[$key])) {
            continue;
        }
        foreach ($results[$key] as $entryValue) {
            $table = $entryValue['table'];
            $this->recordReference = isset($entryValue['substr']['recordRef']) ? $entryValue['substr']['recordRef'] : '';
            $this->pageWithAnchor = isset($entryValue['pageAndAnchor']) ? $entryValue['pageAndAnchor'] : '';
            // A page with anchor, e.g. 18#1580, takes precedence over the plain token value
            $url = !empty($this->pageWithAnchor) ? $this->pageWithAnchor : $entryValue['substr']['tokenValue'];

            if (!isset($this->linkCounts[$table])) {
                $this->linkCounts[$table] = 0;
            }
            $this->linkCounts[$table]++;

            $isValid = (bool)$hookObj->checkLink($url, $entryValue, $this);
            if ($isValid && !$showAllLinks) {
                // Valid links are only stored on explicit request
                continue;
            }

            $response = array('valid' => $isValid);
            if (!$isValid) {
                $response['errorParams'] = $hookObj->getErrorParams();
            }
            // NOTE(review): this counter also grows for valid links that are stored
            // because of "showalllinks"; kept as is to preserve existing behavior.
            if (!isset($this->brokenLinkCounts[$table])) {
                $this->brokenLinkCounts[$table] = 0;
            }
            $this->brokenLinkCounts[$table]++;

            $record = array(
                'headline' => BackendUtility::getRecordTitle($table, $entryValue['row']),
                'record_pid' => $entryValue['row']['pid'],
                'record_uid' => $entryValue['uid'],
                'table_name' => $table,
                // Only links found inside typolink tags carry a title
                'link_title' => isset($entryValue['link_title']) ? $entryValue['link_title'] : NULL,
                'field' => $entryValue['field'],
                'last_check' => time(),
                'link_type' => $key,
                'url' => $url,
                'url_response' => serialize($response),
            );
            $db->exec_INSERTquery($linkTable, $record);
        }
    }
}
219
/**
 * Find all links of a specific record and add them to $results
 *
 * For every given field that has soft references configured in TCA and that
 * is not empty in the record, all configured soft reference parsers are run.
 * The references they find are sorted into $results by analyseTypoLinks()
 * (parser key "typolink_tag") or analyseLinks() (all other parsers).
 *
 * @param array $results Collected links, grouped by link type; extended by reference
 * @param string $table Table name of the record
 * @param array $fields Array of fields to analyze
 * @param array $record Record to analyse
 * @return void
 */
public function analyzeRecord(array &$results, $table, array $fields, array $record) {
    // Created on first use only, most records do not need it
    $htmlParser = NULL;
    $idRecord = $record['uid'];
    foreach ($fields as $field) {
        $valueField = isset($record[$field]) ? $record[$field] : '';
        // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
        if ((string)$valueField === '' || empty($GLOBALS['TCA'][$table]['columns'][$field]['config']['softref'])) {
            continue;
        }
        // Explode the list of soft references/parameters
        $softRefs = BackendUtility::explodeSoftRefParserList($GLOBALS['TCA'][$table]['columns'][$field]['config']['softref']);
        if (!is_array($softRefs)) {
            continue;
        }
        // Traverse soft references
        foreach ($softRefs as $spKey => $spParams) {
            /** @var $softRefObj \TYPO3\CMS\Core\Database\SoftReferenceIndex */
            $softRefObj = BackendUtility::softRefParserObj($spKey);
            if (!is_object($softRefObj)) {
                continue;
            }
            $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
            if (empty($resultArray['elements'])) {
                continue;
            }
            // Strict string comparison, a loose one would also match an integer key 0 on PHP < 8
            if ((string)$spKey === 'typolink_tag') {
                if ($htmlParser === NULL) {
                    /** @var $htmlParser HtmlParser */
                    $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
                }
                $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
            } else {
                $this->analyseLinks($resultArray, $results, $record, $field, $table);
            }
        }
    }
}
266
/**
 * Returns the TSConfig that was handed over to init().
 *
 * Link type checkers receive this object as argument of checkLink() and
 * can use this method to read the configuration.
 *
 * @return array
 */
public function getTSConfig() {
    return $this->tsConfig;
}
278
/**
 * Sort the references found by a soft reference parser into $results
 *
 * Every element with a non-empty 'subst' part is passed to fetchType() of
 * all registered checkers. The value returned by the last checker is the
 * link type under which the reference is stored. The entry key is
 * "<table>:<field>:<record uid>:<tokenID>".
 *
 * @param array $resultArray findRef parsed records
 * @param array $results Collected links, grouped by link type; extended by reference
 * @param array $record The current record
 * @param string $field The current field
 * @param string $table The current table
 * @return void
 */
protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table) {
    $recordUid = $record['uid'];
    foreach ($resultArray['elements'] as $element) {
        $substitution = $element['subst'];
        if (empty($substitution)) {
            continue;
        }
        $linkType = '';
        /** @var $checker \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype */
        foreach ($this->hookObjectsArr as $checkerKey => $checker) {
            $linkType = $checker->fetchType($substitution, $linkType, $checkerKey);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($linkType)) {
                $substitution['type'] = $linkType;
            }
        }
        $entryKey = implode(':', array($table, $field, $recordUid, $substitution['tokenID']));
        $entryData = array(
            'substr' => $substitution,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $recordUid,
        );
        // Assign key by key so that other data of an existing entry is kept
        foreach ($entryData as $name => $value) {
            $results[$linkType][$entryKey][$name] = $value;
        }
    }
}
312
/**
 * Sort the references found inside typolink tags into $results
 *
 * The parsed content is split into "link" blocks with the given HTML parser.
 * For every block at an odd offset the elements whose tokenID occurs in the
 * block are evaluated: a reference to "pages" becomes the current reference,
 * a directly following reference to "tt_content" is appended as anchor
 * ("<page>#c<content uid>"), any other reference becomes the current
 * reference as is. The current reference is then stored together with the
 * tag-stripped link title under the type reported by the checkers.
 *
 * The current reference, the title and the "previous reference was a page"
 * flag are intentionally not reset between blocks.
 *
 * @param array $resultArray findRef parsed records
 * @param array $results Collected links, grouped by link type; extended by reference
 * @param HtmlParser $htmlParser Instance of html parser
 * @param array $record The current record
 * @param string $field The current field
 * @param string $table The current table
 * @return void
 */
protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table) {
    $blocks = $htmlParser->splitIntoBlock('link', $resultArray['content']);
    $blockCount = count($blocks);
    $recordUid = $record['uid'];
    $currentR = array();
    $linkType = '';
    $linkTitle = '';
    $previousWasPage = FALSE;
    // NOTE(review): presumably the odd offsets hold the link blocks - confirm against HtmlParser::splitIntoBlock()
    for ($index = 1; $index < $blockCount; $index += 2) {
        $linkTag = $blocks[$index];
        $pageAndAnchor = '';
        foreach ($resultArray['elements'] as $element) {
            $linkType = '';
            $candidate = $element['subst'];
            // Only references whose token occurs in this link block are relevant
            if (empty($candidate['tokenID']) || !substr_count($linkTag, $candidate['tokenID'])) {
                continue;
            }
            // Type of referenced record
            if (strpos($candidate['recordRef'], 'pages') !== FALSE) {
                $currentR = $candidate;
                // Contains number of the page
                $pageAndAnchor = $candidate['tokenValue'];
                $previousWasPage = TRUE;
            } elseif ($previousWasPage === TRUE && strpos($candidate['recordRef'], 'tt_content') !== FALSE) {
                $pageAndAnchor .= '#c' . $candidate['tokenValue'];
                $previousWasPage = FALSE;
            } else {
                $currentR = $candidate;
            }
            $linkTitle = strip_tags($linkTag);
        }
        /** @var $checker \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype */
        foreach ($this->hookObjectsArr as $checkerKey => $checker) {
            $linkType = $checker->fetchType($currentR, $linkType, $checkerKey);
            // Store the type that was found
            // This prevents overriding by internal validator
            if (!empty($linkType)) {
                $currentR['type'] = $linkType;
            }
        }
        $entryKey = implode(':', array($table, $field, $recordUid, $currentR['tokenID']));
        $entryData = array(
            'substr' => $currentR,
            'row' => $record,
            'table' => $table,
            'field' => $field,
            'uid' => $recordUid,
            'link_title' => $linkTitle,
            'pageAndAnchor' => $pageAndAnchor,
        );
        // Assign key by key so that other data of an existing entry is kept
        foreach ($entryData as $name => $value) {
            $results[$linkType][$entryKey][$name] = $value;
        }
    }
}
372
/**
 * Fill a marker array with the number of links found in a list of pages
 *
 * The counts are read from tx_linkvalidator_link, grouped by link type.
 * The page list passed to init() is used; $curPage is only used (and stored
 * as page list) when that list is empty. A trailing comma is removed from
 * the stored list.
 *
 * @param string $curPage Comma separated list of page uids
 * @return array One entry per link type (link type => count) plus the sum of all counts in 'brokenlinkCount'; empty array if nothing was found
 */
public function getLinkCounts($curPage) {
    $markerArray = array();
    if (empty($this->pidList)) {
        $this->pidList = $curPage;
    }
    $this->pidList = rtrim($this->pidList, ',');

    $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
        'count(uid) as nbBrokenLinks,link_type',
        'tx_linkvalidator_link',
        'record_pid in (' . $this->pidList . ')',
        'link_type'
    );
    if (empty($rows)) {
        return $markerArray;
    }
    foreach ($rows as $row) {
        $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
        // Start the sum explicitly instead of adding to an undefined array index
        if (!isset($markerArray['brokenlinkCount'])) {
            $markerArray['brokenlinkCount'] = 0;
        }
        $markerArray['brokenlinkCount'] += (int)$row['nbBrokenLinks'];
    }
    return $markerArray;
}
400
/**
 * Calls TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
 * Although this duplicates the function TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList
 * this is necessary to create the object that is used recursively by the original function.
 *
 * Generates a list of page uids from $id. List does not include $id itself.
 * Deleted pages are always excluded; hidden pages are excluded unless
 * $considerHidden is set. Sub pages of a hidden page with "extendToSubpages"
 * are not traversed unless $considerHidden is set.
 *
 * Every collected page is also recorded in $this->extPageInTreeInfo as
 * array(uid, escaped title, remaining depth).
 *
 * @param int $id Start page id
 * @param int $depth Depth to traverse down the page tree.
 * @param int $begin Number of levels to skip before page uids are collected: pages are collected once this value, which is decreased by one per level, is zero or less
 * @param string $permsClause Perms clause, appended to the query with AND and therefore must not be empty
 * @param bool $considerHidden Whether to consider hidden pages or not
 * @return string Returns the list with a comma in the end (if any pages selected!)
 */
public function extGetTreeList($id, $depth, $begin = 0, $permsClause, $considerHidden = FALSE) {
    $depth = (int)$depth;
    $begin = (int)$begin;
    $id = (int)$id;
    $theList = '';
    if ($depth <= 0) {
        return $theList;
    }
    $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
        'uid,title,hidden,extendToSubpages',
        'pages',
        'pid=' . $id . ' AND deleted=0 AND ' . $permsClause
    );
    if (empty($rows)) {
        return $theList;
    }
    foreach ($rows as $row) {
        if ($begin <= 0 && ($row['hidden'] == 0 || $considerHidden)) {
            $theList .= $row['uid'] . ',';
            // The depth is the third element of the entry, not the flags argument of htmlspecialchars()
            $this->extPageInTreeInfo[] = array($row['uid'], htmlspecialchars($row['title']), $depth);
        }
        if ($depth > 1 && (!($row['hidden'] == 1 && $row['extendToSubpages'] == 1) || $considerHidden)) {
            $theList .= $this->extGetTreeList($row['uid'], $depth - 1, $begin - 1, $permsClause, $considerHidden);
        }
    }
    return $theList;
}
441
/**
 * Check if rootline contains a hidden page
 *
 * A page counts as hidden for its sub pages when both "hidden" and
 * "extendToSubpages" are set. If that is not the case for the given page,
 * its parent page is loaded and checked the same way, up to the root of
 * the page tree.
 *
 * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
 * @return bool TRUE if rootline contains a hidden page, FALSE if not
 */
public function getRootLineIsHidden(array $pageInfo) {
    if ($pageInfo['extendToSubpages'] == 1 && $pageInfo['hidden'] == 1) {
        return TRUE;
    }
    $parentUid = isset($pageInfo['pid']) ? (int)$pageInfo['pid'] : 0;
    if ($parentUid <= 0) {
        // Reached the root of the page tree without finding a hidden page
        return FALSE;
    }
    // "pid" has to be selected as well, otherwise the recursion stops at the direct parent
    $rows = $this->getDatabaseConnection()->exec_SELECTgetRows(
        'uid,pid,title,hidden,extendToSubpages',
        'pages',
        'uid=' . $parentUid
    );
    $hidden = FALSE;
    if (!empty($rows)) {
        foreach ($rows as $row) {
            $hidden = $this->getRootLineIsHidden($row);
        }
    }
    return $hidden;
}
468
/**
 * Returns the database connection stored in $GLOBALS['TYPO3_DB'].
 *
 * @return \TYPO3\CMS\Core\Database\DatabaseConnection
 */
protected function getDatabaseConnection() {
    return $GLOBALS['TYPO3_DB'];
}
475
/**
 * Returns the language service stored in $GLOBALS['LANG'].
 *
 * @return LanguageService
 */
protected function getLanguageService() {
    return $GLOBALS['LANG'];
}
482 }