0e36e5bcd67440ae8c4fc25e3c1fa34535169f70
[Packages/TYPO3.CMS.git] / typo3 / sysext / linkvalidator / Classes / LinkAnalyzer.php
1 <?php
2 namespace TYPO3\CMS\Linkvalidator;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 2010 - 2013 Michael Miousse (michael.miousse@infoglobe.ca)
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 *
19 * This script is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * This copyright notice MUST APPEAR in all copies of the script!
25 ***************************************************************/
26
// Load the labels of the linkvalidator report module.
// Guard the call so that merely loading this file does not end in a fatal
// "call to a member function on a non-object" error when $GLOBALS['LANG']
// has not been initialised yet.
if (isset($GLOBALS['LANG']) && is_object($GLOBALS['LANG'])) {
    $GLOBALS['LANG']->includeLLFile('EXT:linkvalidator/modfuncreport/locallang.xml');
}
28
/**
 * Collects the links found in the configured record fields, hands them to the
 * registered link type checkers and stores the outcome in tx_linkvalidator_link.
 *
 * @author Michael Miousse <michael.miousse@infoglobe.ca>
 * @author Jochen Rieger <j.rieger@connecta.ag>
 */
class LinkAnalyzer {

    /**
     * Array of tables and fields to search for broken links
     *
     * @var array
     */
    protected $searchFields = array();

    /**
     * List of comma separated page uids (rootline downwards)
     *
     * @var string
     */
    protected $pidList = '';

    /**
     * Array of tables and the number of external links they contain
     *
     * @var array
     */
    protected $linkCounts = array();

    /**
     * Array of tables and the number of broken external links they contain
     *
     * @var array
     */
    protected $brokenLinkCounts = array();

    /**
     * Array of tables and records containing broken links
     *
     * @var array
     */
    protected $recordsWithBrokenLinks = array();

    /**
     * Link type checkers, indexed by the key they were registered with
     *
     * @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = array();

    /**
     * Entries collected by extGetTreeList(): array(page uid, escaped title, depth)
     *
     * @var array
     */
    protected $extPageInTreeInfo = array();

    /**
     * Reference to the current element with table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * Linked page together with a possible anchor, e.g. 85#c105
     *
     * @var string
     */
    protected $pageWithAnchor = '';

    /**
     * Fill hookObjectsArr with the link type checkers registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'].
     */
    public function __construct() {
        // isset() first: the configuration key does not have to exist at all
        if (
            isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] as $key => $classRef) {
                $this->hookObjectsArr[$key] = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($classRef);
            }
        }
    }

    /**
     * Store all the needed configuration values in class variables
     *
     * @param array $searchField Tables (keys) and their fields (values) in which to search for links
     * @param string $pid List of comma separated page uids in which to search for links
     * @return void
     */
    public function init(array $searchField, $pid) {
        $this->searchFields = $searchField;
        $this->pidList = $pid;
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Existing entries of the requested link types on the configured pages are
     * deleted first. Nothing happens when $checkOptions is empty or when no
     * page list has been configured.
     *
     * @param array $checkOptions Link type keys (array keys) to check; a type is only checked if its value is not empty
     * @param boolean $considerHidden Defines whether to look into hidden fields
     * @return void
     */
    public function getLinkStatistics($checkOptions = array(), $considerHidden = FALSE) {
        if (!is_array($checkOptions) || count($checkOptions) === 0) {
            return;
        }
        // A trailing comma or an empty list would produce invalid SQL ("IN (1,2,)" / "IN ()")
        $pidList = rtrim((string)$this->pidList, ',');
        if ($pidList === '') {
            return;
        }

        $checkLinkTypeCondition = ' AND link_type IN (\'' . implode('\',\'', array_keys($checkOptions)) . '\')';
        $GLOBALS['TYPO3_DB']->exec_DELETEquery(
            'tx_linkvalidator_link',
            '(record_pid IN (' . $pidList . ')' .
                ' OR ( record_uid IN (' . $pidList . ') AND table_name like \'pages\'))' .
                $checkLinkTypeCondition
        );

        $results = array();
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. Checked before the table is used.
            if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
                continue;
            }
            // Pages are matched by their own uid, all other records by their pid
            $where = 'deleted = 0 AND ' . ($table === 'pages' ? 'uid' : 'pid') . ' IN (' . $pidList . ')';
            if (!$considerHidden) {
                $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields($table);
            }
            $selectFields = 'uid, pid, ' . $GLOBALS['TCA'][$table]['ctrl']['label'] . ', ' . implode(', ', $fields);
            // TODO: only select rows that have content in at least one of the relevant fields (via OR)
            $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($selectFields, $table, $where);
            if (!$res) {
                continue;
            }
            while (($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) !== FALSE) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
            $GLOBALS['TYPO3_DB']->sql_free_result($res);
        }

        // Loop invariant: whether valid links are to be stored as well
        $showAllLinks = (bool)\TYPO3\CMS\Core\Utility\GeneralUtility::_GP('showalllinks');

        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // Only check link types that were requested and for which links were found
            if (empty($checkOptions[$key]) || !isset($results[$key]) || !is_array($results[$key])) {
                continue;
            }
            foreach ($results[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = array();
                $record['headline'] = $entryValue['row'][$GLOBALS['TCA'][$table]['ctrl']['label']];
                $record['record_pid'] = $entryValue['row']['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                // Only entries created by analyseTypoLinks() carry a link title
                $record['link_title'] = isset($entryValue['link_title']) ? $entryValue['link_title'] : NULL;
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();
                $this->recordReference = isset($entryValue['substr']['recordRef']) ? $entryValue['substr']['recordRef'] : NULL;
                $this->pageWithAnchor = isset($entryValue['pageAndAnchor']) ? $entryValue['pageAndAnchor'] : NULL;
                if (!empty($this->pageWithAnchor)) {
                    // Page with anchor, e.g. 18#1580
                    $url = $this->pageWithAnchor;
                } else {
                    $url = $entryValue['substr']['tokenValue'];
                }

                if (!isset($this->linkCounts[$table])) {
                    $this->linkCounts[$table] = 0;
                }
                $this->linkCounts[$table]++;

                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);
                if ($checkUrl && !$showAllLinks) {
                    // Valid link and nobody asked for those to be listed
                    continue;
                }

                $response = array();
                if (!$checkUrl) {
                    // Broken link found
                    $response['valid'] = FALSE;
                    $response['errorParams'] = $hookObj->getErrorParams();
                } else {
                    $response['valid'] = TRUE;
                }
                // NOTE(review): this counter is also raised for valid links that are
                // stored because of "showalllinks" - kept as is, confirm whether intended.
                if (!isset($this->brokenLinkCounts[$table])) {
                    $this->brokenLinkCounts[$table] = 0;
                }
                $this->brokenLinkCounts[$table]++;
                $record['link_type'] = $key;
                $record['url'] = $url;
                $record['url_response'] = serialize($response);
                $GLOBALS['TYPO3_DB']->exec_INSERTquery('tx_linkvalidator_link', $record);
            }
        }
    }

    /**
     * Collect all links of a specific record
     *
     * Every field that has soft references configured in TCA is run through
     * its soft reference parsers; the elements found are added to $results.
     *
     * @param array $results Collected links, grouped by link type
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyse
     * @return void
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record) {
        /** @var \TYPO3\CMS\Core\Html\HtmlParser $htmlParser Created on first use only */
        $htmlParser = NULL;
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            $conf = isset($GLOBALS['TCA'][$table]['columns'][$field]['config'])
                ? $GLOBALS['TCA'][$table]['columns'][$field]['config']
                : array();
            $valueField = isset($record[$field]) ? $record[$field] : NULL;
            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }
            // Explode the list of soft references/parameters
            $softRefs = \TYPO3\CMS\Backend\Utility\BackendUtility::explodeSoftRefParserList($conf['softref']);
            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj Create or get the soft reference object */
                $softRefObj = \TYPO3\CMS\Backend\Utility\BackendUtility::softRefParserObj($spKey);
                if (!is_object($softRefObj)) {
                    continue;
                }
                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }
                if ($spKey == 'typolink_tag') {
                    if ($htmlParser === NULL) {
                        $htmlParser = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\Html\\HtmlParser');
                    }
                    $this->analyseTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyseLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }

    /**
     * Ask every registered link type checker for the type of a soft reference entry
     *
     * Each checker receives the type found so far. A non-empty answer is also
     * written to $softRefEntry['type'], which prevents overriding by the
     * internal validator.
     *
     * @param array $softRefEntry The "subst" part of a soft reference element; gets the key "type" added
     * @return string The detected link type, empty string if no checker recognised the entry
     */
    protected function fetchLinkType(&$softRefEntry) {
        $type = '';
        /** @var \TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype $hookObj */
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($softRefEntry, $type, $keyArr);
            if (!empty($type)) {
                $softRefEntry['type'] = $type;
            }
        }
        return $type;
    }

    /**
     * Add the links of a parsed soft reference result to the list of links to check
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     * @return void
     */
    protected function analyseLinks(array $resultArray, array &$results, array $record, $field, $table) {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            $r = isset($element['subst']) ? $element['subst'] : NULL;
            if (empty($r)) {
                continue;
            }
            $type = $this->fetchLinkType($r);
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $r['tokenID'];
            $results[$type][$resultKey]['substr'] = $r;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
        }
    }

    /**
     * Add the links of parsed <link> tags to the list of links to check
     *
     * A link to a page followed by a link to a content element inside the same
     * tag is combined into "page#ccontent" and stored as "pageAndAnchor".
     * Tags that contain none of the soft reference tokens are skipped.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type
     * @param \TYPO3\CMS\Core\Html\HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     * @return void
     */
    protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table) {
        $linkTags = $htmlParser->splitIntoBlock('link', $resultArray['content']);
        $idRecord = $record['uid'];
        $linkTagCount = count($linkTags);
        // Only every second entry, starting at index 1, is treated as a link tag
        for ($i = 1; $i < $linkTagCount; $i += 2) {
            // Per-tag state: must not leak from one link tag into the next one
            $currentR = array();
            $title = '';
            $referencedRecordType = '';
            $wasPage = FALSE;
            foreach ($resultArray['elements'] as $element) {
                $r = isset($element['subst']) ? $element['subst'] : array();
                // Only look at the elements whose token is part of this tag
                if (empty($r['tokenID']) || !substr_count($linkTags[$i], $r['tokenID'])) {
                    continue;
                }
                // Type of referenced record
                $recordRef = isset($r['recordRef']) ? $r['recordRef'] : '';
                if (strpos($recordRef, 'pages') !== FALSE) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = TRUE;
                } elseif (strpos($recordRef, 'tt_content') !== FALSE && $wasPage) {
                    // Content element on the page found just before: append it as anchor
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = FALSE;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }
            if (empty($currentR)) {
                // No soft reference belongs to this tag, so there is nothing to check
                continue;
            }
            $type = $this->fetchLinkType($currentR);
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
            $results[$type][$resultKey]['substr'] = $currentR;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
            $results[$type][$resultKey]['link_title'] = $title;
            $results[$type][$resultKey]['pageAndAnchor'] = $referencedRecordType;
        }
    }

    /**
     * Fill a marker array with the number of links found in a list of pages
     *
     * The stored page list is used if there is one, $curPage otherwise. The
     * list that was used is kept (without trailing comma) in $this->pidList.
     *
     * @param string $curPage Comma separated list of page uids
     * @return array Number of stored links per link type plus their sum in "brokenlinkCount"; empty if nothing was found
     */
    public function getLinkCounts($curPage) {
        $markerArray = array();
        if (empty($this->pidList)) {
            $this->pidList = $curPage;
        }
        $this->pidList = rtrim((string)$this->pidList, ',');
        if ($this->pidList === '') {
            // "record_pid in ()" is not valid SQL
            return $markerArray;
        }
        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'count(uid) as nbBrokenLinks,link_type',
            'tx_linkvalidator_link',
            'record_pid in (' . $this->pidList . ')',
            'link_type'
        );
        if ($res) {
            while (($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) !== FALSE) {
                $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
                if (!isset($markerArray['brokenlinkCount'])) {
                    $markerArray['brokenlinkCount'] = 0;
                }
                $markerArray['brokenlinkCount'] += $row['nbBrokenLinks'];
            }
            // Only a successful query leaves a result to free
            $GLOBALS['TYPO3_DB']->sql_free_result($res);
        }
        return $markerArray;
    }

    /**
     * Calls TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
     * Although this duplicates the function TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList
     * this is necessary to create the object that is used recursively by the original function.
     *
     * Generates a list of page uids from $id. List does not include $id itself.
     * Deleted pages are never part of the list. Hidden pages are left out unless
     * $considerHidden is set; the subpages of a hidden page are left out as well
     * if that page has "extendToSubpages" set.
     *
     * Every page added to the list is also recorded in $this->extPageInTreeInfo
     * as array(uid, escaped title, depth).
     *
     * Note: the signature is kept as it is for compatibility although an
     * optional parameter precedes a required one.
     *
     * @param integer $id Start page id
     * @param integer $depth Depth to traverse down the page tree.
     * @param integer $begin Number of levels below $id to skip before uids are collected; 0 starts with the direct subpages
     * @param string $permsClause Perms clause, appended with AND to the query
     * @param boolean $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin = 0, $permsClause, $considerHidden = FALSE) {
        $depth = intval($depth);
        $begin = intval($begin);
        $id = intval($id);
        $theList = '';
        if ($depth <= 0) {
            return $theList;
        }
        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'uid,title,hidden,extendToSubpages',
            'pages',
            'pid=' . $id . ' AND deleted=0 AND ' . $permsClause
        );
        if (!$res) {
            return $theList;
        }
        while (($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) !== FALSE) {
            if ($begin <= 0 && ($row['hidden'] == 0 || $considerHidden == 1)) {
                $theList .= $row['uid'] . ',';
                // The depth is the third entry, it must not be passed to htmlspecialchars() as flags
                $this->extPageInTreeInfo[] = array($row['uid'], htmlspecialchars($row['title']), $depth);
            }
            if ($depth > 1 && (!($row['hidden'] == 1 && $row['extendToSubpages'] == 1) || $considerHidden == 1)) {
                $theList .= $this->extGetTreeList($row['uid'], $depth - 1, $begin - 1, $permsClause, $considerHidden);
            }
        }
        $GLOBALS['TYPO3_DB']->sql_free_result($res);
        return $theList;
    }

    /**
     * Check whether a page is hidden through its rootline
     *
     * A page counts as hidden here if the page itself or any page above it is
     * hidden and has "extendToSubpages" set. The parents are looked up one by
     * one via the pid until a page with pid 0 (or lower) is reached.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return boolean TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo) {
        $extendToSubpages = isset($pageInfo['extendToSubpages']) ? $pageInfo['extendToSubpages'] : 0;
        $isHidden = isset($pageInfo['hidden']) ? $pageInfo['hidden'] : 0;
        if ($extendToSubpages == 1 && $isHidden == 1) {
            return TRUE;
        }
        $parentUid = isset($pageInfo['pid']) ? intval($pageInfo['pid']) : 0;
        if ($parentUid <= 0) {
            return FALSE;
        }
        $hidden = FALSE;
        // "pid" has to be selected, too - without it the recursion ends at the direct parent
        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('uid,pid,title,hidden,extendToSubpages', 'pages', 'uid=' . $parentUid);
        if ($res) {
            while (($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) !== FALSE) {
                $hidden = $this->getRootLineIsHidden($row);
            }
            $GLOBALS['TYPO3_DB']->sql_free_result($res);
        }
        return $hidden;
    }

}
443 ?>