[BUGFIX] Linkvalidator: Links (from rte) not detected correctly 92/52592/10
author Thorben Nissen <thorben.nissen@kapp-hamburg.de>
Tue, 25 Apr 2017 07:15:11 +0000 (09:15 +0200)
committer Benni Mack <benni@typo3.org>
Thu, 15 Jun 2017 06:49:02 +0000 (08:49 +0200)
Due to the changed link syntax ("t3://..." instead of "<link ...>"), links
to files and internal pages are no longer recognized. Furthermore, external
links are now written as <a href="..."> and are therefore no longer
recognized either.

Rewrite the code that detects "typolink_tags" so that these links are found
again. When recognizing URLs, e.g. from "external url" pages or plain URLs in
texts, also match https and not only http and ftp.

Resolves: #80991
Resolves: #80988
Releases: master, 8.7
Change-Id: I5a9267b1d4ab53a6a39efdbd411e610dc9233ead
Reviewed-on: https://review.typo3.org/52592
Reviewed-by: Georg Ringer <georg.ringer@gmail.com>
Tested-by: Georg Ringer <georg.ringer@gmail.com>
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Christer V <cvi@systime.dk>
Tested-by: Christer V <cvi@systime.dk>
Reviewed-by: Benni Mack <benni@typo3.org>
Tested-by: Benni Mack <benni@typo3.org>
typo3/sysext/core/Classes/Database/SoftReferenceIndex.php
typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php
typo3/sysext/linkvalidator/Classes/Linktype/InternalLinktype.php

index 98beccd..8a12d24 100644 (file)
@@ -17,6 +17,7 @@ namespace TYPO3\CMS\Core\Database;
 use TYPO3\CMS\Core\Database\Query\Restriction\BackendWorkspaceRestriction;
 use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
 use TYPO3\CMS\Core\LinkHandling\LinkService;
+use TYPO3\CMS\Core\Resource\File;
 use TYPO3\CMS\Core\Utility\GeneralUtility;
 use TYPO3\CMS\Frontend\Service\TypoLinkCodecService;
 
@@ -246,7 +247,7 @@ class SoftReferenceIndex
 
     /**
      * TypoLink tag processing.
-     * Will search for <link ...> tags in the content string and process any found.
+     * Will search for <link ...> and <a> tags in the content string and process any found.
      *
      * @param string $content The input content to analyse
      * @param array $spParams Parameters set for the softref parser key in TCA/columns
@@ -257,14 +258,54 @@ class SoftReferenceIndex
     {
         // Parse string for special TYPO3 <link> tag:
         $htmlParser = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Html\HtmlParser::class);
-        $linkTags = $htmlParser->splitTags('link', $content);
+        $linkService = GeneralUtility::makeInstance(LinkService::class);
+        $linkTags = $htmlParser->splitTags('a', $content);
         // Traverse result:
         $elements = [];
-        foreach ($linkTags as $k => $foundValue) {
-            if ($k % 2) {
-                $typolinkValue = preg_replace('/<LINK[[:space:]]+/i', '', substr($foundValue, 0, -1));
-                $tLP = $this->getTypoLinkParts($typolinkValue);
-                $linkTags[$k] = '<LINK ' . $this->setTypoLinkPartsElement($tLP, $elements, $typolinkValue, $k) . '>';
+        foreach ($linkTags as $key => $foundValue) {
+            if ($key % 2) {
+                if (preg_match('/href="([^"]+)"/', $foundValue, $matches)) {
+                    try {
+                        $linkDetails = $linkService->resolve($matches[1]);
+                        if ($linkDetails['type'] === LinkService::TYPE_FILE && preg_match('/file\?uid=(\d+)/', $matches[1], $fileIdMatch)) {
+                            $token = $this->makeTokenID($key);
+                            $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
+                            $elements[$key] = $linkDetails;
+                            $elements[$key]['subst'] = [
+                                'type' => $linkDetails['type'],
+                                'relFileName' => $fileIdMatch[1],
+                                'tokenID' => $token,
+                                'tokenValue' => 'file:' . ($linkDetails['file'] instanceof File ? $linkDetails['file']->getUid() : $fileIdMatch[1])
+                            ];
+                        } elseif ($linkDetails['type'] === LinkService::TYPE_PAGE && preg_match('/page\?uid=(\d+)#?(\d+)?/', $matches[1], $pageAndAnchorMatches)) {
+                            $token = $this->makeTokenID($key);
+                            $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
+                            $elements[$key] = $linkDetails;
+                            $elements[$key]['subst'] = [
+                                'type' => 'db',
+                                'recordRef' => 'pages:' . $linkDetails['pageuid'] . (isset($pageAndAnchorMatches[2]) ? '#c' . $pageAndAnchorMatches[2] : ''),
+                                'tokenID' => $token,
+                                'tokenValue' => $linkDetails['pageuid'] . (isset($pageAndAnchorMatches[2]) ? '#c' . $pageAndAnchorMatches[2] : '')
+                            ];
+                        } elseif ($linkDetails['type'] === LinkService::TYPE_URL) {
+                            $token = $this->makeTokenID($key);
+                            $linkTags[$key] = str_replace($matches[1], '{softref:' . $token . '}', $linkTags[$key]);
+                            $elements[$key] = $linkDetails;
+                            $elements[$key]['subst'] = [
+                                'type' => 'external',
+                                'tokenID' => $token,
+                                'tokenValue' => $linkDetails['url']
+                            ];
+                        }
+                    } catch (\Exception $e) {
+                        // skip invalid links
+                    }
+                } else {
+                    // keep the legacy code for now
+                    $typolinkValue = preg_replace('/<LINK[[:space:]]+/i', '', substr($foundValue, 0, -1));
+                    $tLP = $this->getTypoLinkParts($typolinkValue);
+                    $linkTags[$k] = '<LINK ' . $this->setTypoLinkPartsElement($tLP, $elements, $typolinkValue, $k) . '>';
+                }
             }
         }
         // Return output:
@@ -323,7 +364,7 @@ class SoftReferenceIndex
     public function findRef_url($content, $spParams)
     {
         // URLs
-        $parts = preg_split('/([^[:alnum:]"\']+)((http|ftp):\\/\\/[^[:space:]"\'<>]*)([[:space:]])/', ' ' . $content . ' ', 10000, PREG_SPLIT_DELIM_CAPTURE);
+        $parts = preg_split('/([^[:alnum:]"\']+)((https?|ftp):\\/\\/[^[:space:]"\'<>]*)([[:space:]])/', ' ' . $content . ' ', 10000, PREG_SPLIT_DELIM_CAPTURE);
         foreach ($parts as $idx => $value) {
             if ($idx % 5 == 3) {
                 unset($parts[$idx]);
index a719a27..21d4545 100644 (file)
@@ -363,7 +363,7 @@ class LinkAnalyzer
     protected function analyseTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
     {
         $currentR = [];
-        $linkTags = $htmlParser->splitIntoBlock('link', $resultArray['content']);
+        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
         $idRecord = $record['uid'];
         $type = '';
         $title = '';
index b68fcbf..ffd47db 100644 (file)
@@ -173,15 +173,16 @@ class InternalLinktype extends AbstractLinktype
         $this->responseContent = true;
         // this content element exists
         if ($row) {
+            $page = (int)$page;
             // page ID on which this CE is in fact located.
-            $correctPageID = $row['pid'];
+            $correctPageID = (int)$row['pid'];
             // Check if the element is on the linked page
             // (The element might have been moved to another page)
-            if (!($correctPageID === $page)) {
+            if ($correctPageID !== $page) {
                 $this->errorParams['errorType']['content'] = self::MOVED;
                 $this->errorParams['content']['uid'] = (int)$anchor;
-                $this->errorParams['content']['wrongPage'] = (int)$page;
-                $this->errorParams['content']['rightPage'] = (int)$correctPageID;
+                $this->errorParams['content']['wrongPage'] = $page;
+                $this->errorParams['content']['rightPage'] = $correctPageID;
                 $this->responseContent = false;
             } else {
                 // The element is located on the page to which the link is pointing