[!!!][TASK] Migrate Lowlevel "double_files" command to Symfony Console
[Packages/TYPO3.CMS.git] / typo3 / sysext / lowlevel / Classes / Command / FilesWithMultipleReferencesCommand.php
1 <?php
2 declare(strict_types=1);
3 namespace TYPO3\CMS\Lowlevel\Command;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Symfony\Component\Console\Command\Command;
19 use Symfony\Component\Console\Input\InputInterface;
20 use Symfony\Component\Console\Input\InputOption;
21 use Symfony\Component\Console\Output\OutputInterface;
22 use Symfony\Component\Console\Style\SymfonyStyle;
23 use TYPO3\CMS\Core\Database\ConnectionPool;
24 use TYPO3\CMS\Core\Database\ReferenceIndex;
25 use TYPO3\CMS\Core\Utility\ArrayUtility;
26 use TYPO3\CMS\Core\Utility\File\BasicFileUtility;
27 use TYPO3\CMS\Core\Utility\GeneralUtility;
28 use TYPO3\CMS\Core\Utility\PathUtility;
29
/**
 * Finds files within uploads/ which are used multiple times by relations within the database
 *
 * The command reads the reference index (sys_refindex), collects every file that is
 * referenced by more than one index entry and - unless --dry-run is given - copies the
 * file once per additional reference and points that reference to the copy.
 */
class FilesWithMultipleReferencesCommand extends Command
{
    /**
     * Configure the command by defining the name, options and arguments
     */
    public function configure()
    {
        $this
            ->setDescription('Looking for files from TYPO3 managed records which are referenced more than once')
            ->setHelp('
Assumptions:
- a perfect integrity of the reference index table (always update the reference index table before using this tool!)
- files found in deleted records are included (otherwise you would see a false list of lost files)

Files attached to records in TYPO3 using a "group" type configuration in TCA or FlexForm DataStructure are managed exclusively by the system and there must always exist a 1-1 reference between the file and the reference in the record.
This tool will expose when such files are referenced from multiple locations which is considered an integrity error.
If a multi-reference is found it was typically created because the record was copied or modified outside of DataHandler which will otherwise maintain the relations correctly.
Multi-references should be resolved to 1-1 references as soon as possible. The danger of keeping multi-references is that if the file is removed from one of the referring records it will actually be deleted in the file system, leaving missing files for the remaining referers!

If the option "--dry-run" is not set, the files that are referenced multiple times are copied with a new name
and the references are updated accordingly.
Warning: First, make sure those files are not used somewhere TYPO3 does not know about!

If you want to get more detailed information, use the --verbose option.')
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                // This command copies files, it never deletes them, so the description talks about copying
                'If this option is set, the files will not actually be copied and the references will not be updated, but just the output which files would be copied is shown'
            )
            ->addOption(
                'update-refindex',
                null,
                InputOption::VALUE_NONE,
                'Setting this option automatically updates the reference index and does not ask on command line. Alternatively, use -n to avoid the interactive mode'
            );
    }

    /**
     * Executes the command to
     * - optionally update the reference index (to have clean data)
     * - find files within the reference index which are referenced more than once
     * - copy these files if --dry-run is not set and update the references accordingly
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @return int always 0, problems with single files are reported via error output only
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $io = new SymfonyStyle($input, $output);
        $io->title($this->getDescription());

        $dryRun = $input->hasOption('dry-run') && (bool)$input->getOption('dry-run');

        $this->updateReferenceIndex($input, $io);

        // Find files which are referenced multiple times
        $doubleFiles = $this->findMultipleReferencedFiles();
        $numberOfFiles = count($doubleFiles);

        if ($numberOfFiles === 0) {
            $io->success('Nothing to do, no files found which are referenced more than once.');
            return 0;
        }

        if (!$io->isQuiet()) {
            $io->note('Found ' . $numberOfFiles . ' files that are referenced more than once.');
            if ($io->isVerbose()) {
                // $doubleFiles maps each file name to an array of references; only the
                // file names (the keys) form a flat list that can be printed as a listing
                $io->listing(array_keys($doubleFiles));
            }
        }

        $this->copyMultipleReferencedFiles($doubleFiles, $dryRun, $io);

        if ($dryRun) {
            // Nothing was changed in a dry run, so do not claim that files were cleaned up
            $io->success('Dry run: ' . $numberOfFiles . ' files which have been referenced multiple times would be cleaned up.');
        } else {
            $io->success('Cleaned up ' . $numberOfFiles . ' files which have been referenced multiple times.');
        }

        return 0;
    }

    /**
     * Function to update the reference index
     * - if the option --update-refindex is set, do it
     * - otherwise, if in interactive mode (not having -n set), ask the user
     * - otherwise assume everything is fine
     *
     * @param InputInterface $input holds information about entered parameters
     * @param SymfonyStyle $io necessary for outputting information
     * @return void
     */
    protected function updateReferenceIndex(InputInterface $input, SymfonyStyle $io)
    {
        // Check for reference index to update
        $io->note('Finding files referenced multiple times in records managed by TYPO3 requires a clean reference index (sys_refindex)');

        $updateReferenceIndex = false;
        if ($input->hasOption('update-refindex') && $input->getOption('update-refindex')) {
            $updateReferenceIndex = true;
        } elseif ($input->isInteractive()) {
            // The question defaults to "no", so just pressing enter leaves the index untouched
            $updateReferenceIndex = $io->confirm('Should the reference index be updated right now?', false);
        }

        if (!$updateReferenceIndex) {
            $io->writeln('Reference index is assumed to be up to date, continuing.');
            return;
        }

        // Update the reference index
        $referenceIndex = GeneralUtility::makeInstance(ReferenceIndex::class);
        $referenceIndex->updateIndex(false, !$io->isQuiet());
    }

    /**
     * Find files which are referenced multiple times in uploads/ folder
     *
     * The result is keyed by file name (the "ref_string" of the index entry); each value
     * is an array mapping the hash of a sys_refindex entry to a readable description of
     * the referencing record. Only files with at least two references are contained.
     *
     * @return array an array of files and their reference hashes that are referenced multiple times
     */
    protected function findMultipleReferencedFiles(): array
    {
        $multipleReferencesList = [];

        // Select all files in the reference table not found by a soft reference parser (thus TCA configured)
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('sys_refindex');

        $result = $queryBuilder
            ->select('*')
            ->from('sys_refindex')
            ->where(
                $queryBuilder->expr()->eq('ref_table', $queryBuilder->createNamedParameter('_FILE', \PDO::PARAM_STR)),
                $queryBuilder->expr()->eq('softref_key', $queryBuilder->createNamedParameter('', \PDO::PARAM_STR))
            )
            ->execute();

        // Remember the first reference seen for every file; it is only moved into the
        // result list once a second reference to the same file shows up
        $firstReferenceOfFile = [];
        while ($record = $result->fetch()) {
            // Compile info string for location of reference
            $infoString = $this->formatReferenceIndexEntryToString($record);
            $hash = $record['hash'];
            $fileName = $record['ref_string'];

            if (!isset($firstReferenceOfFile[$fileName])) {
                $firstReferenceOfFile[$fileName] = [
                    'infoString' => $infoString,
                    'hash' => $hash
                ];
                continue;
            }

            // The file has multiple references pointing to it. isset() is used on purpose:
            // for the first duplicate the key does not exist yet and must not be read.
            if (!isset($multipleReferencesList[$fileName])) {
                $firstReference = $firstReferenceOfFile[$fileName];
                $multipleReferencesList[$fileName] = [
                    $firstReference['hash'] => $firstReference['infoString']
                ];
            }
            $multipleReferencesList[$fileName][$hash] = $infoString;
        }

        return ArrayUtility::sortByKeyRecursive($multipleReferencesList);
    }

    /**
     * Copies files which are referenced multiple times and updates the reference index so they are only used once
     *
     * The first reference of every file keeps the original file, each further reference
     * gets its own copy with a unique name in the same directory.
     *
     * @param array $multipleReferencesToFiles Contains files which have been referenced multiple times
     * @param bool $dryRun if set, the info is just displayed, but no files are copied nor reference index updated
     * @param SymfonyStyle $io the IO object for output
     * @return void
     */
    protected function copyMultipleReferencedFiles(array $multipleReferencesToFiles, bool $dryRun, SymfonyStyle $io)
    {
        $fileFunc = GeneralUtility::makeInstance(BasicFileUtility::class);
        $referenceIndex = GeneralUtility::makeInstance(ReferenceIndex::class);

        foreach ($multipleReferencesToFiles as $fileName => $usages) {
            // PHP turns numeric string keys into integers; restore the string for the
            // string based file functions below (strict_types is enabled in this file)
            $fileName = (string)$fileName;
            $absoluteFileName = GeneralUtility::getFileAbsFileName($fileName);

            if (!$absoluteFileName || !@is_file($absoluteFileName)) {
                // The absolute name is empty if the path could not be resolved, fall back
                // to the name stored in the reference index to keep the message useful
                $io->error('File "' . ($absoluteFileName ?: $fileName) . '" was not found.');
                continue;
            }

            if ($io->isVeryVerbose()) {
                $io->writeln('Processing file "' . $absoluteFileName . '"');
            }

            $isFirstUsage = true;
            foreach ($usages as $hash => $recReference) {
                if ($isFirstUsage) {
                    // The first reference simply keeps pointing to the existing file
                    $isFirstUsage = false;
                    $io->writeln('Keeping "' . $fileName . '" for record "' . $recReference . '"');
                    continue;
                }

                // Create unique name for file
                $newName = $fileFunc->getUniqueName(basename($fileName), dirname($absoluteFileName));
                $io->writeln('Copying "' . $fileName . '" to "' . PathUtility::stripPathSitePrefix($newName) . '" for record "' . $recReference . '"');
                if ($dryRun) {
                    continue;
                }

                GeneralUtility::upload_copy_move($absoluteFileName, $newName);
                // Make sure is_file() below does not answer from a stale stat cache
                clearstatcache();
                if (!@is_file($newName)) {
                    $io->error('File "' . $newName . '" could not be created.');
                    continue;
                }

                // Point the reference to the copy; a truthy return value is reported as error message
                $error = $referenceIndex->setReferenceValue((string)$hash, basename($newName));
                if ($error) {
                    $io->error('ReferenceIndex::setReferenceValue() reported "' . $error . '"');
                }
            }
        }
    }

    /**
     * Formats a sys_refindex entry to something readable
     *
     * The result has the form "tablename:recuid:field", followed by the flexform pointer
     * and the soft reference key if set, and a "(DELETED)" marker for deleted records.
     *
     * @param array $record a row of the table sys_refindex
     * @return string
     */
    protected function formatReferenceIndexEntryToString(array $record): string
    {
        return $record['tablename']
            . ':' . $record['recuid']
            . ':' . $record['field']
            . ($record['flexpointer'] ? ':' . $record['flexpointer'] : '')
            . ($record['softref_key'] ? ':' . $record['softref_key'] . ' (Soft Reference) ' : '')
            . ($record['deleted'] ? ' (DELETED)' : '');
    }
}