2eb10da4d0074ba32a76d2cbb19ab4d6e59739fc
[Packages/TYPO3.CMS.git] / typo3 / sysext / lowlevel / Classes / Command / LostFilesCommand.php
1 <?php
2 declare(strict_types = 1);
3 namespace TYPO3\CMS\Lowlevel\Command;
4
5 /*
6 * This file is part of the TYPO3 CMS project.
7 *
8 * It is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License, either version 2
10 * of the License, or any later version.
11 *
12 * For the full copyright and license information, please read the
13 * LICENSE.txt file that was distributed with this source code.
14 *
15 * The TYPO3 project - inspiring people to share!
16 */
17
18 use Symfony\Component\Console\Command\Command;
19 use Symfony\Component\Console\Input\InputInterface;
20 use Symfony\Component\Console\Input\InputOption;
21 use Symfony\Component\Console\Output\OutputInterface;
22 use Symfony\Component\Console\Style\SymfonyStyle;
23 use TYPO3\CMS\Core\Core\Bootstrap;
24 use TYPO3\CMS\Core\Database\ConnectionPool;
25 use TYPO3\CMS\Core\Database\ReferenceIndex;
26 use TYPO3\CMS\Core\Utility\GeneralUtility;
27 use TYPO3\CMS\Core\Utility\PathUtility;
28
/**
 * Finds files within uploads/ (or within explicitly given custom folders) which
 * have no entry in the reference index (sys_refindex) and removes them unless
 * the command is run with --dry-run.
 */
class LostFilesCommand extends Command
{
    /**
     * Configure the command by defining the description, the help text and the
     * available options (exclude, dry-run, update-refindex, custom-path).
     */
    public function configure()
    {
        $this
            ->setDescription('Looking for files in the uploads/ folder which does not have a reference in TYPO3 managed records.')
            ->setHelp('
Assumptions:
- a perfect integrity of the reference index table (always update the reference index table before using this tool!)
- that all contents in the uploads folder are files attached to TCA records and exclusively managed by DataHandler through "group" type fields
- index.html, .htaccess files and RTEmagic* image files (ignored)
- Files found in deleted records are included (otherwise you would see a false list of lost files)

The assumptions are not requirements by the TYPO3 API but reflects the de facto implementation of most TYPO3 installations and therefore a practical approach to cleaning up the uploads/ or costum folder.
Therefore, if all "group" type fields in TCA and flexforms are positioned inside the uploads/ folder and if no files inside are managed manually it should be safe to clean out files with no relations found in the system.
Under such circumstances there should theoretically be no lost files in the uploads/ or custom folder since DataHandler should have managed relations automatically including adding and deleting files.
However, there is at least one reason known to why files might be found lost and that is when FlexForms are used. In such a case a change of/in the Data Structure XML (or the ability of the system to find the Data Structure definition!) used for the flexform could leave lost files behind. This is not unlikely to happen when records are deleted. More details can be found in a note to the function FlexFormTools->getDataStructureIdentifier()
Another scenario could of course be de-installation of extensions which managed files in the uploads/ or custom folders.

If the option "--dry-run" is not set, the files are then deleted automatically.
Warning: First, make sure those files are not used somewhere TYPO3 does not know about! See the assumptions above.

If you want to get more detailed information, use the --verbose option.')
            ->addOption(
                'exclude',
                null,
                InputOption::VALUE_REQUIRED,
                'Comma-separated list of paths that should be excluded, e.g. "uploads/pics,uploads/media"'
            )
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'If this option is set, the files will not actually be deleted, but just the output which files would be deleted are shown'
            )
            ->addOption(
                'update-refindex',
                null,
                InputOption::VALUE_NONE,
                'Setting this option automatically updates the reference index and does not ask on command line. Alternatively, use -n to avoid the interactive mode'
            )
            ->addOption(
                'custom-path',
                null,
                InputOption::VALUE_REQUIRED,
                'Comma separated list of paths to process. Example: "fileadmin/[path1],fileadmin/[path2],...", if not passed, uploads/ will be used by default.'
            );
    }

    /**
     * Executes the command to
     * - optionally update the reference index (to have clean data)
     * - find files within uploads/* (or the given custom paths) which are not
     *   connected to the reference index
     * - remove these files if --dry-run is not set
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        $io = new SymfonyStyle($input, $output);
        $io->title($this->getDescription());

        $dryRun = $input->hasOption('dry-run') && (bool)$input->getOption('dry-run');

        $this->updateReferenceIndex($input, $io);

        // Paths given via --exclude are skipped while searching
        $excludedPaths = [];
        if ($input->hasOption('exclude') && !empty($input->getOption('exclude'))) {
            $excludedPaths = GeneralUtility::trimExplode(',', $input->getOption('exclude'), true);
        }

        // Paths given via --custom-path replace the default uploads/ folder
        $customPaths = '';
        if ($input->hasOption('custom-path') && !empty($input->getOption('custom-path'))) {
            $customPaths = $input->getOption('custom-path');
        }

        $lostFiles = $this->findLostFiles($excludedPaths, $customPaths);
        $lostFileCount = count($lostFiles);

        if ($lostFileCount > 0) {
            if (!$io->isQuiet()) {
                $io->note('Found ' . $lostFileCount . ' lost files, ready to be deleted.');
                if ($io->isVerbose()) {
                    $io->listing($lostFiles);
                }
            }

            // Delete them (a no-op regarding the file system when $dryRun is set)
            $this->deleteLostFiles($lostFiles, $dryRun, $io);

            // Do not claim a deletion that never happened
            if ($dryRun) {
                $io->success('Dry run: ' . $lostFileCount . ' lost files would have been deleted.');
            } else {
                $io->success('Deleted ' . $lostFileCount . ' lost files.');
            }
        } else {
            $io->success('Nothing to do, no lost files found');
        }
    }

    /**
     * Function to update the reference index
     * - if the option --update-refindex is set, do it
     * - otherwise, if in interactive mode (not having -n set), ask the user
     * - otherwise assume everything is fine
     *
     * @param InputInterface $input holds information about entered parameters
     * @param SymfonyStyle $io necessary for outputting information
     */
    protected function updateReferenceIndex(InputInterface $input, SymfonyStyle $io)
    {
        // Check for reference index to update
        $io->note('Finding lost files managed by TYPO3 requires a clean reference index (sys_refindex)');
        $updateReferenceIndex = false;
        if ($input->hasOption('update-refindex') && $input->getOption('update-refindex')) {
            $updateReferenceIndex = true;
        } elseif ($input->isInteractive()) {
            $updateReferenceIndex = $io->confirm('Should the reference index be updated right now?', false);
        }

        if (!$updateReferenceIndex) {
            $io->writeln('Reference index is assumed to be up to date, continuing.');
            return;
        }

        // Update the reference index
        $referenceIndex = GeneralUtility::makeInstance(ReferenceIndex::class);
        $referenceIndex->updateIndex(false, !$io->isQuiet());
    }

    /**
     * Find lost files in uploads/ or custom folder
     *
     * A file counts as lost when sys_refindex holds no row with
     * ref_table "_FILE", ref_string equal to the file path and an empty
     * softref_key. index.html, .htaccess and RTEmagicP_/RTEmagicC_ files as well
     * as everything below an excluded path are never reported.
     *
     * @param array $excludedPaths list of paths to be excluded, can be uploads/pics/
     * @param string $customPaths comma-separated list of paths to be checked instead of uploads/
     * @return array an array of files (relative to PATH_site) that are not connected
     * @throws \Exception if a custom path does not exist or resolves to a location outside PATH_site
     */
    protected function findLostFiles($excludedPaths = [], $customPaths = ''): array
    {
        $lostFiles = [];

        // Get all files
        $files = [];
        if (!empty($customPaths)) {
            // realpath() yields paths without trailing separator. Comparing the bare
            // strings would accept a sibling directory that merely shares the name
            // prefix of the site root, so both sides get a separator appended.
            $siteRoot = rtrim((string)realpath(PATH_site), '/\\') . DIRECTORY_SEPARATOR;
            foreach (GeneralUtility::trimExplode(',', $customPaths, true) as $customPath) {
                $resolvedPath = realpath(PATH_site . $customPath);
                if ($resolvedPath === false
                    || !GeneralUtility::isFirstPartOfStr(rtrim($resolvedPath, '/\\') . DIRECTORY_SEPARATOR, $siteRoot)
                ) {
                    throw new \Exception('The path: "' . $customPath . '" is invalid', 1450086736);
                }
                $files = GeneralUtility::getAllFilesAndFoldersInPath($files, PATH_site . $customPath);
            }
        } else {
            $files = GeneralUtility::getAllFilesAndFoldersInPath($files, PATH_site . 'uploads/');
        }

        $files = GeneralUtility::removePrefixPathFromList($files, PATH_site);

        // Traverse files and for each, look up if its found in the reference index.
        foreach ($files as $value) {
            // First, allow "index.html", ".htaccess" files since they are often used for good reasons
            if (substr($value, -11) === '/index.html' || substr($value, -10) === '/.htaccess') {
                continue;
            }

            // If the file is a RTEmagic-image name we allow it
            // ("[PC]" instead of "[P|C]": inside a character class the pipe is a literal character)
            if (preg_match('/^RTEmagic[PC]_/', PathUtility::basenameDuringBootstrap($value))) {
                continue;
            }

            $fileIsInExcludedPath = false;
            foreach ($excludedPaths as $exclPath) {
                if (GeneralUtility::isFirstPartOfStr($value, $exclPath)) {
                    $fileIsInExcludedPath = true;
                    break;
                }
            }

            if ($fileIsInExcludedPath) {
                continue;
            }

            // A fresh query builder per file keeps the bound named parameters limited
            // to the three used by this query instead of piling them up across iterations.
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('sys_refindex');

            // Looking for a reference from a field which is NOT a soft reference (thus, only fields with a proper TCA/Flexform configuration)
            // Database drivers may hand back the count as a string, hence the cast before the strict comparison.
            $referenceCount = (int)$queryBuilder
                ->count('hash')
                ->from('sys_refindex')
                ->where(
                    $queryBuilder->expr()->eq(
                        'ref_table',
                        $queryBuilder->createNamedParameter('_FILE', \PDO::PARAM_STR)
                    ),
                    $queryBuilder->expr()->eq(
                        'ref_string',
                        $queryBuilder->createNamedParameter($value, \PDO::PARAM_STR)
                    ),
                    $queryBuilder->expr()->eq(
                        'softref_key',
                        $queryBuilder->createNamedParameter('', \PDO::PARAM_STR)
                    )
                )
                ->execute()
                ->fetchColumn(0);

            // We conclude that the file is lost
            if ($referenceCount === 0) {
                $lostFiles[] = $value;
            }
        }

        return $lostFiles;
    }

    /**
     * Removes given files from the file system
     *
     * In dry-run mode nothing is removed; the file names are only printed when
     * the output is very verbose. Files that cannot be found or cannot be
     * removed are reported as errors and processing continues with the next file.
     *
     * @param array $lostFiles Contains the lost files found
     * @param bool $dryRun if set, the files are just displayed, but not deleted
     * @param SymfonyStyle $io the IO object for output
     */
    protected function deleteLostFiles(array $lostFiles, bool $dryRun, SymfonyStyle $io)
    {
        foreach ($lostFiles as $lostFile) {
            $absoluteFileName = GeneralUtility::getFileAbsFileName($lostFile);
            if ($io->isVeryVerbose()) {
                $io->writeln('Deleting file "' . $absoluteFileName . '"');
            }
            if ($dryRun) {
                continue;
            }
            if (!$absoluteFileName || !@is_file($absoluteFileName)) {
                $io->error('File "' . $absoluteFileName . '" was not found!');
                continue;
            }
            // Only report success if the file system operation actually succeeded
            if (!unlink($absoluteFileName)) {
                $io->error('File "' . $absoluteFileName . '" could not be deleted!');
                continue;
            }
            if (!$io->isQuiet()) {
                $io->writeln('Permanently deleted file record "' . $absoluteFileName . '".');
            }
        }
    }
}