[TASK] Consolidate regexp for utf8 and non-utf8 file systems
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Utility / File / BasicFileUtility.php
1 <?php
2 namespace TYPO3\CMS\Core\Utility\File;
3
4 /***************************************************************
5 * Copyright notice
6 *
7 * (c) 1999-2013 Kasper Skårhøj (kasperYYYY@typo3.com)
8 * All rights reserved
9 *
10 * This script is part of the TYPO3 project. The TYPO3 project is
11 * free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * The GNU General Public License can be found at
17 * http://www.gnu.org/copyleft/gpl.html.
18 * A copy is found in the text file GPL.txt and important notices to the license
19 * from the author is found in LICENSE.txt distributed with these scripts.
20 *
21 *
22 * This script is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * This copyright notice MUST APPEAR in all copies of the script!
28 ***************************************************************/
29
30 use TYPO3\CMS\Core\Utility\GeneralUtility;
31 use TYPO3\CMS\Core\Utility\PathUtility;
32
33 /**
34 * Contains class with basic file management functions
35 *
36 * Contains functions for management, validation etc of files in TYPO3,
37 * using the concepts of web- and ftp-space. Please see the comment for the
38 * init() function
39 *
40 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
41 */
class BasicFileUtility {

	/**
	 * Body of a PCRE character class (without the surrounding brackets) listing what
	 * cleanFileName() replaces by "_": 0x00-0x2C, "/", 0x3A-0x3F, 0x5B-0x60 and 0x7B-0xBF.
	 * Left untouched are "-", ".", 0-9, "@", A-Z, a-z and everything from 0xC0 upwards
	 * ("_" is inside the 0x5B-0x60 range and is simply replaced by itself).
	 *
	 * @var string
	 */
	const UNSAFE_FILENAME_CHARACTER_EXPRESSION = '\\x00-\\x2C\\/\\x3A-\\x3F\\x5B-\\x60\\x7B-\\xBF';

	/**
	 * Prefix which is prepended to the file name (and file body) by getUniqueName()
	 *
	 * @var string
	 * @todo Define visibility
	 */
	public $getUniqueNamePrefix = '';

	/**
	 * Highest number getUniqueName() appends to a file name before it switches to unique strings
	 *
	 * @var integer
	 * @todo Define visibility
	 */
	public $maxNumber = 99;

	/**
	 * Number of characters taken from an MD5 hash and appended to a file name when
	 * getUniqueName() has run out of numbers
	 *
	 * @var integer
	 * @todo Define visibility
	 */
	public $uniquePrecision = 6;

	/**
	 * Maximum file name length accepted by checkFileNameLen().
	 * init() overrides it with $GLOBALS['TYPO3_CONF_VARS']['SYS']['maxFileNameLength'] if that is set.
	 *
	 * @var integer
	 * @todo Define visibility
	 */
	public $maxInputNameLen = 60;

	/**
	 * Temp-foldername. A folder in the root of one of the mounts with this name is
	 * regarded a TEMP-folder (used for upload from clipboard), see findTempFolder()
	 *
	 * @var string
	 * @todo Define visibility
	 */
	public $tempFN = '_temp_';

	/**
	 * Allowed/denied file extensions per space; see the comment of init()
	 *
	 * @var array
	 * @todo Define visibility
	 */
	public $f_ext = array();

	/**
	 * File mounts; see the comment of init()
	 *
	 * @var array
	 * @todo Define visibility
	 */
	public $mounts = array();

	/**
	 * Set by init() to the value of GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT')
	 *
	 * @var string
	 * @todo Define visibility
	 */
	public $webPath = '';

	/**
	 * Set to 1 by init()
	 *
	 * @var integer
	 * @todo Define visibility
	 */
	public $isInit = 0;

	/**
	 * Charset converter used by cleanFileName() on non-UTF-8 file systems; fetched lazily
	 *
	 * @var \TYPO3\CMS\Core\Charset\CharsetConverter
	 */
	public $csConvObj;

	/**********************************
	 *
	 * Checking functions
	 *
	 **********************************/
	/**
	 * Constructor
	 * This function should be called to initialise the internal arrays $this->mounts and $this->f_ext
	 *
	 * A typical example of the array $mounts is this:
	 * $mounts[xx][path] = (..a mounted path..)
	 * the 'xx'-keys is just numerical from zero. There are also a [name] and [type] value that just denotes the mountname and type. Not used for authentication here.
	 * $this->mounts is traversed in the function checkPathAgainstMounts($thePath), and it is checked that $thePath is actually below one of the mount-paths
	 * The mountpaths are with a trailing '/'. $thePath must be with a trailing '/' also!
	 * As you can see, $this->mounts is very critical! This is the array that decides where the user will be allowed to copy files!!
	 * Typically the global var $WEBMOUNTS would be passed along as $mounts
	 *
	 * A typical example of the array $f_ext is this:
	 * $f_ext['webspace']['allow']='';
	 * $f_ext['webspace']['deny']= PHP_EXTENSIONS_DEFAULT;
	 * $f_ext['ftpspace']['allow']='*';
	 * $f_ext['ftpspace']['deny']='';
	 * The control of file extensions goes in two categories. Webspace and Ftpspace. Webspace is folders accessible from a webbrowser (below TYPO3_DOCUMENT_ROOT) and ftpspace is everything else.
	 * The control is done like this: If an extension matches 'allow' then the check returns TRUE. If not and an extension matches 'deny' then the check return FALSE. If no match at all, returns TRUE.
	 * You list extensions comma-separated. If the value is a '*' every extension is allowed
	 * The list is case-insensitive when used in this class (the lists are lowercased here)
	 * Typically TYPO3_CONF_VARS['BE']['fileExtensions'] would be passed along as $f_ext.
	 *
	 * Example:
	 * $basicff->init($GLOBALS['FILEMOUNTS'],$GLOBALS['TYPO3_CONF_VARS']['BE']['fileExtensions']);
	 *
	 * @param array $mounts Contains the paths of the file mounts for the current BE user. Normally $GLOBALS['FILEMOUNTS'] is passed. This variable is set during backend user initialization; $FILEMOUNTS = $GLOBALS['BE_USER']->returnFilemounts(); (see typo3/init.php)
	 * @param array $f_ext Array with information about allowed and denied file extensions. Typically passed: $GLOBALS['TYPO3_CONF_VARS']['BE']['fileExtensions']
	 * @return void
	 * @see typo3/init.php, \TYPO3\CMS\Core\Authentication\BackendUserAuthentication::returnFilemounts()
	 * @todo Define visibility
	 * @deprecated All methods in this class should not be used anymore since TYPO3 6.0. Please use corresponding TYPO3\\CMS\\Core\\Resource\\ResourceStorage (fetched via BE_USERS->getFileStorages()), as all functions should be found there (in a cleaner manner).
	 */
	public function init($mounts, $f_ext) {
		GeneralUtility::logDeprecatedFunction('All methods in this class should not be used anymore since TYPO3 6.0. Please use corresponding TYPO3\\CMS\\Core\\Resource\\ResourceStorage (fetched via BE_USERS->getFileStorages()), as all functions should be found there (in a cleaner manner).');
		// Normalise the four extension lists (lowercase, duplicates removed)
		foreach (array('webspace', 'ftpspace') as $space) {
			foreach (array('allow', 'deny') as $rule) {
				$this->f_ext[$space][$rule] = GeneralUtility::uniqueList(strtolower($f_ext[$space][$rule]));
			}
		}

		$this->mounts = $mounts;
		$this->webPath = GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT');
		$this->isInit = 1;
		// Only a non-empty configured value replaces the class default
		if (!empty($GLOBALS['TYPO3_CONF_VARS']['SYS']['maxFileNameLength'])) {
			$this->maxInputNameLen = $GLOBALS['TYPO3_CONF_VARS']['SYS']['maxFileNameLength'];
		}
	}

	/**
	 * Returns an array with a whole lot of fileinformation.
	 * On top of whatever GeneralUtility::split_fileref() delivers, the array holds:
	 * - tstamp : timestamp of modification
	 * - size : file size
	 * - type : file type (block/char/dir/fifo/file/link)
	 * - owner : user ID of owner of file
	 * - perms : numerical representation of file permissions
	 * - writable : is file writeable by web user (FALSE = yes; TRUE = no) *)
	 * - readable : is file readable by web user (FALSE = yes; TRUE = no) *)
	 *
	 * *) logic is reversed because of handling by functions in TYPO3\CMS\Filelist\FileList
	 *
	 * Filesystem warnings are suppressed on purpose, so a missing file yields FALSE entries.
	 *
	 * @param string $wholePath Filepath to existing file. Should probably be absolute. Filefunctions are performed on this value.
	 * @return array Information about the file in the filepath
	 * @todo Define visibility
	 */
	public function getTotalFileInfo($wholePath) {
		// @todo: deprecate this function, and replace its use in the storage/mounts
		$info = GeneralUtility::split_fileref($wholePath);
		$info['tstamp'] = @filemtime($wholePath);
		$info['size'] = @filesize($wholePath);
		$info['type'] = @filetype($wholePath);
		$info['owner'] = @fileowner($wholePath);
		$info['perms'] = @fileperms($wholePath);
		// Deliberately inverted, see docblock
		$info['writable'] = !@is_writable($wholePath);
		$info['readable'] = !@is_readable($wholePath);
		return $info;
	}

	/**
	 * Checks if a $iconkey (fileextension) is allowed according to $this->f_ext.
	 *
	 * Order of evaluation: 'allow' wins over 'deny'; a '*' list matches everything,
	 * including an empty extension; if nothing matches the extension is allowed.
	 *
	 * @param string $iconkey The extension to check, eg. "php" or "html" etc.
	 * @param string $type Either "webspace" or "ftpspace" - points to a key in $this->f_ext
	 * @return boolean TRUE if file extension is allowed, FALSE if denied or if $type is unknown
	 * @todo Define visibility
	 */
	public function is_allowed($iconkey, $type) {
		if (!isset($this->f_ext[$type])) {
			return FALSE;
		}
		$extension = strtolower((string) $iconkey);
		// Compare against '' so that an extension of "0" is still checked against the lists
		$hasExtension = $extension !== '';

		$allowList = (string) $this->f_ext[$type]['allow'];
		if ($allowList === '*' || ($hasExtension && GeneralUtility::inList($allowList, $extension))) {
			return TRUE;
		}
		$denyList = (string) $this->f_ext[$type]['deny'];
		if ($denyList === '*' || ($hasExtension && GeneralUtility::inList($denyList, $extension))) {
			return FALSE;
		}
		// No match at all
		return TRUE;
	}

	/**
	 * Returns TRUE if you can operate of ANY file ('*') in the space $theDest is in ('webspace' / 'ftpspace'),
	 * i.e. if the deny-list of that space is empty or its allow-list is '*'.
	 *
	 * @param string $theDest Absolute path
	 * @return boolean
	 * @todo Define visibility
	 */
	public function checkIfFullAccess($theDest) {
		$type = $this->is_webpath($theDest) ? 'webspace' : 'ftpspace';
		if (!isset($this->f_ext[$type])) {
			return FALSE;
		}
		return (string) $this->f_ext[$type]['deny'] === '' || (string) $this->f_ext[$type]['allow'] === '*';
	}

	/**
	 * Checks if $this->webPath (should be TYPO3_DOCUMENT_ROOT) is in the first part of $path
	 * Returns TRUE also if init() has not been called or if $path or $this->webPath is empty.
	 *
	 * @param string $path Absolute path to check
	 * @return boolean
	 * @todo Define visibility
	 */
	public function is_webpath($path) {
		// The emptiness test must look at the raw values: slashPath() turns '' into '/'
		if ($this->isInit && (string) $path !== '' && (string) $this->webPath !== '') {
			return GeneralUtility::isFirstPartOfStr($this->slashPath($path), $this->slashPath($this->webPath));
		}
		return TRUE;
	}

	/**
	 * If the filename is given, check it against the TYPO3_CONF_VARS[BE][fileDenyPattern] +
	 * Checks if the $ext fileextension is allowed in the path $theDest (this is based on whether $theDest is below the $this->webPath)
	 *
	 * @param string $ext File extension, eg. "php" or "html"
	 * @param string $theDest Absolute path for which to test
	 * @param string $filename Filename to check against TYPO3_CONF_VARS[BE][fileDenyPattern]
	 * @return boolean TRUE if extension/filename is allowed
	 * @todo Define visibility
	 */
	public function checkIfAllowed($ext, $theDest, $filename = '') {
		if (!GeneralUtility::verifyFilenameAgainstDenyPattern($filename)) {
			return FALSE;
		}
		$space = $this->is_webpath($theDest) ? 'webspace' : 'ftpspace';
		return $this->is_allowed($ext, $space);
	}

	/**
	 * Returns TRUE if the input filename string is not longer than $this->maxInputNameLen.
	 * The length is measured with strlen(), i.e. in bytes.
	 *
	 * @param string $fileName Filename, eg "somefile.html"
	 * @return boolean
	 * @todo Define visibility
	 */
	public function checkFileNameLen($fileName) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		return strlen($fileName) <= $this->maxInputNameLen;
	}

	/**
	 * Canonicalizes $theDir via PathUtility::getCanonicalPath() and returns the new path, if it exists on the server.
	 *
	 * @param string $theDir Directory path to check
	 * @return string Returns the cleaned up directory name if OK, otherwise FALSE.
	 * @todo Define visibility
	 */
	public function is_directory($theDir) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		if (!$this->isPathValid($theDir)) {
			return FALSE;
		}
		$canonicalDir = PathUtility::getCanonicalPath($theDir);
		return @is_dir($canonicalDir) ? $canonicalDir : FALSE;
	}

	/**
	 * Wrapper for \TYPO3\CMS\Core\Utility\GeneralUtility::validPathStr()
	 *
	 * @param string $theFile Filepath to evaluate
	 * @return boolean TRUE, if no '//', '..' or '\' is in the $theFile
	 * @see \TYPO3\CMS\Core\Utility\GeneralUtility::validPathStr()
	 * @todo Define visibility
	 */
	public function isPathValid($theFile) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		return GeneralUtility::validPathStr($theFile);
	}

	/**
	 * Returns the destination path/filename of a unique filename/foldername in that path.
	 * If $theFile exists in $theDest (directory) the file have numbers appended up to $this->maxNumber. Hereafter a unique string will be appended.
	 * This function is used by fx. TCEmain when files are attached to records and needs to be uniquely named in the uploads/ folders
	 *
	 * @param string $theFile The input filename to check
	 * @param string $theDest The directory for which to return a unique filename for $theFile. $theDest MUST be a valid directory. Should be absolute.
	 * @param boolean $dontCheckForUnique If set the filename is returned with the path prepended without checking whether it already existed!
	 * @return string The destination absolute filepath (not just the name!) of a unique filename/foldername in that path. NULL if $theDest is not a valid directory or no free name was found.
	 * @see \TYPO3\CMS\Core\DataHandling\DataHandler::checkValue()
	 * @todo Define visibility
	 */
	public function getUniqueName($theFile, $theDest, $dontCheckForUnique = 0) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		// $theDest is cleaned up (FALSE if it is no valid, existing directory)
		$theDest = $this->is_directory($theDest);
		if (!$theDest) {
			return NULL;
		}
		// Fetches info about path, name, extension of $theFile
		$origFileInfo = GeneralUtility::split_fileref($theFile);
		if ($this->getUniqueNamePrefix) {
			$origFileInfo['file'] = $this->getUniqueNamePrefix . $origFileInfo['file'];
			$origFileInfo['filebody'] = $this->getUniqueNamePrefix . $origFileInfo['filebody'];
		}
		// The plain name is returned if it is free or if the caller does not care
		$theDestFile = $theDest . '/' . $origFileInfo['file'];
		if ($dontCheckForUnique || !file_exists($theDestFile)) {
			return $theDestFile;
		}
		// The plain name is taken: try numbered names first, then one unique-string name.
		// A two-digit "_xx" suffix already present on the file body is removed first.
		$theTempFileBody = preg_replace('/_[0-9][0-9]$/', '', $origFileInfo['filebody']);
		$theOrigExt = $origFileInfo['realFileext'] ? '.' . $origFileInfo['realFileext'] : '';
		for ($a = 1; $a <= $this->maxNumber + 1; $a++) {
			if ($a <= $this->maxNumber) {
				$insert = '_' . sprintf('%02d', $a);
			} else {
				// Last iteration: numbers are exhausted
				$insert = '_' . substr(md5(uniqid('')), 0, $this->uniquePrecision);
			}
			$theDestFile = $theDest . '/' . $theTempFileBody . $insert . $theOrigExt;
			if (!file_exists($theDestFile)) {
				return $theDestFile;
			}
		}
		return NULL;
	}

	/**
	 * Checks if $thePath is a path under one of the paths in $this->mounts
	 * See the comment of init().
	 *
	 * @param string $thePath MUST HAVE a trailing '/' in order to match correctly with the mounts
	 * @return string The key to the first mount found, otherwise NULL. Compare the result with NULL, the key of the first mount may be 0.
	 * @see init()
	 * @todo Define visibility
	 */
	public function checkPathAgainstMounts($thePath) {
		// @todo: deprecate this function, now done in the Storage object
		if ($thePath && $this->isPathValid($thePath) && is_array($this->mounts)) {
			foreach ($this->mounts as $mountKey => $mount) {
				if (GeneralUtility::isFirstPartOfStr($thePath, $mount['path'])) {
					return $mountKey;
				}
			}
		}
		return NULL;
	}

	/**
	 * Find first web folder (relative to PATH_site.'fileadmin') in filemounts array
	 *
	 * @return string The key to the first mount inside PATH_site."fileadmin" found, otherwise NULL.
	 * @todo Define visibility
	 */
	public function findFirstWebFolder() {
		// @todo: where and when to use this function?
		if (is_array($this->mounts)) {
			$fileadminPath = PATH_site . $GLOBALS['TYPO3_CONF_VARS']['BE']['fileadminDir'];
			foreach ($this->mounts as $mountKey => $mount) {
				if (GeneralUtility::isFirstPartOfStr($mount['path'], $fileadminPath)) {
					return $mountKey;
				}
			}
		}
		return NULL;
	}

	/**
	 * Removes filemount part of a path, thus blinding the position.
	 * Takes a path, $thePath, and replaces the part of the path which equals the filemount by "[mount name]: ".
	 *
	 * @param string $thePath is a path which MUST be found within one of the internally set filemounts, $this->mounts
	 * @return string The processed input path, NULL if $thePath is not within any mount
	 * @todo Define visibility
	 */
	public function blindPath($thePath) {
		// @todo: where and when to use this function?
		$mountKey = $this->checkPathAgainstMounts($thePath);
		// Do not test for truthiness: the first mount may well have the key 0
		if ($mountKey === NULL || $mountKey === FALSE) {
			return NULL;
		}
		$mount = $this->mounts[$mountKey];
		return '[' . $mount['name'] . ']: ' . substr($thePath, strlen($mount['path']));
	}

	/**
	 * Find temporary folder
	 * Finds the first $this->tempFN ('_temp_' usually) -folder in the internal array of filemounts, $this->mounts
	 *
	 * @return string Returns the path if found, otherwise NULL.
	 * @todo Define visibility
	 */
	public function findTempFolder() {
		// @todo: where and when to use this function?
		if ($this->tempFN && is_array($this->mounts)) {
			foreach ($this->mounts as $mount) {
				$tempDirectory = $mount['path'] . $this->tempFN;
				if (@is_dir($tempDirectory)) {
					return $tempDirectory;
				}
			}
		}
		return NULL;
	}

	/*********************
	 *
	 * Cleaning functions
	 *
	 *********************/
	/**
	 * Removes all dots, slashes and spaces after a path
	 *
	 * @param string $theDir Input string
	 * @return string Output string
	 * @deprecated since 6.1, will be removed in two versions, use \TYPO3\CMS\Core\Utility\PathUtility::getCanonicalPath() instead
	 */
	public function cleanDirectoryName($theDir) {
		GeneralUtility::logDeprecatedFunction();
		return PathUtility::getCanonicalPath($theDir);
	}

	/**
	 * Converts double slashes (//) to a single slash (/).
	 * This is one str_replace() pass: a run of three or four slashes still leaves two.
	 *
	 * @param string $string Input value
	 * @return string Returns the converted string
	 * @todo Define visibility
	 */
	public function rmDoubleSlash($string) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		return str_replace('//', '/', $string);
	}

	/**
	 * Returns a string which has a slash '/' appended if it doesn't already have that slash.
	 * An empty input therefore becomes '/'.
	 *
	 * @param string $path Input string
	 * @return string Output string with a slash in the end (if not already there)
	 * @todo Define visibility
	 */
	public function slashPath($path) {
		// @todo: should go into the LocalDriver in a protected way (not important to the outside world)
		// @todo: should be done with rtrim($path, '/') . '/';
		return substr($path, -1) != '/' ? $path . '/' : $path;
	}

	/**
	 * Returns a string where any character not matching [.a-zA-Z0-9_@-] is substituted by '_'
	 * (see UNSAFE_FILENAME_CHARACTER_EXPRESSION). Surrounding whitespace and trailing dots are removed.
	 *
	 * With $GLOBALS['TYPO3_CONF_VARS']['SYS']['UTF8filesystem'] set, characters from U+00C0
	 * (latin capital letter a with grave) upwards are kept as well. If the name is not valid
	 * UTF-8 the name is cleaned byte-wise instead, which also replaces every byte from 0xC0 to 0xFF.
	 * Without that setting the name is first passed through specCharsToASCII() of the
	 * charset converter and then cleaned byte-wise.
	 *
	 * @param string $fileName Input string, typically the body of a filename
	 * @param string $charset Charset of the a filename (defaults to current charset; depending on context)
	 * @return string Output string with any unwanted characters substituted by '_' and trailing dots removed
	 * @todo Define visibility
	 */
	public function cleanFileName($fileName, $charset = '') {
		// Byte-wise pattern: additionally treats all bytes 0xC0-0xFF as unsafe
		$byteWisePattern = '/[' . self::UNSAFE_FILENAME_CHARACTER_EXPRESSION . '\\xC0-\\xFF]/';
		if (!empty($GLOBALS['TYPO3_CONF_VARS']['SYS']['UTF8filesystem'])) {
			// allow ".", "-", 0-9, a-z, A-Z and everything beyond U+C0 (latin capital letter a with grave)
			$cleanFileName = preg_replace('/[' . self::UNSAFE_FILENAME_CHARACTER_EXPRESSION . ']/u', '_', trim($fileName));
			if ($cleanFileName === NULL) {
				// preg_replace() gives NULL if the subject is malformed UTF-8; do not
				// let the name silently collapse to an empty string in that case
				$cleanFileName = preg_replace($byteWisePattern, '_', trim($fileName));
			}
		} else {
			// Get conversion object or initialize if needed
			if (!is_object($this->csConvObj)) {
				if (TYPO3_MODE == 'FE') {
					$this->csConvObj = $GLOBALS['TSFE']->csConvObj;
				} elseif (isset($GLOBALS['LANG']) && is_object($GLOBALS['LANG'])) {
					// BE assumed:
					$this->csConvObj = $GLOBALS['LANG']->csConvObj;
				} else {
					// The object may not exist yet, so we need to create it now. Happens in the Install Tool for example.
					$this->csConvObj = GeneralUtility::makeInstance('TYPO3\\CMS\\Core\\Charset\\CharsetConverter');
				}
			}
			// Define character set
			if (!$charset) {
				// Frontend uses the render charset, the backend is utf-8
				$charset = TYPO3_MODE == 'FE' ? $GLOBALS['TSFE']->renderCharset : 'utf-8';
			}
			// If a charset was found, convert filename
			if ($charset) {
				$fileName = $this->csConvObj->specCharsToASCII($charset, $fileName);
			}
			// Replace unwanted characters by underscores
			$cleanFileName = preg_replace($byteWisePattern, '_', trim($fileName));
		}
		// Strip trailing dots and return
		return rtrim((string) $cleanFileName, '.');
	}

}