[CLEANUP] Always put null at the last position
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Http / Uri.php
1 <?php
2 namespace TYPO3\CMS\Core\Http;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Http\Message\UriInterface;
18
19 /**
20 * Represents a URI based on the PSR-7 Standard.
21 *
22 * Highly inspired by https://github.com/phly/http/
23 *
24 * @internal Note that this is not public API yet.
25 */
26 class Uri implements UriInterface
27 {
/**
 * Sub-delimiter characters allowed in query strings and fragments.
 * The characters are pre-escaped so the list can be embedded directly
 * into a regular expression character class.
 *
 * @const string
 */
const SUBDELIMITER_CHARLIST = '!\$&\'\(\)\*\+,;=';

/**
 * Unreserved characters allowed in paths, query strings, and fragments.
 * Pre-escaped for use inside a regular expression character class.
 *
 * @const string
 */
const UNRESERVED_CHARLIST = 'a-zA-Z0-9_\-\.~';

/**
 * The scheme of the URI (lowercase, without the trailing ":").
 * Defaults to an empty string so that getScheme() never yields null.
 *
 * @var string
 */
protected $scheme = '';

/**
 * Schemes accepted by sanitizeScheme(), mapped to their default ports.
 *
 * @var int[] Associative array containing schemes and their default ports.
 */
protected $supportedSchemes = [
    'http' => 80,
    'https' => 443
];

/**
 * The authority part of the URI.
 * Not read or written by the methods of this class; getAuthority()
 * assembles the authority from user info, host and port instead.
 *
 * @var string
 */
protected $authority = '';

/**
 * The user information part of the URI in "user[:password]" form.
 *
 * @var string
 */
protected $userInfo = '';

/**
 * The host part of the URI.
 *
 * @var string
 */
protected $host = '';

/**
 * The port of the URI as given; null when no port has been set.
 * getPort() decides whether the stored value is exposed.
 *
 * @var int|null
 */
protected $port = null;

/**
 * The path part of the URI (can be empty or "/").
 *
 * @var string
 */
protected $path = '';

/**
 * The query part of the URI without the leading "?".
 * Defaults to an empty string so that getQuery() never yields null.
 *
 * @var string
 */
protected $query = '';

/**
 * The fragment part of the URI without the leading "#".
 * Defaults to an empty string so that getFragment() never yields null.
 *
 * @var string
 */
protected $fragment = '';
97
/**
 * Creates the URI object, optionally populated from a URI string.
 *
 * @param string $uri The full URI including query string and fragment; an empty string creates an empty URI
 * @throws \InvalidArgumentException when the URI is not a string or cannot be parsed
 */
public function __construct($uri = '')
{
    if (!is_string($uri)) {
        $argumentType = is_object($uri) ? get_class($uri) : gettype($uri);
        throw new \InvalidArgumentException('URI passed must be a string, but is of type "' . $argumentType . '"', 1436717320);
    }
    // Compare against '' explicitly: empty() would also skip the string "0",
    // which is a valid relative reference (a rootless path).
    if ($uri !== '') {
        $this->parseUri($uri);
    }
}
112
/**
 * Splits a URI string with parse_url() and stores every component that is
 * present in the corresponding property. Scheme, path, query and fragment
 * are passed through their sanitize*() method first; the port is cast to int.
 *
 * @param string $uri
 * @throws \InvalidArgumentException if the URI is malformed.
 */
protected function parseUri($uri)
{
    $components = parse_url($uri);
    if ($components === false) {
        throw new \InvalidArgumentException('The parsedUri string appears to be malformed', 1436717322);
    }

    if (isset($components['scheme'])) {
        $this->scheme = $this->sanitizeScheme($components['scheme']);
    }

    // A password is only taken into account when a user is present as well.
    if (isset($components['user'])) {
        $this->userInfo = isset($components['pass'])
            ? $components['user'] . ':' . $components['pass']
            : $components['user'];
    }

    if (isset($components['host'])) {
        $this->host = $components['host'];
    }

    if (isset($components['port'])) {
        $this->port = (int)$components['port'];
    }

    if (isset($components['path'])) {
        $this->path = $this->sanitizePath($components['path']);
    }

    if (isset($components['query'])) {
        $this->query = $this->sanitizeQuery($components['query']);
    }

    if (isset($components['fragment'])) {
        $this->fragment = $this->sanitizeFragment($components['fragment']);
    }
}
157
/**
 * Retrieve the scheme component of the URI.
 *
 * Returns an empty string when no scheme is present. The stored value has
 * been lowercased by sanitizeScheme() and carries no trailing ":".
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.1
 * @return string The URI scheme.
 */
public function getScheme()
{
    // Cast so that an unset (null) scheme is reported as '' as the interface requires.
    return (string)$this->scheme;
}
176
/**
 * Retrieve the authority component of the URI.
 *
 * The authority is assembled as
 *
 * <pre>
 * [user-info@]host[:port]
 * </pre>
 *
 * An empty host yields an empty string. The user info is prepended only
 * when it is non-empty, and the port is appended only when
 * isNonStandardPort() reports it as non-standard.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.2
 * @return string The URI authority, in "[user-info@]host[:port]" format.
 */
public function getAuthority()
{
    if (empty($this->host)) {
        return '';
    }

    $userInfoPrefix = empty($this->userInfo) ? '' : $this->userInfo . '@';
    $portSuffix = $this->isNonStandardPort($this->scheme, $this->host, $this->port)
        ? ':' . $this->port
        : '';

    return $userInfoPrefix . $this->host . $portSuffix;
}
212
/**
 * Retrieve the user information component of the URI.
 *
 * Returns the stored value in "username[:password]" form, without a
 * trailing "@". The property defaults to an empty string, which is what
 * is returned when no user information has been set.
 *
 * @return string The URI user information, in "username[:password]" format.
 */
public function getUserInfo()
{
    return $this->userInfo;
}
232
/**
 * Retrieve the host component of the URI.
 *
 * Returns an empty string when no host is present. The value is
 * normalized to lowercase on read, per RFC 3986 Section 3.2.2, because
 * the host is stored exactly as it was passed in.
 *
 * @see http://tools.ietf.org/html/rfc3986#section-3.2.2
 * @return string The URI host.
 */
public function getHost()
{
    return strtolower($this->host);
}
248
/**
 * Retrieve the port component of the URI.
 *
 * The stored port is returned only when isNonStandardPort() reports it as
 * non-standard for the current scheme and host; otherwise null is returned.
 *
 * @return int|null The URI port.
 */
public function getPort()
{
    if ($this->isNonStandardPort($this->scheme, $this->host, $this->port)) {
        return $this->port;
    }

    return null;
}
268
/**
 * Retrieve the path component of the URI.
 *
 * The path is returned exactly as stored: it may be empty, absolute
 * (leading "/") or rootless. No normalization between "" and "/" takes
 * place here; callers have to handle both. Percent-encoding is applied
 * by sanitizePath() when the path is set, not on read.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.3
 * @return string The URI path.
 */
public function getPath()
{
    return $this->path;
}
298
/**
 * Retrieve the query string of the URI.
 *
 * Returns an empty string when no query string is present. The value
 * carries no leading "?"; percent-encoding is applied by sanitizeQuery()
 * when the query is set.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.4
 * @return string The URI query string.
 */
public function getQuery()
{
    // Cast so that an unset (null) query is reported as '' as the interface requires.
    return (string)$this->query;
}
323
/**
 * Retrieve the fragment component of the URI.
 *
 * Returns an empty string when no fragment is present. The value carries
 * no leading "#"; percent-encoding is applied by sanitizeFragment() when
 * the fragment is set.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.5
 * @return string The URI fragment.
 */
public function getFragment()
{
    // Cast so that an unset (null) fragment is reported as '' as the interface requires.
    return (string)$this->fragment;
}
344
/**
 * Return an instance with the specified scheme.
 *
 * The current instance is left untouched; a clone carrying the new scheme
 * is returned. The scheme is normalized by sanitizeScheme(), which
 * lowercases it and rejects schemes that are not supported. An empty
 * scheme removes the scheme.
 *
 * @param string $scheme The scheme to use with the new instance.
 *
 * @return self A new instance with the specified scheme.
 * @throws \InvalidArgumentException for invalid or unsupported schemes.
 */
public function withScheme($scheme)
{
    // Validate before cloning so that an invalid scheme never produces a copy.
    $normalizedScheme = $this->sanitizeScheme($scheme);

    $copy = clone $this;
    $copy->scheme = $normalizedScheme;

    return $copy;
}
369
/**
 * Return an instance with the specified user information.
 *
 * The current instance is left untouched; a clone carrying the new user
 * information is returned. The password is optional and is appended as
 * ":password" only when a user is given. An empty user removes the user
 * information altogether, including any password passed along.
 *
 * @param string $user The user name to use for authority.
 * @param string|null $password The password associated with $user.
 *
 * @return self A new instance with the specified user information.
 */
public function withUserInfo($user, $password = null)
{
    $userInfo = (string)$user;
    // Compare against '' instead of using empty(), which would discard the valid password "0".
    if ($userInfo !== '' && (string)$password !== '') {
        $userInfo .= ':' . $password;
    }

    $clonedObject = clone $this;
    $clonedObject->userInfo = $userInfo;
    return $clonedObject;
}
396
/**
 * Return an instance with the specified host.
 *
 * The current instance is left untouched; a clone carrying the new host
 * is returned. The host is stored as given, without validation or
 * normalization in this method. An empty host removes the host.
 *
 * @param string $host The hostname to use with the new instance.
 *
 * @return self A new instance with the specified host.
 * @throws \InvalidArgumentException for invalid hostnames.
 */
public function withHost($host)
{
    $copy = clone $this;
    $copy->host = $host;

    return $copy;
}
416
/**
 * Return an instance with the specified port.
 *
 * The current instance is left untouched; a clone carrying the new port
 * is returned. A null value removes the port information. Any other
 * value must be interpretable as an integer within the TCP/UDP port
 * range 1-65535.
 *
 * @param int|null $port The port to use with the new instance; a null value
 * removes the port information.
 *
 * @return self A new instance with the specified port.
 * @throws \InvalidArgumentException for invalid ports.
 */
public function withPort($port)
{
    // null is the documented way to remove the port, so it bypasses validation.
    if ($port !== null) {
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($port) === false) {
            $argumentType = is_object($port) ? get_class($port) : gettype($port);
            throw new \InvalidArgumentException('Invalid port "' . $argumentType . '" specified, must be an integer.', 1436717324);
        }

        $port = (int)$port;
        if ($port < 1 || $port > 65535) {
            throw new \InvalidArgumentException('Invalid port "' . $port . '" specified, must be a valid TCP/UDP port.', 1436717326);
        }
    }

    $clonedObject = clone $this;
    $clonedObject->port = $port;
    return $clonedObject;
}
451
/**
 * Return an instance with the specified path.
 *
 * The current instance is left untouched; a clone carrying the new path
 * is returned. The path may be empty, absolute (leading "/") or rootless.
 * It must be a string and must contain neither "?" nor "#". Encoded and
 * decoded characters are both accepted; sanitizePath() takes care of the
 * percent-encoding.
 *
 * @param string $path The path to use with the new instance.
 *
 * @return self A new instance with the specified path.
 * @throws \InvalidArgumentException for invalid paths.
 */
public function withPath($path)
{
    if (is_string($path) === false) {
        throw new \InvalidArgumentException('Invalid path provided. Must be of type string.', 1436717328);
    }

    // A path must not smuggle in a query string ...
    $containsQuery = strpos($path, '?') !== false;
    if ($containsQuery) {
        throw new \InvalidArgumentException('Invalid path provided. Must not contain a query string.', 1436717330);
    }

    // ... nor a fragment.
    $containsFragment = strpos($path, '#') !== false;
    if ($containsFragment) {
        throw new \InvalidArgumentException('Invalid path provided; must not contain a URI fragment', 1436717332);
    }

    $encodedPath = $this->sanitizePath($path);

    $copy = clone $this;
    $copy->path = $encodedPath;

    return $copy;
}
494
/**
 * Return an instance with the specified query string.
 *
 * The current instance is left untouched; a clone carrying the new query
 * is returned. The query must be a string without a "#". Encoded and
 * decoded characters are both accepted; sanitizeQuery() strips a leading
 * "?" and takes care of the percent-encoding. An empty query string
 * removes the query.
 *
 * @param string $query The query string to use with the new instance.
 *
 * @return self A new instance with the specified query string.
 * @throws \InvalidArgumentException for invalid query strings.
 */
public function withQuery($query)
{
    if (is_string($query) === false) {
        throw new \InvalidArgumentException('Query string must be a string.', 1436717334);
    }

    $containsFragment = strpos($query, '#') !== false;
    if ($containsFragment) {
        throw new \InvalidArgumentException('Query string must not include a URI fragment.', 1436717336);
    }

    $encodedQuery = $this->sanitizeQuery($query);

    $copy = clone $this;
    $copy->query = $encodedQuery;

    return $copy;
}
526
/**
 * Return an instance with the specified URI fragment.
 *
 * The current instance is left untouched; a clone carrying the new
 * fragment is returned. Encoded and decoded characters are both accepted;
 * sanitizeFragment() strips a leading "#", maps null to an empty string
 * and takes care of the percent-encoding. An empty fragment removes the
 * fragment.
 *
 * @param string $fragment The fragment to use with the new instance.
 *
 * @return self A new instance with the specified fragment.
 */
public function withFragment($fragment)
{
    $encodedFragment = $this->sanitizeFragment($fragment);

    $copy = clone $this;
    $copy->fragment = $encodedFragment;

    return $copy;
}
549
/**
 * Return the string representation as a URI reference.
 *
 * Depending on which components of the URI are present, the resulting
 * string is either a full URI or a relative reference according to
 * RFC 3986, Section 4.1. The components are concatenated as follows:
 *
 * - A scheme, if present, is suffixed by ":".
 * - An authority, if present, is prefixed by "//".
 * - The path is appended as is, with two adjustments that keep the
 *   result valid (PHP does not allow throwing from __toString()):
 *   - a rootless path is prefixed by "/" when an authority is present,
 *   - multiple leading slashes are reduced to one when no authority is
 *     present, as the path would otherwise be read as an authority.
 * - A query, if present, is prefixed by "?".
 * - A fragment, if present, is prefixed by "#".
 *
 * @see http://tools.ietf.org/html/rfc3986#section-4.1
 * @return string
 */
public function __toString()
{
    $uri = '';

    if (!empty($this->scheme)) {
        $uri .= $this->scheme . ':';
    }

    $authority = $this->getAuthority();
    $path = (string)$this->getPath();

    if (!empty($authority)) {
        $uri .= '//' . $authority;
        // With an authority the path must be empty or start with "/".
        if ($path !== '' && strpos($path, '/') !== 0) {
            $path = '/' . $path;
        }
    } elseif (strpos($path, '//') === 0) {
        // Without an authority a leading "//" would be parsed as one.
        $path = '/' . ltrim($path, '/');
    }
    $uri .= $path;

    // Compare against '' so that the valid values "0" are not dropped.
    if ((string)$this->query !== '') {
        $uri .= '?' . $this->query;
    }
    if ((string)$this->fragment !== '') {
        $uri .= '#' . $this->fragment;
    }

    return $uri;
}
599
/**
 * Decides whether a port has to be exposed for the given scheme and host.
 *
 * - Without a port there is nothing to expose: false.
 * - With a port but without a scheme no default port is known, so the
 *   port counts as non-standard: true.
 * - With a scheme but without a host: false.
 * - Otherwise the port is non-standard when the scheme is not listed in
 *   $supportedSchemes or the port differs from the scheme's default port.
 *
 * @param string $scheme
 * @param string $host
 * @param int|null $port
 * @return bool
 */
protected function isNonStandardPort($scheme, $host, $port)
{
    // Checked first so that a scheme-less URI without a port is not
    // rendered with a dangling ":" by getAuthority().
    if (empty($port)) {
        return false;
    }

    if (empty($scheme)) {
        return true;
    }

    if (empty($host)) {
        return false;
    }

    return !isset($this->supportedSchemes[$scheme]) || $port !== $this->supportedSchemes[$scheme];
}
620
/**
 * Normalizes a scheme and verifies that it is supported.
 *
 * The scheme is lowercased and a trailing ":" or "://" is stripped. An
 * empty result is returned as an empty string; any other value must be a
 * key of $supportedSchemes.
 *
 * @param string $scheme Scheme name.
 *
 * @return string Filtered scheme.
 * @throws \InvalidArgumentException when a scheme is given which is not supported
 */
protected function sanitizeScheme($scheme)
{
    $normalized = preg_replace('#:(//)?$#', '', strtolower($scheme));

    if (empty($normalized)) {
        return '';
    }

    if (array_key_exists($normalized, $this->supportedSchemes)) {
        return $normalized;
    }

    throw new \InvalidArgumentException('Unsupported scheme "' . $normalized . '"; must be any empty string or in the set (' . implode(', ', array_keys($this->supportedSchemes)) . ')', 1436717338);
}
644
/**
 * Percent-encodes the parts of a path that need encoding.
 *
 * Every run of characters that is neither unreserved nor one of
 * ":@&=+$,/;%" is passed through rawurlencode(), and so is any "%" that
 * is not followed by two hexadecimal digits. Existing percent-encoded
 * sequences are therefore left alone and not encoded twice.
 *
 * @param string $path
 * @return string
 */
protected function sanitizePath($path)
{
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . ':@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = static function ($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $path);
}
661
/**
 * Percent-encodes a query string.
 *
 * A leading "?" is removed. The remainder is split at "&"; each part is
 * split into key and value by splitQueryValue(), both are encoded with
 * sanitizeQueryOrFragment(), and the parts are joined with "&" again.
 * A part without "=" stays a bare key.
 *
 * @param string $query
 * @return string
 */
protected function sanitizeQuery($query)
{
    if (!empty($query) && strpos($query, '?') === 0) {
        $query = substr($query, 1);
    }

    $encodedPairs = [];
    foreach (explode('&', $query) as $pair) {
        list($key, $value) = $this->splitQueryValue($pair);

        $encodedPair = $this->sanitizeQueryOrFragment($key);
        if ($value !== null) {
            $encodedPair .= '=' . $this->sanitizeQueryOrFragment($value);
        }
        $encodedPairs[] = $encodedPair;
    }

    return implode('&', $encodedPairs);
}
688
/**
 * Splits a single query part at its first "=" into key and value.
 *
 * When the part contains no "=", the value is null.
 *
 * @param string $value
 * @return array An array with exactly two elements, key and value
 */
protected function splitQueryValue($value)
{
    // explode() yields one or two elements; pad the one-element case with null.
    return array_pad(explode('=', $value, 2), 2, null);
}
703
/**
 * Percent-encodes a fragment.
 *
 * null is treated as an empty string and a leading "#" is removed before
 * the value is handed to sanitizeQueryOrFragment().
 *
 * @param string|null $fragment
 * @return string
 */
protected function sanitizeFragment($fragment)
{
    $value = ($fragment === null) ? '' : $fragment;
    $hasLeadingHash = !empty($value) && strpos($value, '#') === 0;

    return $this->sanitizeQueryOrFragment($hasLeadingHash ? substr($value, 1) : $value);
}
722
/**
 * Percent-encodes a query string key or value, or a fragment.
 *
 * Every run of characters that is neither unreserved, a sub-delimiter,
 * nor one of "%:@/?" is passed through rawurlencode(), and so is any "%"
 * that is not followed by two hexadecimal digits. Existing
 * percent-encoded sequences are therefore not encoded twice.
 *
 * @param string $value
 * @return string
 */
protected function sanitizeQueryOrFragment($value)
{
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . self::SUBDELIMITER_CHARLIST . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = static function ($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $value);
}
739 }