59774d936b5149db187168320e59e377a404133a
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Http / Uri.php
1 <?php
2 namespace TYPO3\CMS\Core\Http;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Http\Message\UriInterface;
18
19 /**
20 * Represents a URI based on the PSR-7 Standard.
21 *
22 * Highly inspired by https://github.com/phly/http/
23 *
24 * @internal Note that this is not public API yet.
25 */
26 class Uri implements UriInterface
27 {
/**
 * Sub-delimiter characters, pre-escaped for embedding in a PCRE character
 * class. Used when sanitizing query strings and fragments.
 *
 * @var string
 */
const SUBDELIMITER_CHARLIST = '!\$&\'\(\)\*\+,;=';

/**
 * Unreserved characters, pre-escaped for embedding in a PCRE character
 * class. Used when sanitizing paths, query strings, and fragments.
 *
 * @var string
 */
const UNRESERVED_CHARLIST = 'a-zA-Z0-9_\-\.~';

/**
 * The scheme of the URI; an empty string means "no scheme".
 *
 * Defaults to '' (not null) so that getScheme() returns a string even when
 * the URI was created without a scheme.
 *
 * @var string
 */
protected $scheme = '';

/**
 * @var int[] Associative array containing schemes and their default ports.
 */
protected $supportedSchemes = [
    'http' => 80,
    'https' => 443
];

/**
 * The authority part of the URI.
 *
 * NOTE(review): not read or written by any method of this class; the
 * authority is assembled on demand in getAuthority(). Kept for subclasses.
 *
 * @var string
 */
protected $authority = '';

/**
 * The userInfo part of the URI ("user" or "user:password")
 * @var string
 */
protected $userInfo = '';

/**
 * The host part of the URI
 * @var string
 */
protected $host = '';

/**
 * The port as parsed or set; null when no port was given.
 * @var int|null
 */
protected $port;

/**
 * The path part of the URI (can be empty or /)
 * @var string
 */
protected $path = '';

/**
 * The query part of the URI without the leading "?".
 *
 * Defaults to '' (not null) so that getQuery() returns a string even when
 * no query is present.
 *
 * @var string
 */
protected $query = '';

/**
 * The fragment part of the URI without the leading "#".
 *
 * Defaults to '' (not null) so that getFragment() returns a string even
 * when no fragment is present.
 *
 * @var string
 */
protected $fragment = '';
97
/**
 * Creates the URI object, optionally populated from a URI string.
 *
 * @param string $uri The full URI including query string and fragment; an empty string creates an empty URI
 * @throws \InvalidArgumentException when the URI is not a string, or when parsing it fails
 */
public function __construct($uri = '')
{
    if (!is_string($uri)) {
        $argumentType = is_object($uri) ? get_class($uri) : gettype($uri);
        throw new \InvalidArgumentException('URI passed must be a string, but is of type "' . $argumentType . '"', 1436717320);
    }
    // Compare against '' explicitly: empty() would also skip the valid
    // relative reference "0" and silently produce an empty URI.
    if ($uri !== '') {
        $this->parseUri($uri);
    }
}
112
/**
 * Splits a URI string with parse_url() and stores each component that is
 * present on this object. Scheme, path, query and fragment go through their
 * sanitize*() method first; user info, host and port are stored as parsed
 * (the port cast to int).
 *
 * @param string $uri
 * @throws \InvalidArgumentException if parse_url() rejects the URI as malformed.
 */
protected function parseUri($uri)
{
    $components = parse_url($uri);
    if (false === $components) {
        throw new \InvalidArgumentException('The parsedUri string appears to be malformed', 1436717322);
    }

    if (isset($components['scheme'])) {
        $this->scheme = $this->sanitizeScheme($components['scheme']);
    }

    if (isset($components['user'])) {
        // Build "user[:pass]" locally, then assign once.
        $credentials = $components['user'];
        if (isset($components['pass'])) {
            $credentials .= ':' . $components['pass'];
        }
        $this->userInfo = $credentials;
    }

    if (isset($components['host'])) {
        $this->host = $components['host'];
    }

    if (isset($components['port'])) {
        $this->port = (int)$components['port'];
    }

    if (isset($components['path'])) {
        $this->path = $this->sanitizePath($components['path']);
    }

    if (isset($components['query'])) {
        $this->query = $this->sanitizeQuery($components['query']);
    }

    if (isset($components['fragment'])) {
        $this->fragment = $this->sanitizeFragment($components['fragment']);
    }
}
157
/**
 * Retrieve the scheme component of the URI.
 *
 * If no scheme is present, this method returns an empty string. The scheme
 * is stored lowercased and without the trailing ":" (see sanitizeScheme()).
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.1
 * @return string The URI scheme.
 */
public function getScheme()
{
    // The property may still be null when no scheme was ever set; the
    // interface requires an empty string in that case.
    return (string)$this->scheme;
}
176
/**
 * Retrieve the authority component of the URI.
 *
 * Builds "[user-info@]host[:port]" from the stored parts. Returns an empty
 * string when no host is set. The port is only appended when
 * isNonStandardPort() reports it as non-standard for the current scheme.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.2
 * @return string The URI authority, in "[user-info@]host[:port]" format.
 */
public function getAuthority()
{
    if (empty($this->host)) {
        return '';
    }

    $userInfoPrefix = empty($this->userInfo) ? '' : $this->userInfo . '@';
    $portSuffix = $this->isNonStandardPort($this->scheme, $this->host, $this->port)
        ? ':' . $this->port
        : '';

    return $userInfoPrefix . $this->host . $portSuffix;
}
212
/**
 * Retrieve the user information component of the URI.
 *
 * Returns the stored value in "username[:password]" form, without the
 * trailing "@". The property defaults to an empty string, which is what is
 * returned when no user information was parsed or set.
 *
 * @return string The URI user information, in "username[:password]" format.
 */
public function getUserInfo()
{
    return $this->userInfo;
}
232
/**
 * Retrieve the host component of the URI.
 *
 * If no host is present, this method returns an empty string.
 *
 * The value returned is normalized to lowercase, per RFC 3986
 * Section 3.2.2. The host is stored as given (parseUri() and withHost()
 * do not alter it), so the normalization is applied here on read.
 *
 * @see http://tools.ietf.org/html/rfc3986#section-3.2.2
 * @return string The URI host, lowercased.
 */
public function getHost()
{
    return strtolower((string)$this->host);
}
248
/**
 * Retrieve the port component of the URI.
 *
 * Returns the stored port only when isNonStandardPort() considers it
 * non-standard for the current scheme and host; otherwise returns null
 * (standard port, or no port set).
 *
 * @return int|null The URI port.
 */
public function getPort()
{
    if ($this->isNonStandardPort($this->scheme, $this->host, $this->port)) {
        return $this->port;
    }

    return null;
}
268
/**
 * Retrieve the path component of the URI.
 *
 * The path is returned exactly as stored: it may be empty, absolute
 * (leading "/") or rootless. No "" <-> "/" normalization is applied here, so
 * callers have to handle both forms themselves.
 *
 * Paths are percent-encoded when they are stored (parseUri() and withPath()
 * both run them through sanitizePath()), which leaves existing "%XX"
 * sequences untouched. A literal slash that is not a segment delimiter must
 * therefore be passed in encoded form ("%2F").
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.3
 * @return string The URI path.
 */
public function getPath()
{
    return $this->path;
}
298
/**
 * Retrieve the query string of the URI.
 *
 * If no query string is present, this method returns an empty string.
 * The leading "?" is not part of the returned value.
 *
 * The query is percent-encoded when it is stored (see sanitizeQuery()),
 * without double-encoding existing "%XX" sequences. An ampersand that is
 * not meant as a pair delimiter must be passed in encoded form ("%26").
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.4
 * @return string The URI query string.
 */
public function getQuery()
{
    // The property may still be null when no query was ever set; the
    // interface requires an empty string in that case.
    return (string)$this->query;
}
323
/**
 * Retrieve the fragment component of the URI.
 *
 * If no fragment is present, this method returns an empty string.
 * The leading "#" is not part of the returned value.
 *
 * The fragment is percent-encoded when it is stored (see
 * sanitizeFragment()), without double-encoding existing "%XX" sequences.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.5
 * @return string The URI fragment.
 */
public function getFragment()
{
    // The property may still be null when no fragment was ever set; the
    // interface requires an empty string in that case.
    return (string)$this->fragment;
}
344
/**
 * Return an instance with the specified scheme.
 *
 * The current instance is left untouched; a clone carrying the new scheme
 * is returned. The scheme is normalized by sanitizeScheme() (lowercased,
 * trailing ":" or "://" removed) and must be one of the supported schemes.
 * An empty scheme removes the scheme.
 *
 * @param string $scheme The scheme to use with the new instance.
 *
 * @return static A new instance with the specified scheme.
 * @throws \InvalidArgumentException for invalid or unsupported schemes.
 */
public function withScheme($scheme)
{
    // Validate first so that nothing is cloned for an unsupported scheme.
    $sanitizedScheme = $this->sanitizeScheme($scheme);

    $copy = clone $this;
    $copy->scheme = $sanitizedScheme;

    return $copy;
}
369
/**
 * Return an instance with the specified user information.
 *
 * The current instance is left untouched; a clone carrying the new user
 * information is returned.
 *
 * The password is optional, but user information always requires a user:
 * an empty user removes the user information altogether, and any password
 * passed along with it is ignored.
 *
 * @param string $user The user name to use for authority.
 * @param string|null $password The password associated with $user.
 *
 * @return static A new instance with the specified user information.
 */
public function withUserInfo($user, $password = null)
{
    $userInfo = '';
    if ((string)$user !== '') {
        $userInfo = (string)$user;
        // Explicit comparison instead of empty(): "0" is a valid password
        // and must not be dropped.
        if ($password !== null && (string)$password !== '') {
            $userInfo .= ':' . $password;
        }
    }

    $clonedObject = clone $this;
    $clonedObject->userInfo = $userInfo;
    return $clonedObject;
}
396
/**
 * Return an instance with the specified host.
 *
 * The current instance is left untouched; a clone carrying the new host is
 * returned. The value is stored as given, without validation. An empty host
 * is equivalent to removing the host.
 *
 * @param string $host The hostname to use with the new instance.
 *
 * @return static A new instance with the specified host.
 */
public function withHost($host)
{
    $copy = clone $this;
    $copy->host = $host;

    return $copy;
}
416
/**
 * Return an instance with the specified port.
 *
 * The current instance is left untouched; a clone carrying the new port is
 * returned. A null port removes the port information. Any other value must
 * pass MathUtility::canBeInterpretedAsInteger() and, once cast to int, lie
 * within 1..65535.
 *
 * @param int|null $port The port to use with the new instance; a null value
 * removes the port information.
 *
 * @return static A new instance with the specified port.
 * @throws \InvalidArgumentException for invalid ports.
 */
public function withPort($port)
{
    if (null !== $port) {
        $isInteger = \TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($port);
        if (false === $isInteger) {
            $argumentType = is_object($port) ? get_class($port) : gettype($port);
            throw new \InvalidArgumentException('Invalid port "' . $argumentType . '" specified, must be an integer.', 1436717324);
        }

        $port = (int)$port;
        $isWithinRange = $port >= 1 && $port <= 65535;
        if (!$isWithinRange) {
            throw new \InvalidArgumentException('Invalid port "' . $port . '" specified, must be a valid TCP/UDP port.', 1436717326);
        }
    }

    $copy = clone $this;
    $copy->port = $port;

    return $copy;
}
453
/**
 * Return an instance with the specified path.
 *
 * The current instance is left untouched; a clone carrying the new path is
 * returned. The path may be empty, absolute (leading "/") or rootless.
 * It must not contain a query string ("?") or a fragment ("#").
 *
 * Both encoded and decoded characters are accepted; the value is run
 * through sanitizePath() before it is stored.
 *
 * @param string $path The path to use with the new instance.
 *
 * @return static A new instance with the specified path.
 * @throws \InvalidArgumentException for invalid paths.
 */
public function withPath($path)
{
    if (!is_string($path)) {
        throw new \InvalidArgumentException('Invalid path provided. Must be of type string.', 1436717328);
    }

    // Delimiters that would start another URI component, checked in order,
    // each with its own message and error code.
    $forbiddenCharacters = [
        '?' => ['Invalid path provided. Must not contain a query string.', 1436717330],
        '#' => ['Invalid path provided; must not contain a URI fragment', 1436717332],
    ];
    foreach ($forbiddenCharacters as $character => $error) {
        if (strpos($path, $character) !== false) {
            throw new \InvalidArgumentException($error[0], $error[1]);
        }
    }

    $copy = clone $this;
    $copy->path = $this->sanitizePath($path);

    return $copy;
}
496
/**
 * Return an instance with the specified query string.
 *
 * The current instance is left untouched; a clone carrying the new query is
 * returned. The query must be a string and must not contain a fragment
 * ("#"). Both encoded and decoded characters are accepted; the value is run
 * through sanitizeQuery() before it is stored.
 *
 * An empty query string value is equivalent to removing the query string.
 *
 * @param string $query The query string to use with the new instance.
 *
 * @return static A new instance with the specified query string.
 * @throws \InvalidArgumentException for invalid query strings.
 */
public function withQuery($query)
{
    if (!is_string($query)) {
        throw new \InvalidArgumentException('Query string must be a string.', 1436717334);
    }

    $containsFragment = strpos($query, '#') !== false;
    if ($containsFragment) {
        throw new \InvalidArgumentException('Query string must not include a URI fragment.', 1436717336);
    }

    $copy = clone $this;
    $copy->query = $this->sanitizeQuery($query);

    return $copy;
}
528
/**
 * Return an instance with the specified URI fragment.
 *
 * The current instance is left untouched; a clone carrying the new fragment
 * is returned. Both encoded and decoded characters are accepted; the value
 * is run through sanitizeFragment() before it is stored, which also strips
 * a leading "#".
 *
 * An empty fragment value is equivalent to removing the fragment.
 *
 * @param string $fragment The fragment to use with the new instance.
 *
 * @return static A new instance with the specified fragment.
 */
public function withFragment($fragment)
{
    $sanitizedFragment = $this->sanitizeFragment($fragment);

    $copy = clone $this;
    $copy->fragment = $sanitizedFragment;

    return $copy;
}
551
/**
 * Return the string representation as a URI reference.
 *
 * Depending on which components of the URI are present, the resulting
 * string is either a full URI or relative reference according to RFC 3986,
 * Section 4.1. The components are concatenated with these delimiters:
 *
 * - If a scheme is present, it is suffixed by ":".
 * - If an authority is present, it is prefixed by "//".
 * - The path is appended as is, with two adjustments that keep the URI
 *   reference valid (an exception cannot be thrown from __toString()):
 *   - If the path is rootless and an authority is present, the path is
 *     prefixed by "/".
 *   - If the path starts with more than one "/" and no authority is
 *     present, the starting slashes are reduced to one.
 *   A rootless path without an authority stays rootless, so a relative
 *   reference is not turned into an absolute one.
 * - If a query is present, it is prefixed by "?".
 * - If a fragment is present, it is prefixed by "#".
 *
 * "Present" means "not the empty string"; values such as "0" are kept.
 *
 * @see http://tools.ietf.org/html/rfc3986#section-4.1
 * @return string
 */
public function __toString()
{
    $uri = '';

    $scheme = (string)$this->scheme;
    if ($scheme !== '') {
        $uri .= $scheme . ':';
    }

    $authority = (string)$this->getAuthority();
    $hasAuthority = $authority !== '';
    if ($hasAuthority) {
        $uri .= '//' . $authority;
    }

    $path = (string)$this->getPath();
    if ($path !== '') {
        if ($hasAuthority) {
            // A rootless path directly after the authority would merge
            // into the host name, so separate the two.
            if ($path[0] !== '/') {
                $path = '/' . $path;
            }
        } elseif (strpos($path, '//') === 0) {
            // Without an authority a leading "//" would itself be read as
            // the start of an authority component.
            $path = '/' . ltrim($path, '/');
        }
        $uri .= $path;
    }

    $query = (string)$this->query;
    if ($query !== '') {
        $uri .= '?' . $query;
    }

    $fragment = (string)$this->fragment;
    if ($fragment !== '') {
        $uri .= '#' . $fragment;
    }

    return $uri;
}
601
/**
 * Is a given port non-standard for the given scheme?
 *
 * - Without a scheme: true when there is no host, otherwise true exactly
 *   when a port is given.
 * - With a scheme: false when host or port is missing; otherwise true when
 *   the scheme is not in $supportedSchemes or the port differs (strict
 *   comparison) from the scheme's default port.
 *
 * @param string $scheme
 * @param string $host
 * @param int $port
 * @return bool
 */
protected function isNonStandardPort($scheme, $host, $port)
{
    $hasHost = !empty($host);
    $hasPort = !empty($port);

    if (empty($scheme)) {
        return $hasHost ? $hasPort : true;
    }

    if (!$hasHost || !$hasPort) {
        return false;
    }

    if (!isset($this->supportedSchemes[$scheme])) {
        return true;
    }

    return $this->supportedSchemes[$scheme] !== $port;
}
622
/**
 * Normalizes a scheme and checks that it is supported.
 *
 * The scheme is lowercased and a trailing ":" or "://" is removed. An empty
 * result is returned as an empty string; any other value must be a key of
 * $supportedSchemes.
 *
 * @param string $scheme Scheme name.
 *
 * @return string Filtered scheme.
 * @throws \InvalidArgumentException when a scheme is given which is not supported
 */
protected function sanitizeScheme($scheme)
{
    $normalizedScheme = preg_replace('#:(//)?$#', '', strtolower($scheme));

    if (empty($normalizedScheme)) {
        return '';
    }

    if (array_key_exists($normalizedScheme, $this->supportedSchemes)) {
        return $normalizedScheme;
    }

    throw new \InvalidArgumentException('Unsupported scheme "' . $normalizedScheme . '"; must be any empty string or in the set (' . implode(', ', array_keys($this->supportedSchemes)) . ')', 1436717338);
}
646
/**
 * Percent-encodes every part of a path that is not allowed to appear
 * literally.
 *
 * Left untouched: unreserved characters, the characters : @ & = + $ , / ;
 * and well-formed "%XX" sequences. Everything else, including a "%" that is
 * not followed by two hex digits, is passed through rawurlencode().
 *
 * @param string $path
 * @return string
 */
protected function sanitizePath($path)
{
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . ':@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = static function ($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $path);
}
663
/**
 * Filter a query string to ensure it is properly encoded.
 *
 * A leading "?" is removed. The remainder is split on "&"; each part is
 * split into key and value at the first "=", both are sanitized with
 * sanitizeQueryOrFragment(), and the parts are joined again with "&".
 * Parts without "=" are emitted as the sanitized key only.
 *
 * @param string $query
 * @return string
 */
protected function sanitizeQuery($query)
{
    if (!empty($query) && strpos($query, '?') === 0) {
        $query = substr($query, 1);
    }

    $sanitizedParts = [];
    foreach (explode('&', $query) as $part) {
        $pair = $this->splitQueryValue($part);
        $sanitizedPart = $this->sanitizeQueryOrFragment($pair[0]);
        if ($pair[1] !== null) {
            $sanitizedPart .= '=' . $this->sanitizeQueryOrFragment($pair[1]);
        }
        $sanitizedParts[] = $sanitizedPart;
    }

    return implode('&', $sanitizedParts);
}
690
/**
 * Split a "key=value" query part into a key/value tuple.
 *
 * The split happens at the first "=" only. When there is no "=", the value
 * slot is null.
 *
 * @param string $value
 * @return array A value with exactly two elements, key and value
 */
protected function splitQueryValue($value)
{
    // explode() yields one or two elements here; pad the one-element case.
    return array_pad(explode('=', $value, 2), 2, null);
}
705
/**
 * Filter a fragment value to ensure it is properly encoded.
 *
 * Null is treated as an empty fragment. A leading "#" is removed before the
 * value is handed to sanitizeQueryOrFragment().
 *
 * @param string|null $fragment
 * @return string
 */
protected function sanitizeFragment($fragment)
{
    $normalizedFragment = $fragment === null ? '' : $fragment;

    $hasLeadingHash = !empty($normalizedFragment) && strpos($normalizedFragment, '#') === 0;
    if ($hasLeadingHash) {
        $normalizedFragment = substr($normalizedFragment, 1);
    }

    return $this->sanitizeQueryOrFragment($normalizedFragment);
}
724
/**
 * Percent-encodes every part of a query key, query value or fragment that
 * is not allowed to appear literally.
 *
 * Left untouched: unreserved characters, sub-delimiters, the characters
 * : @ / ? and well-formed "%XX" sequences. Everything else, including a "%"
 * that is not followed by two hex digits, is passed through rawurlencode().
 *
 * @param string $value
 * @return string
 */
protected function sanitizeQueryOrFragment($value)
{
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . self::SUBDELIMITER_CHARLIST . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = static function ($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $value);
}
741 }