[FEATURE] Introduce Request/Response based on PSR-7
[Packages/TYPO3.CMS.git] / typo3 / sysext / core / Classes / Http / Uri.php
1 <?php
2 namespace TYPO3\CMS\Core\Http;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
17 use Psr\Http\Message\UriInterface;
18
19 /**
20 * Represents a URI based on the PSR-7 Standard.
21 *
22 * Highly inspired by https://github.com/phly/http/
23 *
24 * @internal Note that this is not public API yet.
25 */
26 class Uri implements UriInterface {
27
/**
 * Sub-delimiters that may appear unencoded in query strings and fragments.
 *
 * The value is already escaped for use inside a PCRE character class; it is
 * interpolated into the pattern built by sanitizeQueryOrFragment().
 *
 * @const string
 */
const SUBDELIMITER_CHARLIST = '!\$&\'\(\)\*\+,;=';

/**
 * Unreserved characters that may appear unencoded in paths, query strings
 * and fragments.
 *
 * The value is already escaped for use inside a PCRE character class; it is
 * interpolated into the patterns built by sanitizePath() and
 * sanitizeQueryOrFragment().
 *
 * @const string
 */
const UNRESERVED_CHARLIST = 'a-zA-Z0-9_\-\.~';

/**
 * The scheme of the URI, lowercased and without the trailing ":".
 * Initialised to an empty string so getScheme() never yields NULL.
 * @var string
 */
protected $scheme = '';

/**
 * Schemes accepted by sanitizeScheme(), mapped to their default ports.
 * @var int[] Associative array containing schemes and their default ports.
 */
protected $supportedSchemes = array(
    'http' => 80,
    'https' => 443
);

/**
 * The authority part of the URI.
 * NOTE(review): no method of this class reads or writes this property;
 * getAuthority() computes the authority from user info, host and port.
 * @var string
 */
protected $authority = '';

/**
 * The userInfo part of the URI ("user" or "user:password", without the "@")
 * @var string
 */
protected $userInfo = '';

/**
 * The host part of the URI
 * @var string
 */
protected $host = '';

/**
 * The port of the URI as it was given, or NULL if none was given.
 * getPort() and getAuthority() decide whether it is actually exposed.
 * @var int|NULL
 */
protected $port = NULL;

/**
 * The path part of the URI (can be empty or /)
 * @var string
 */
protected $path = '';

/**
 * The query part of the URI without the leading "?".
 * Initialised to an empty string so getQuery() never yields NULL.
 * @var string
 */
protected $query = '';

/**
 * The fragment part of the URI without the leading "#".
 * Initialised to an empty string so getFragment() never yields NULL.
 * @var string
 */
protected $fragment = '';
97
/**
 * Creates a URI object, optionally populated from a URI string.
 *
 * @param string $uri The full URI including query string and fragment; an empty string creates an empty URI
 * @throws \InvalidArgumentException when $uri is not a string (NULL included) or cannot be parsed
 */
public function __construct($uri = '') {
    if (!is_string($uri)) {
        $argumentType = is_object($uri) ? get_class($uri) : gettype($uri);
        throw new \InvalidArgumentException('URI passed must be a string, but is of type "' . $argumentType . '"', 1436717320);
    }
    // Compare against '' rather than using empty(): the string "0" is a
    // valid relative reference and empty("0") would silently discard it.
    if ($uri !== '') {
        $this->parseUri($uri);
    }
}
111
/**
 * Splits a full URI string with parse_url() and stores each component that
 * is present on this object.
 *
 * Scheme, path, query and fragment are run through their sanitize*()
 * helpers; the host is lowercased; user and password are joined with ":".
 *
 * @param string $uri
 * @throws \InvalidArgumentException if the URI is malformed or uses an unsupported scheme.
 */
protected function parseUri($uri) {
    $uriParts = parse_url($uri);

    if ($uriParts === FALSE) {
        throw new \InvalidArgumentException('The parsedUri string appears to be malformed', 1436717322);
    }

    if (isset($uriParts['scheme'])) {
        $this->scheme = $this->sanitizeScheme($uriParts['scheme']);
    }

    if (isset($uriParts['user'])) {
        $this->userInfo = $uriParts['user'];
        if (isset($uriParts['pass'])) {
            $this->userInfo .= ':' . $uriParts['pass'];
        }
    }

    if (isset($uriParts['host'])) {
        // Host names are case-insensitive and getHost() is required to
        // return them lowercased, so normalise once when storing.
        $this->host = strtolower($uriParts['host']);
    }

    if (isset($uriParts['port'])) {
        $this->port = (int)$uriParts['port'];
    }

    if (isset($uriParts['path'])) {
        $this->path = $this->sanitizePath($uriParts['path']);
    }

    if (isset($uriParts['query'])) {
        $this->query = $this->sanitizeQuery($uriParts['query']);
    }

    if (isset($uriParts['fragment'])) {
        $this->fragment = $this->sanitizeFragment($uriParts['fragment']);
    }
}
155
/**
 * Retrieve the scheme component of the URI.
 *
 * If no scheme is present, this method MUST return an empty string.
 *
 * The value returned MUST be normalized to lowercase, per RFC 3986
 * Section 3.1.
 *
 * The trailing ":" character is not part of the scheme and MUST NOT be
 * added.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.1
 * @return string The URI scheme.
 */
public function getScheme() {
    // The cast turns an unset (NULL) scheme into the required empty string.
    return (string)$this->scheme;
}
173
/**
 * Retrieve the authority component of the URI.
 *
 * If no authority information is present (no host), this method returns an
 * empty string.
 *
 * The authority syntax of the URI is:
 *
 * <pre>
 * [user-info@]host[:port]
 * </pre>
 *
 * The port is only included when one is set and it is not the standard
 * port for the current scheme.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.2
 * @return string The URI authority, in "[user-info@]host[:port]" format.
 */
public function getAuthority() {
    if (empty($this->host)) {
        return '';
    }

    $authority = $this->host;
    // Compare against '' so that a user name of "0" is not dropped.
    if ((string)$this->userInfo !== '') {
        $authority = $this->userInfo . '@' . $authority;
    }

    // Require an actual port before asking whether it is non-standard:
    // isNonStandardPort() answers TRUE for every scheme-less URI, which
    // would otherwise append a dangling ":" when no port is set.
    if (!empty($this->port) && $this->isNonStandardPort($this->scheme, $this->host, $this->port)) {
        $authority .= ':' . $this->port;
    }

    return $authority;
}
208
/**
 * Returns the user information part of the authority.
 *
 * The value has the form "username" or "username:password" and never
 * carries the trailing "@". It is an empty string as long as no user
 * information has been set on this instance.
 *
 * @return string The URI user information, in "username[:password]" format.
 */
public function getUserInfo() {
    $userInformation = $this->userInfo;
    return $userInformation;
}
227
/**
 * Returns the host component exactly as it is stored on this instance.
 *
 * The value is an empty string as long as no host has been set.
 *
 * @see http://tools.ietf.org/html/rfc3986#section-3.2.2
 * @return string The URI host.
 */
public function getHost() {
    $hostName = $this->host;
    return $hostName;
}
242
/**
 * Returns the port of the URI, or NULL when it should not be exposed.
 *
 * The stored port is handed out only if isNonStandardPort() reports it as
 * non-standard for the current scheme and host; in every other case
 * (including "no port stored") the result is NULL.
 *
 * @return null|int The URI port.
 */
public function getPort() {
    if ($this->isNonStandardPort($this->scheme, $this->host, $this->port)) {
        return $this->port;
    }

    return NULL;
}
261
/**
 * Returns the path component as stored on this instance.
 *
 * The path may be empty, absolute (leading "/") or rootless. No
 * normalisation between "" and "/" takes place here; callers that care
 * about the difference have to handle both values themselves.
 *
 * Paths set through the constructor or withPath() have already been
 * percent-encoded by sanitizePath(), so the value is returned untouched.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.3
 * @return string The URI path.
 */
public function getPath() {
    $currentPath = $this->path;
    return $currentPath;
}
290
/**
 * Retrieve the query string of the URI.
 *
 * If no query string is present, this method MUST return an empty string.
 *
 * The leading "?" character is not part of the query and MUST NOT be
 * added.
 *
 * Queries set through the constructor or withQuery() have already been
 * percent-encoded by sanitizeQuery().
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.4
 * @return string The URI query string.
 */
public function getQuery() {
    // The cast turns an unset (NULL) query into the required empty string.
    return (string)$this->query;
}
314
/**
 * Retrieve the fragment component of the URI.
 *
 * If no fragment is present, this method MUST return an empty string.
 *
 * The leading "#" character is not part of the fragment and MUST NOT be
 * added.
 *
 * Fragments set through the constructor or withFragment() have already
 * been percent-encoded by sanitizeFragment().
 *
 * @see https://tools.ietf.org/html/rfc3986#section-2
 * @see https://tools.ietf.org/html/rfc3986#section-3.5
 * @return string The URI fragment.
 */
public function getFragment() {
    // The cast turns an unset (NULL) fragment into the required empty string.
    return (string)$this->fragment;
}
334
/**
 * Returns a copy of this URI that uses the given scheme.
 *
 * The current instance is left untouched. The scheme is passed through
 * sanitizeScheme() first, which lowercases it, strips a trailing ":" or
 * "://" and rejects anything not listed in $supportedSchemes. An empty
 * scheme removes the scheme from the copy.
 *
 * @param string $scheme The scheme to use with the new instance.
 *
 * @return self A new instance with the specified scheme.
 * @throws \InvalidArgumentException for invalid or unsupported schemes.
 */
public function withScheme($scheme) {
    $normalizedScheme = $this->sanitizeScheme($scheme);

    $newUri = clone $this;
    $newUri->scheme = $normalizedScheme;

    return $newUri;
}
358
/**
 * Return an instance with the specified user information.
 *
 * The current instance is left untouched; a modified clone is returned.
 *
 * The password is optional, but user information always needs a user: an
 * empty string for the user removes the user information altogether, and
 * any password passed alongside it is ignored.
 *
 * @param string $user The user name to use for authority.
 * @param null|string $password The password associated with $user.
 *
 * @return self A new instance with the specified user information.
 */
public function withUserInfo($user, $password = NULL) {
    $userInfo = (string)$user;

    // Explicit comparisons instead of empty(): a password of "0" is valid
    // and must be kept, while a password without a user makes no sense.
    if ($userInfo !== '' && $password !== NULL && (string)$password !== '') {
        $userInfo .= ':' . $password;
    }

    $clonedObject = clone $this;
    $clonedObject->userInfo = $userInfo;
    return $clonedObject;
}
385
/**
 * Return an instance with the specified host.
 *
 * The current instance is left untouched; a modified clone is returned.
 * The host is stored lowercased because host names are case-insensitive
 * and getHost() is required to return them in lowercase.
 *
 * An empty host value is equivalent to removing the host. No further
 * validation of the host name is performed here.
 *
 * @param string $host The hostname to use with the new instance.
 *
 * @return self A new instance with the specified host.
 */
public function withHost($host) {
    $clonedObject = clone $this;
    // The string cast maps NULL to '' so that "no host" is always stored
    // as an empty string.
    $clonedObject->host = strtolower((string)$host);
    return $clonedObject;
}
404
/**
 * Return an instance with the specified port.
 *
 * The current instance is left untouched; a modified clone is returned.
 *
 * A NULL value removes the port information. Any other value has to be
 * interpretable as an integer within the TCP/UDP port range 1-65535.
 *
 * @param null|int $port The port to use with the new instance; a null value
 * removes the port information.
 *
 * @return self A new instance with the specified port.
 * @throws \InvalidArgumentException for invalid ports.
 */
public function withPort($port) {
    // NULL is the documented way to drop the port, so it must bypass the
    // integer and range validation instead of being rejected by it.
    if ($port !== NULL) {
        if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($port) === FALSE) {
            $argumentType = is_object($port) ? get_class($port) : gettype($port);
            throw new \InvalidArgumentException('Invalid port "' . $argumentType . '" specified, must be an integer.', 1436717324);
        }

        $port = (int)$port;
        if ($port < 1 || $port > 65535) {
            throw new \InvalidArgumentException('Invalid port "' . $port . '" specified, must be a valid TCP/UDP port.', 1436717326);
        }
    }

    $clonedObject = clone $this;
    $clonedObject->port = $port;
    return $clonedObject;
}
438
/**
 * Returns a copy of this URI that uses the given path.
 *
 * The current instance is left untouched. The path may be empty, absolute
 * (leading "/") or rootless; it must be a string and must not contain a
 * query string ("?") or a fragment ("#"). Encoded and decoded characters
 * may both be passed in, sanitizePath() takes care of the encoding.
 *
 * @param string $path The path to use with the new instance.
 *
 * @return self A new instance with the specified path.
 * @throws \InvalidArgumentException for invalid paths.
 */
public function withPath($path) {
    if (!is_string($path)) {
        throw new \InvalidArgumentException('Invalid path provided. Must be of type string.', 1436717328);
    }

    // Delimiters that start another URI component, checked in this order,
    // each with the message and code of the exception it triggers.
    $forbiddenDelimiters = array(
        '?' => array('Invalid path provided. Must not contain a query string.', 1436717330),
        '#' => array('Invalid path provided; must not contain a URI fragment', 1436717332)
    );
    foreach ($forbiddenDelimiters as $delimiter => $error) {
        if (strpos($path, $delimiter) !== FALSE) {
            throw new \InvalidArgumentException($error[0], $error[1]);
        }
    }

    $newUri = clone $this;
    $newUri->path = $this->sanitizePath($path);

    return $newUri;
}
480
/**
 * Returns a copy of this URI that uses the given query string.
 *
 * The current instance is left untouched. The query must be a string and
 * must not contain a fragment ("#"). Encoded and decoded characters may
 * both be passed in, sanitizeQuery() takes care of the encoding and also
 * strips a single leading "?". An empty query string removes the query.
 *
 * @param string $query The query string to use with the new instance.
 *
 * @return self A new instance with the specified query string.
 * @throws \InvalidArgumentException for invalid query strings.
 */
public function withQuery($query) {
    if (!is_string($query)) {
        throw new \InvalidArgumentException('Query string must be a string.', 1436717334);
    }

    $containsFragment = strpos($query, '#') !== FALSE;
    if ($containsFragment) {
        throw new \InvalidArgumentException('Query string must not include a URI fragment.', 1436717336);
    }

    $newUri = clone $this;
    $newUri->query = $this->sanitizeQuery($query);

    return $newUri;
}
511
/**
 * Returns a copy of this URI that uses the given fragment.
 *
 * The current instance is left untouched. Encoded and decoded characters
 * may both be passed in; sanitizeFragment() takes care of the encoding and
 * strips a single leading "#". An empty fragment removes the fragment.
 *
 * @param string $fragment The fragment to use with the new instance.
 *
 * @return self A new instance with the specified fragment.
 */
public function withFragment($fragment) {
    $newUri = clone $this;
    $newUri->fragment = $this->sanitizeFragment($fragment);

    return $newUri;
}
533
/**
 * Return the string representation as a URI reference.
 *
 * Depending on which components of the URI are present, the resulting
 * string is either a full URI or relative reference according to RFC 3986,
 * Section 4.1. The components are concatenated as follows:
 *
 * - If a scheme is present, it is suffixed by ":".
 * - If an authority is present, it is prefixed by "//".
 * - The path is appended without a delimiter, with two adjustments that
 *   keep the reference valid (PHP does not allow throwing an exception
 *   in __toString()):
 *   - If the path is rootless and an authority is present, the path is
 *     prefixed by "/".
 *   - If the path starts with more than one "/" and no authority is
 *     present, the starting slashes are reduced to one.
 * - If a query is present, it is prefixed by "?".
 * - If a fragment is present, it is prefixed by "#".
 *
 * @see http://tools.ietf.org/html/rfc3986#section-4.1
 * @return string
 */
public function __toString() {
    $uri = '';

    // The "//" belongs to the authority, not to the scheme: a URI with a
    // scheme but no authority must not contain it.
    if (!empty($this->scheme)) {
        $uri .= $this->scheme . ':';
    }

    $authority = (string)$this->getAuthority();
    $hasAuthority = $authority !== '';
    if ($hasAuthority) {
        $uri .= '//' . $authority;
    }

    // Components are compared against '' instead of tested for truthiness
    // so that the valid values "0" are not dropped.
    $path = (string)$this->getPath();
    if ($path !== '') {
        if ($hasAuthority) {
            // After an authority the path has to be absolute.
            if ($path[0] !== '/') {
                $path = '/' . $path;
            }
        } elseif (strpos($path, '//') === 0) {
            // Without an authority a leading "//" would itself be read as
            // the start of an authority.
            $path = '/' . ltrim($path, '/');
        }
        $uri .= $path;
    }

    if ((string)$this->query !== '') {
        $uri .= '?' . $this->query;
    }
    if ((string)$this->fragment !== '') {
        $uri .= '#' . $this->fragment;
    }
    return $uri;
}
582
/**
 * Is a given port non-standard for the given scheme?
 *
 * Returns FALSE when there is no port at all. With a port but without a
 * scheme there is no default to compare against, so the port counts as
 * non-standard. With a scheme, a host is required as well; the port is
 * then non-standard if the scheme is unknown or its default port differs.
 *
 * @param string $scheme
 * @param string $host
 * @param int $port
 * @return bool
 */
protected function isNonStandardPort($scheme, $host, $port) {
    // A missing port can never be "non-standard". Checking this first keeps
    // getAuthority() from appending a dangling ":" to scheme-less URIs.
    if (empty($port)) {
        return FALSE;
    }

    if (empty($scheme)) {
        return TRUE;
    }

    if (empty($host)) {
        return FALSE;
    }

    // Cast before the strict comparison so a numeric string port is still
    // recognised as the default port.
    return !isset($this->supportedSchemes[$scheme]) || (int)$port !== $this->supportedSchemes[$scheme];
}
602
/**
 * Normalises a scheme and makes sure it is one of the supported schemes.
 *
 * The scheme is lowercased and a trailing ":" or "://" is removed. An
 * empty result is returned as an empty string; everything else has to be
 * a key of $supportedSchemes.
 *
 * @param string $scheme Scheme name.
 *
 * @return string Filtered scheme.
 * @throws \InvalidArgumentException when a scheme is given which is not supported
 */
protected function sanitizeScheme($scheme) {
    $normalizedScheme = preg_replace('#:(//)?$#', '', strtolower($scheme));

    if (empty($normalizedScheme)) {
        return '';
    }

    if (array_key_exists($normalizedScheme, $this->supportedSchemes)) {
        return $normalizedScheme;
    }

    $supportedSchemeList = implode(', ', array_keys($this->supportedSchemes));
    throw new \InvalidArgumentException('Unsupported scheme "' . $normalizedScheme . '"; must be any empty string or in the set (' . $supportedSchemeList . ')', 1436717338);
}
625
/**
 * Percent-encodes every part of a path that is not allowed to appear
 * unencoded.
 *
 * The pattern matches runs of characters outside the allowed set as well
 * as any "%" that is not followed by two hex digits; each match is passed
 * through rawurlencode(). Existing percent-encoded sequences are left
 * alone, so nothing is double-encoded.
 *
 * @param string $path
 * @return string
 */
protected function sanitizePath($path) {
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . ':@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = function($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $path);
}
641
/**
 * Filters a query string to ensure it is properly encoded.
 *
 * A single leading "?" is removed. The remainder is split at "&"; every
 * piece is split into key and value at its first "=", both are encoded
 * with sanitizeQueryOrFragment() and the pieces are joined again with "&".
 * Pieces without "=" are encoded as a bare key.
 *
 * @param string $query
 * @return string
 */
protected function sanitizeQuery($query) {
    if (!empty($query) && strpos($query, '?') === 0) {
        $query = substr($query, 1);
    }

    $sanitizedPairs = array();
    foreach (explode('&', $query) as $pair) {
        $keyAndValue = $this->splitQueryValue($pair);
        $sanitizedPair = $this->sanitizeQueryOrFragment($keyAndValue[0]);
        if ($keyAndValue[1] !== NULL) {
            $sanitizedPair .= '=' . $this->sanitizeQueryOrFragment($keyAndValue[1]);
        }
        $sanitizedPairs[] = $sanitizedPair;
    }

    return implode('&', $sanitizedPairs);
}
667
/**
 * Splits one "key=value" piece of a query string at its first "=".
 *
 * A piece without "=" yields NULL as its value.
 *
 * @param string $value
 * @return array A value with exactly two elements, key and value
 */
protected function splitQueryValue($value) {
    // explode() with limit 2 yields one or two elements; pad the
    // one-element case with NULL so callers always get a pair.
    return array_pad(explode('=', $value, 2), 2, NULL);
}
681
/**
 * Filters a fragment value to ensure it is properly encoded.
 *
 * NULL is treated as an empty fragment and a single leading "#" is
 * removed before the value is encoded with sanitizeQueryOrFragment().
 *
 * @param null|string $fragment
 * @return string
 */
protected function sanitizeFragment($fragment) {
    $rawFragment = ($fragment === NULL) ? '' : $fragment;

    $startsWithHash = !empty($rawFragment) && strpos($rawFragment, '#') === 0;
    if ($startsWithHash) {
        $rawFragment = substr($rawFragment, 1);
    }

    return $this->sanitizeQueryOrFragment($rawFragment);
}
699
/**
 * Percent-encodes every part of a query key, query value or fragment that
 * is not allowed to appear unencoded.
 *
 * The pattern matches runs of characters outside the allowed set as well
 * as any "%" that is not followed by two hex digits; each match is passed
 * through rawurlencode(). Existing percent-encoded sequences are left
 * alone, so nothing is double-encoded.
 *
 * @param string $value
 * @return string
 */
protected function sanitizeQueryOrFragment($value) {
    $pattern = '/(?:[^' . self::UNRESERVED_CHARLIST . self::SUBDELIMITER_CHARLIST . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/';
    $encodeMatch = function($matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encodeMatch, $value);
}
715
716 }