#9474: Integrate OpenID authentication support to TYPO3
[Packages/TYPO3.CMS.git] / typo3 / sysext / openid / lib / php-openid / Auth / OpenID / Parse.php
1 <?php
2
3 /**
4 * This module implements a VERY limited parser that finds <link> tags
5 * in the head of HTML or XHTML documents and parses out their
6 * attributes according to the OpenID spec. It is a liberal parser,
7 * but it requires these things from the data in order to work:
8 *
9 * - There must be an open <html> tag
10 *
11 * - There must be an open <head> tag inside of the <html> tag
12 *
13 * - Only <link>s that are found inside of the <head> tag are parsed
14 * (this is by design)
15 *
16 * - The parser follows the OpenID specification in resolving the
17 * attributes of the link tags. This means that the attributes DO
18 * NOT get resolved as they would by an XML or HTML parser. In
19 * particular, only certain entities get replaced, and href
20 * attributes do not get resolved relative to a base URL.
21 *
22 * From http://openid.net/specs.bml:
23 *
24 * - The openid.server URL MUST be an absolute URL. OpenID consumers
25 * MUST NOT attempt to resolve relative URLs.
26 *
27 * - The openid.server URL MUST NOT include entities other than &amp;,
28 * &lt;, &gt;, and &quot;.
29 *
30 * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
31 * of quoting are allowed for attributes.
32 *
33 * The parser deals with invalid markup in these ways:
34 *
35 * - Tag names are not case-sensitive
36 *
37 * - The <html> tag is accepted even when it is not at the top level
38 *
39 * - The <head> tag is accepted even when it is not a direct child of
40 * the <html> tag, but a <html> tag must be an ancestor of the
41 * <head> tag
42 *
43 * - <link> tags are accepted even when they are not direct children
44 * of the <head> tag, but a <head> tag must be an ancestor of the
45 * <link> tag
46 *
47 * - If there is no closing tag for an open <html> or <head> tag, the
48 * remainder of the document is viewed as being inside of the
49 * tag. If there is no closing tag for a <link> tag, the link tag is
50 * treated as a short tag. Exceptions to this rule are that <html>
51 * closes <html> and <body> or <head> closes <head>
52 *
53 * - Attributes of the <link> tag are not required to be quoted.
54 *
55 * - In the case of duplicated attribute names, the attribute coming
56 * last in the tag will be the value returned.
57 *
58 * - Any text that does not parse as an attribute within a link tag
59 * will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
60 * ignore pumpkin)
61 *
62 * - If there is more than one <html> or <head> tag, the parser only
63 * looks inside of the first one.
64 *
65 * - The contents of <script> tags are ignored entirely, except
66 * unclosed <script> tags. Unclosed <script> tags are ignored.
67 *
68 * - Any other invalid markup is ignored, including unclosed SGML
69 * comments and unclosed <![CDATA[blocks.
70 *
71 * PHP versions 4 and 5
72 *
73 * LICENSE: See the COPYING file included in this distribution.
74 *
75 * @access private
76 * @package OpenID
77 * @author JanRain, Inc. <openid@janrain.com>
78 * @copyright 2005-2008 Janrain, Inc.
79 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
80 */
81
82 /**
83 * Require Auth_OpenID::arrayGet().
84 */
85 require_once "Auth/OpenID.php";
86
/**
 * Liberal, regular-expression based extractor for the <link> tags found
 * inside the <head> of an HTML or XHTML document.
 *
 * All matching is done with PCRE patterns using the "s" (dot matches
 * newline) and "i" (case-insensitive) modifiers. Comments, CDATA blocks
 * and closed <script> elements are removed before any tag is looked for.
 */
class Auth_OpenID_Parse {

    /**
     * Specify some flags for use with regex matching.
     */
    var $_re_flags = "si";

    /**
     * Stuff to remove before we start looking for tags. Holds the bare
     * expression until the constructor wraps it in delimiters and flags.
     */
    var $_removed_re =
           "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";

    /**
     * Starts with the tag name at a word boundary, where the tag name
     * is not a namespace. The two %s placeholders are the tag name and
     * the name (or alternation of names) whose tag ends the element.
     */
    var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";

    /**
     * Matches one name=value attribute; the value may be double-quoted,
     * single-quoted or unquoted. Holds the bare expression until the
     * constructor wraps it in delimiters and flags.
     */
    var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';

    var $_open_tag_expr = "<%s\b";
    var $_close_tag_expr = "<((\/%s\b)|(%s[^>\/]*\/))>";

    /**
     * Complete pattern (with delimiters and flags) matching a <link> tag.
     * Set by the constructor.
     */
    var $_link_find;

    /**
     * Map of entity name (without "&" and ";") to replacement character.
     * Set by the constructor.
     */
    var $_entity_replacements;

    /**
     * Expression matching any of the supported entities. Set by the
     * constructor; not used by any method of this class.
     */
    var $_ent_replace;

    /**
     * Build the patterns and the entity table.
     *
     * Must run exactly once per instance: it wraps $_attr_find and
     * $_removed_re in delimiters in place, so a second run would wrap
     * them twice.
     */
    function __construct()
    {
        $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
                                    $this->_re_flags);

        $this->_entity_replacements = array(
                                            'amp' => '&',
                                            'lt' => '<',
                                            'gt' => '>',
                                            'quot' => '"'
                                            );

        $this->_attr_find = sprintf("/%s/%s",
                                    $this->_attr_find,
                                    $this->_re_flags);

        $this->_removed_re = sprintf("/%s/%s",
                                     $this->_removed_re,
                                     $this->_re_flags);

        // The alternation lists the entity NAMES (amp|lt|gt|quot), not
        // the characters they decode to.
        $this->_ent_replace =
            sprintf("&(%s);", implode("|",
                                      array_keys($this->_entity_replacements)));
    }

    /**
     * Legacy class-named constructor.
     *
     * Kept so that engines which still treat a method named after the
     * class as the constructor, and subclasses that call
     * parent::Auth_OpenID_Parse(), keep working. It is declared after
     * __construct() on purpose so that __construct() is the constructor
     * wherever both forms are recognised.
     */
    function Auth_OpenID_Parse()
    {
        $this->__construct();
    }

    /**
     * Returns a regular expression that will match a given tag in an
     * SGML string.
     *
     * @param string $tag_name   Name of the tag to match
     * @param array  $close_tags Optional additional tag names whose
     *                           opening or closing tag also ends the
     *                           element
     * @return string Complete pattern including delimiters and flags
     */
    function tagMatcher($tag_name, $close_tags = null)
    {
        $expr = $this->_tag_expr;

        if ($close_tags) {
            $options = implode("|", array_merge(array($tag_name), $close_tags));
            $closer = sprintf("(?:%s)", $options);
        } else {
            $closer = $tag_name;
        }

        $expr = sprintf($expr, $tag_name, $closer);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a pattern matching the start of an opening tag.
     *
     * @param string $tag_name Name of the tag
     * @return string Complete pattern including delimiters and flags
     */
    function openTag($tag_name)
    {
        $expr = sprintf($this->_open_tag_expr, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a pattern matching either a closing tag or a self-closing
     * tag of the given name.
     *
     * @param string $tag_name Name of the tag
     * @return string Complete pattern including delimiters and flags
     */
    function closeTag($tag_name)
    {
        $expr = sprintf($this->_close_tag_expr, $tag_name, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Locate the first opening html tag.
     *
     * @param string $s Text to search
     * @return int|false Byte offset of the "<" of the first match, or
     *                   false when there is none
     */
    function htmlBegin($s)
    {
        $matches = array();
        $result = preg_match($this->openTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // Return the offset of the first match.
        return $matches[0][1];
    }

    /**
     * Locate the first closing (or self-closing) html tag.
     *
     * @param string $s Text to search
     * @return int|false Offset of the last captured group of the first
     *                   match, which is the position just after the
     *                   tag's "<"; false when there is no match
     */
    function htmlEnd($s)
    {
        $matches = array();
        $result = preg_match($this->closeTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // The last element is the innermost group that took part in the
        // match; it starts one character after the tag's "<".
        return $matches[count($matches) - 1][1];
    }

    /**
     * Returns the pattern used to find the head element; a body or html
     * tag also terminates it.
     *
     * @return string Complete pattern including delimiters and flags
     */
    function headFind()
    {
        return $this->tagMatcher('head', array('body', 'html'));
    }

    /**
     * Decode the supported entities (&amp; &lt; &gt; &quot;) in a string.
     *
     * Decoding happens in a single pass, so text produced by one
     * replacement is never decoded a second time: "&amp;lt;" becomes
     * "&lt;", not "<".
     *
     * @param string $str Text to decode
     * @return string Decoded text
     */
    function replaceEntities($str)
    {
        $map = array();
        foreach ($this->_entity_replacements as $old => $new) {
            $map['&' . $old . ';'] = $new;
        }
        // strtr() with an array does not rescan text it has replaced.
        return strtr($str, $map);
    }

    /**
     * Strip one pair of matching surrounding quotes, if present.
     *
     * The "s" modifier is needed because quoted attribute values found
     * by $_attr_find may contain newlines.
     *
     * @param string $str Raw attribute value
     * @return string Value without its surrounding quotes
     */
    function removeQuotes($str)
    {
        $matches = array();
        $double = '/^"(.*)"$/s';
        $single = "/^\'(.*)\'$/s";

        if (preg_match($double, $str, $matches)) {
            return $matches[1];
        } else if (preg_match($single, $str, $matches)) {
            return $matches[1];
        } else {
            return $str;
        }
    }

    /**
     * Find all link tags in a string representing a HTML document and
     * return a list of their attributes.
     *
     * Only the first head element inside the first html element is
     * searched. Attribute names are lower-cased; values have their
     * quotes removed and entities decoded. When an attribute name is
     * repeated within a tag, the last value wins.
     *
     * @param string $html The text to parse
     * @return array $list An array of arrays of attributes, one for each
     * link tag; empty when no html tag, head tag or link tag is found
     */
    function parseLinkAttrs($html)
    {
        $stripped = preg_replace($this->_removed_re,
                                 "",
                                 $html);

        // preg_replace() yields null on a PCRE failure.
        if ($stripped === null) {
            return array();
        }

        $html_begin = $this->htmlBegin($stripped);

        if ($html_begin === false) {
            return array();
        }

        // Drop everything before the opening html tag first, so that a
        // stray closing tag earlier in the document cannot be mistaken
        // for the end of the element.
        $stripped = substr($stripped, $html_begin);

        $html_end = $this->htmlEnd($stripped);

        if ($html_end !== false) {
            $stripped = substr($stripped, 0, $html_end);
        }

        // Try to find the <HEAD> tag.
        $head_re = $this->headFind();
        $head_matches = array();
        if (!preg_match($head_re, $stripped, $head_matches)) {
            return array();
        }

        $link_data = array();
        $link_matches = array();

        if (!preg_match_all($this->_link_find, $head_matches[0],
                            $link_matches)) {
            return array();
        }

        foreach ($link_matches[0] as $link) {
            $attr_matches = array();
            preg_match_all($this->_attr_find, $link, $attr_matches);
            $link_attrs = array();
            foreach ($attr_matches[0] as $index => $full_match) {
                $name = $attr_matches[1][$index];
                $value = $this->replaceEntities(
                              $this->removeQuotes($attr_matches[2][$index]));

                $link_attrs[strtolower($name)] = $value;
            }
            $link_data[] = $link_attrs;
        }

        return $link_data;
    }

    /**
     * Does $target_rel appear among the whitespace-separated values of
     * a rel attribute?
     *
     * Each value of $rel_attr is lower-cased before comparison;
     * $target_rel is compared as given.
     *
     * @param string $rel_attr   Value of a rel attribute
     * @param string $target_rel Relationship to look for
     * @return int 1 when found, 0 otherwise
     */
    function relMatches($rel_attr, $target_rel)
    {
        $target_rel = (string) $target_rel;
        $rels = preg_split("/\s+/", trim($rel_attr));
        foreach ($rels as $rel) {
            $rel = strtolower($rel);
            // Strict comparison: loose "==" treats numeric-looking
            // strings such as "1e1" and "10" as equal.
            if ($rel === $target_rel) {
                return 1;
            }
        }

        return 0;
    }

    /**
     * Does this link have $target_rel as a relationship?
     *
     * @param array  $link_attrs Attributes of one link tag
     * @param string $target_rel Relationship to look for
     * @return bool
     */
    function linkHasRel($link_attrs, $target_rel)
    {
        $rel_attr = Auth_OpenID::arrayGet($link_attrs, 'rel', null);
        return ($rel_attr && $this->relMatches($rel_attr,
                                               $target_rel));
    }

    /**
     * Filter a list of link attribute arrays, keeping those that have
     * $target_rel as a relationship.
     *
     * @param array  $link_attrs_list List of attribute arrays
     * @param string $target_rel      Relationship to look for
     * @return array Matching attribute arrays, in their original order
     */
    function findLinksRel($link_attrs_list, $target_rel)
    {
        $result = array();
        foreach ($link_attrs_list as $attr) {
            if ($this->linkHasRel($attr, $target_rel)) {
                $result[] = $attr;
            }
        }

        return $result;
    }

    /**
     * Return the value of the href attribute for the first link tag in
     * the list that has $target_rel as a relationship.
     *
     * @param array  $link_attrs_list List of attribute arrays
     * @param string $target_rel      Relationship to look for
     * @return mixed null when no link matches; otherwise whatever
     *               Auth_OpenID::arrayGet() returns for 'href' with a
     *               null fallback
     */
    function findFirstHref($link_attrs_list, $target_rel)
    {
        $matches = $this->findLinksRel($link_attrs_list,
                                       $target_rel);
        if (!$matches) {
            return null;
        }
        $first = $matches[0];
        return Auth_OpenID::arrayGet($first, 'href', null);
    }
}
332
/**
 * Look up the server and delegate URLs advertised by link tags in an
 * HTML document.
 *
 * @param string $html_text    The HTML text to inspect
 * @param string $server_rel   rel value identifying the server link
 * @param string $delegate_rel rel value identifying the delegate link
 * @return mixed false when findFirstHref() yields null for $server_rel;
 *               otherwise array($delegate_url, $server_url), where
 *               $delegate_url is whatever findFirstHref() yields for
 *               $delegate_rel (null when no link matches)
 */
function Auth_OpenID_legacy_discover($html_text, $server_rel,
                                     $delegate_rel)
{
    $parser = new Auth_OpenID_Parse();
    $links = $parser->parseLinkAttrs($html_text);

    // Without a server link there is nothing to report.
    $server = $parser->findFirstHref($links, $server_rel);
    if ($server === null) {
        return false;
    }

    $delegate = $parser->findFirstHref($links, $delegate_rel);

    return array($delegate, $server);
}
351
352 ?>