Follow-up to bugfix #12324: Renamed sanitizeBackEndUrl() to sanitizeLocalUrl() in...
[Packages/TYPO3.CMS.git] / t3lib / class.t3lib_readmail.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 1999-2009 Kasper Skaarhoj (kasperYYYY@typo3.com)
6 * All rights reserved
7 *
8 * This script is part of the TYPO3 project. The TYPO3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 * A copy is found in the textfile GPL.txt and important notices to the license
17 * from the author is found in LICENSE.txt distributed with these scripts.
18 *
19 *
20 * This script is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * This copyright notice MUST APPEAR in all copies of the script!
26 ***************************************************************/
27 /**
28 * Contains a class with functions used to read email content
29 *
30 * $Id$
31 * Revised for TYPO3 3.6 May 2003 by Kasper Skaarhoj
32 *
33 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
34 */
35 /**
36 * [CLASS/FUNCTION INDEX of SCRIPT]
37 *
38 *
39 *
40 * 83: class t3lib_readmail
41 *
42 * SECTION: General
43 * 113: function getMessage($mailParts)
44 * 138: function getTextContent($content)
45 * 153: function getMailBoundaryParts($boundary,$content)
46 * 173: function getCType($str)
47 * 196: function analyseReturnError($c)
48 * 251: function decodeHeaderString($str)
49 * 279: function extractNameEmail($str)
50 * 308: function getContentTypeData($contentTypeStr)
51 * 331: function makeUnixDate($dateStr)
52 * 354: function getGMToffset($GMT)
53 * 368: function extractMailHeader($content,$limit=0)
54 * 399: function fullParse($content)
55 *
56 * TOTAL FUNCTIONS: 12
57 * (This index is automatically created/updated by the extension "extdeveval")
58 *
59 */
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75 /**
76 * Functions used to read email content
77 * The class is still just a bunch of miscellaneous functions used to read content out of emails
78 *
79 * @author Kasper Skaarhoj <kasperYYYY@typo3.com>
80 * @package TYPO3
81 * @subpackage t3lib
82 */
class t3lib_readmail {
	/**
	 * Upper-cased three-letter month abbreviations mapped to their month number.
	 * Used by makeUnixDate() to translate the month token of a "Date" header.
	 */
	var $dateAbbrevs = array(
		'JAN' => 1,
		'FEB' => 2,
		'MAR' => 3,
		'APR' => 4,
		'MAY' => 5,
		'JUN' => 6,
		'JUL' => 7,
		'AUG' => 8,
		'SEP' => 9,
		'OCT' => 10,
		'NOV' => 11,
		'DEC' => 12
	);

	/**
	 * Offset of the server from GMT in minutes; subtracted in getGMToffset().
	 */
	var $serverGMToffsetMinutes = 60;		// = +0100 (CET)

	/*******************************
	 *
	 * General
	 *
	 ********************************/

	/**
	 * Returns the text content of a mail which has previously been parsed by eg. extractMailHeader()
	 * Probably obsolete since the function fullParse() is more advanced and safer to use.
	 *
	 * If the content type carries a boundary, the body of the first boundary part is returned.
	 * Otherwise the content is re-parsed behind a synthetic "Content-Type" header; header and
	 * body are separated by an empty line so that the first body paragraph is not mistaken
	 * for header lines.
	 *
	 * @param	array		Output from extractMailHeader()
	 * @return	string		The content.
	 */
	function getMessage($mailParts) {
		$body = isset($mailParts['CONTENT']) ? $mailParts['CONTENT'] : '';

		if (empty($mailParts['content-type'])) {
			return $body;
		}

		$CType = $this->getCType($mailParts['content-type']);
		if (!empty($CType['boundary'])) {
			$parts = $this->getMailBoundaryParts($CType['boundary'], $body);
			return $this->getTextContent(isset($parts[0]) ? $parts[0] : '');
		}

		return $this->getTextContent(
			'Content-Type: ' . $mailParts['content-type'] . chr(10) . chr(10) . $body
		);
	}

	/**
	 * Returns the body part of a raw mail message (including headers)
	 * Probably obsolete since the function fullParse() is more advanced and safer to use.
	 *
	 * @param	string		Raw mail content
	 * @return	string		Body of message
	 */
	function getTextContent($content) {
		$mailParts = $this->extractMailHeader($content);
			// No decoding is applied here; the body is returned as extractMailHeader() found it.
		return $mailParts['CONTENT'];
	}

	/**
	 * Splits the body of a mail into parts based on the boundary string given.
	 * Obsolete, use fullParse()
	 *
	 * @param	string		Boundary string used to split the content.
	 * @param	string		BODY section of a mail
	 * @return	array		Parts of the mail based on this
	 */
	function getMailBoundaryParts($boundary,$content) {
		$sections = explode('--' . $boundary, (string)$content);
			// Whatever precedes the first boundary is not a part.
		array_shift($sections);

		$new = array();
		foreach ($sections as $section) {
				// The closing delimiter is "--boundary--", which leaves "--" behind after the split.
			if (trim($section) == '--') {
				break;
			}
			$new[] = ltrim($section);
		}
		return $new;
	}

	/**
	 * Returns Content Type plus more.
	 * Obsolete, use fullParse()
	 *
	 * @param	string		"ContentType" string with more
	 * @return	array		parts in key/value pairs. The first segment is stored in "ContentType", "key=value" segments are stored under the lower-cased key (one pair of surrounding double quotes removed from the value), other segments are appended with numeric keys.
	 * @ignore
	 */
	function getCType($str) {
		$parts = explode(';', (string)$str);
		$cTypes = array();
		$cTypes['ContentType'] = $parts[0];

		foreach (array_slice($parts, 1) as $ppstr) {
			$mparts = explode('=', $ppstr, 2);
			if (count($mparts) > 1) {
				$value = trim(preg_replace('/"$/', '', trim($mparts[1])));
				$cTypes[strtolower(trim($mparts[0]))] = preg_replace('/^"/', '', $value);
			} else {
				$cTypes[] = $ppstr;
			}
		}
		return $cTypes;
	}

	/**
	 * Analyses the return-mail content for the Dmailer module - used to find what reason there was for rejecting the mail
	 * Used by the Dmailer, but not exclusively.
	 *
	 * @param	string		message body/text
	 * @return	array		key/value pairs with analysis result. Eg. "reason", "content", "reason_text", "mailserver" etc.
	 */
	function analyseReturnError($c) {
		$c = (string)$c;
		$cp = array();

		if (strpos($c, '--- Below this line is a copy of the message.') !== false) {	// QMAIL
				// Only the text above the QMAIL divider is analysed.
			list($c) = explode('--- Below this line is a copy of the message.', $c);
			$cp['content'] = trim($c);
			$parts = explode('>:', $c, 2);
			$cp['reason_text'] = isset($parts[1]) ? trim($parts[1]) : '';
			$cp['mailserver'] = 'Qmail';
			if (preg_match('/550|no mailbox|account does not exist/i', $cp['reason_text'])) {
				$cp['reason'] = 550;	// 550 Invalid recipient
			} elseif (stristr($cp['reason_text'], 'couldn\'t find any host named')) {
				$cp['reason'] = 2;	// Bad host
			} elseif (preg_match('/Error in Header|invalid Message-ID header/i', $cp['reason_text'])) {
				$cp['reason'] = 554;
			} else {
				$cp['reason'] = -1;
			}
		} elseif (strpos($c, 'The Postfix program') !== false) {	// Postfix
			$cp['content'] = trim($c);
			$parts = explode('>:', $c, 2);
			$cp['reason_text'] = isset($parts[1]) ? trim($parts[1]) : '';
			$cp['mailserver'] = 'Postfix';
			if (stristr($cp['reason_text'], '550')) {
				$cp['reason'] = 550;	// 550 Invalid recipient, User unknown
			} elseif (stristr($cp['reason_text'], '553')) {
				$cp['reason'] = 553;	// No such user
			} elseif (stristr($cp['reason_text'], '551')) {
				$cp['reason'] = 551;	// Mailbox full
			} else {
				$cp['reason'] = -1;
			}
		} else {	// No-named:
			$cp['content'] = trim($c);
				// Only the first 1000 characters are searched for a reason.
			$cp['reason_text'] = trim(substr($c, 0, 1000));
			$cp['mailserver'] = 'unknown';
			if (preg_match('/Unknown Recipient|Delivery failed 550|Receiver not found|User not listed|recipient problem|Delivery to the following recipients failed|User unknown|recipient name is not recognized/i', $cp['reason_text'])) {
				$cp['reason'] = 550;	// 550 Invalid recipient, User unknown
			} elseif (preg_match('/over quota|mailbox full/i', $cp['reason_text'])) {
				$cp['reason'] = 551;
			} elseif (preg_match('/Error in Header/i', $cp['reason_text'])) {
				$cp['reason'] = 554;
			} else {
				$cp['reason'] = -1;
			}
		}

		return $cp;
	}

	/**
	 * Decodes a header-string with the =?....?= syntax including base64/quoted-printable encoding.
	 * The charset named in the encoded section is not evaluated; the decoded bytes are returned as they are.
	 *
	 * @param	string		A string (encoded or not) from a mail header, like sender name etc.
	 * @return	string		The input string, but with the parts in =?....?= decoded.
	 */
	function decodeHeaderString($str) {
		$parts = explode('=?', (string)$str, 2);
		if (count($parts) == 2) {
				// Layout of an encoded section: charset?encoding?content?=
			$fields = array_pad(explode('?', $parts[1], 3), 3, '');
			$encType = $fields[1];
			$subparts = array_pad(explode('?=', $fields[2], 2), 2, '');
			$encContent = $subparts[0];

			switch (strtolower($encType)) {
				case 'q':
						// "_" stands for a space and has to be translated BEFORE the "=XX" sequences
						// are decoded; otherwise a literal underscore sent as "=5F" would become a space too.
					$encContent = str_replace('_', ' ', $encContent);
					$encContent = quoted_printable_decode($encContent);
				break;
				case 'b':
					$encContent = base64_decode($encContent);
				break;
			}

				// Calls decodeHeaderString recursively for any subsequent encoded section.
			$parts[1] = $encContent . $this->decodeHeaderString($subparts[1]);
		}
		return implode('', $parts);
	}

	/**
	 * Extracts name/email parts from a header field (like 'To:' or 'From:' with name/email mixed up.
	 *
	 * @param	string		Value from a header field containing name/email values.
	 * @return	array		Array with the name and email in. Email is validated, otherwise not set.
	 */
	function extractNameEmail($str) {
		$str = (string)$str;
		$outArr = array();

			// The whole value is a plain email address:
		if (t3lib_div::validEmail($str)) {
			$outArr['email'] = $str;
			return $outArr;
		}

			// Email inside angle brackets:
		$reg = array();
		if (preg_match('/<([^>]*)>/', $str, $reg) && $reg[1] && t3lib_div::validEmail($reg[1])) {
			$outArr['email'] = $reg[1];

				// Find name: everything in front of the "<...>" part, a double-quoted string is preferred.
			list($namePart) = explode($reg[0], $str);
			if (trim($namePart)) {
				$quoted = array();
				if (preg_match('/"([^"]*)"/', $str, $quoted) && trim($quoted[1])) {
					$outArr['name'] = trim($quoted[1]);
				} else {
					$outArr['name'] = trim($namePart);
				}
			}
		}
		return $outArr;
	}

	/**
	 * Returns the data from the 'content-type' field. That is the boundary, charset and mime-type
	 * Parameter values may be double-quoted (boundary="abc") or unquoted (charset=utf-8).
	 *
	 * @param	string		"Content-type-string"
	 * @return	array		key/value pairs with the result. The mime-type is found in the key "_MIME_TYPE", parameters under their lower-cased name.
	 */
	function getContentTypeData($contentTypeStr) {
		$outValue = array();
		$cTypeParts = t3lib_div::trimExplode(';', $contentTypeStr, 1);
			// content type, first value is supposed to be the mime-type, whatever after the first is something else.
		$outValue['_MIME_TYPE'] = isset($cTypeParts[0]) ? $cTypeParts[0] : '';

		foreach (array_slice($cTypeParts, 1) as $v) {
			$reg = array();
			if (preg_match('/([^=]*)="(.*)"/i', $v, $reg)) {
					// Quoted value
				$key = $reg[1];
				$value = $reg[2];
			} elseif (preg_match('/^([^=]+)=(.*)$/s', $v, $reg)) {
					// Unquoted value
				$key = $reg[1];
				$value = trim($reg[2]);
			} else {
				continue;
			}

			$key = strtolower(trim($key));
			if ($key !== '' && trim($value) !== '') {
				$outValue[$key] = $value;
			}
		}
		return $outValue;
	}

	/**
	 * Makes a UNIX-date based on the timestamp in the 'Date' header field.
	 * Tokens which are missing from the date string are treated as zero.
	 *
	 * @param	string		String with a timestamp according to email standards.
	 * @return	integer		The timestamp converted to unix-time in seconds and compensated for GMT/CET ($this->serverGMToffsetMinutes);
	 */
	function makeUnixDate($dateStr) {
			// Drops a leading weekday ("Mon, ...") if there is one:
		$dateParts = explode(',', (string)$dateStr);
		$dateStr = count($dateParts) > 1 ? $dateParts[1] : $dateParts[0];

			// Expected tokens: day, month abbreviation, year, hh:mm:ss, GMT offset
		$spaceParts = array_pad(t3lib_div::trimExplode(' ', $dateStr, 1), 5, '');

		$monthKey = strtoupper($spaceParts[1]);
		$month = isset($this->dateAbbrevs[$monthKey]) ? $this->dateAbbrevs[$monthKey] : 0;
		$timeParts = array_pad(explode(':', $spaceParts[3]), 3, 0);

			// Explicit integer casts: mktime() does not accept empty or non-numeric strings on PHP 8.
		$timeStamp = mktime(
			(int)$timeParts[0],
			(int)$timeParts[1],
			(int)$timeParts[2],
			$month,
			(int)$spaceParts[0],
			(int)$spaceParts[2]
		);

			// Compensates for GMT by subtracting the number of seconds which the date is offset from serverTime
		$offset = $this->getGMToffset($spaceParts[4]);
		$timeStamp -= ($offset * 60);

		return $timeStamp;
	}

	/**
	 * Parsing the GMT offset value from a mail timestamp.
	 *
	 * @param	string		A string like "+0100" or so.
	 * @return	integer		Minutes to offset the timestamp
	 * @access private
	 */
	function getGMToffset($GMT) {
		$GMT = (string)$GMT;
			// Casts keep empty or non-numeric offsets at zero instead of failing in the arithmetic.
		$hours = (int)substr($GMT, 1, 2);
		$minutes = (int)substr($GMT, 3, 2);

		$GMToffset = $hours * 60 + $minutes;
		$GMToffset *= (substr($GMT, 0, 1) == '+' ? 1 : -1);
		$GMToffset -= $this->serverGMToffsetMinutes;
		return $GMToffset;
	}

	/**
	 * This returns the mail header items in an array with associative keys and the mail body part in another CONTENT field
	 * Header names are lower-cased. If a header occurs more than once, the earlier values are collected in the key "[name]." while "[name]" holds the last value.
	 *
	 * @param	string		Raw mail content
	 * @param	integer		A safety limit that will put a upper length to how many header chars will be processed. Set to zero means that there is no limit. (Uses a simple substr() to limit the amount of mail data to process to avoid run-away)
	 * @return	array		An array where each key/value pair is a header-key/value pair. The mail BODY is returned in the key 'CONTENT' if $limit is not set!
	 */
	function extractMailHeader($content,$limit=0) {
		$content = (string)$content;
		if ($limit) {
			$content = (string)substr($content, 0, $limit);
		}

		$lines = explode(chr(10), ltrim($content));
		$headers = array();
		$p = '';
		$headerLineCount = 0;

		foreach ($lines as $str) {
				// header finished at the first empty line (a line containing just "0" is not empty)
			if (trim($str) === '') {
				break;
			}

			$parts = explode(' ', $str, 2);
			if ($parts[0] && substr($parts[0], -1) == ':') {
					// New header line, "Name: value"
				$p = strtolower(substr($parts[0], 0, -1));
				if (isset($headers[$p])) {
					$headers[$p . '.'][] = $headers[$p];
				}
				$headers[$p] = isset($parts[1]) ? trim($parts[1]) : '';
			} else {
					// Continuation of the previous header line
				$headers[$p] = (isset($headers[$p]) ? $headers[$p] : '') . ' ' . trim($str);
			}
			$headerLineCount++;
		}

		if (!$limit) {
			$headers['CONTENT'] = ltrim(implode(chr(10), array_slice($lines, $headerLineCount)));
		}
		return $headers;
	}

	/**
	 * The extended version of the extractMailHeader() which will also parse all the content body into an array and further process the header fields and decode content etc. Returns every part of the mail ready to go.
	 *
	 * @param	string		Raw email input.
	 * @return	array		Multidimensional array with all parts of the message organized nicely. Use t3lib_div::debug() to analyse it visually.
	 */
	function fullParse($content) {
			// *************************
			// PROCESSING the HEADER part of the mail
			// *************************

			// Splitting header and body of mail:
		$mailParts = $this->extractMailHeader($content);

			// Decoding header values which potentially can be encoded by =?...?=
		foreach (array('subject', 'thread-topic', 'from', 'to') as $headerType) {
			if (isset($mailParts[$headerType])) {
				$mailParts[$headerType] = $this->decodeHeaderString($mailParts[$headerType]);
			}
		}

			// Separating email/names from header fields which can contain email addresses.
		foreach (array('from', 'to', 'reply-to', 'sender', 'return-path') as $headerType) {
			if (isset($mailParts[$headerType])) {
				$mailParts['_' . strtoupper($headerType)] = $this->extractNameEmail($mailParts[$headerType]);
			}
		}

			// Decode date from human-readable format to unix-time (includes compensation for GMT CET)
			// Parts of a multipart mail usually carry no "Date" header, so the key may be missing.
		$mailParts['_DATE'] = $this->makeUnixDate(isset($mailParts['date']) ? $mailParts['date'] : '');

			// Transfer encodings of body content
		$transferEncoding = isset($mailParts['content-transfer-encoding']) ? strtolower(trim($mailParts['content-transfer-encoding'])) : '';
		switch ($transferEncoding) {
			case 'quoted-printable':
				$mailParts['CONTENT'] = quoted_printable_decode($mailParts['CONTENT']);
			break;
			case 'base64':
				$mailParts['CONTENT'] = base64_decode($mailParts['CONTENT']);
			break;
		}

			// Content types
		$mailParts['_CONTENT_TYPE_DAT'] = $this->getContentTypeData(isset($mailParts['content-type']) ? $mailParts['content-type'] : '');


			// *************************
			// PROCESSING the CONTENT part of the mail (the body)
			// *************************
		$cTypeData = $mailParts['_CONTENT_TYPE_DAT'];
			// only looking for 'multipart' in string (multipart/mixed, multipart/related, multipart/alternative, multipart/signed, ...)
		$cType = substr(strtolower($cTypeData['_MIME_TYPE']), 0, 9);

		if ($cType == 'multipart') {
			if (!empty($cTypeData['boundary'])) {
				$contentSectionParts = t3lib_div::trimExplode('--' . $cTypeData['boundary'], $mailParts['CONTENT'], 1);
				$contentSectionParts_proc = array();

				foreach ($contentSectionParts as $k => $v) {
						// The closing delimiter leaves a section starting with "--"
					if (substr($v, 0, 2) == '--') {
						break;
					}
					$contentSectionParts_proc[$k] = $this->fullParse($v);
				}
				$mailParts['CONTENT'] = $contentSectionParts_proc;
			} else {
				$mailParts['CONTENT'] = 'ERROR: No boundary found.';
			}
		} elseif (isset($cTypeData['charset']) && strtolower($cTypeData['charset']) == 'utf-8') {
			$mailParts['CONTENT'] = utf8_decode($mailParts['CONTENT']);
		}

		return $mailParts;
	}
}
469
470 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php']) {
471 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php']);
472 }
473 ?>