Follow up for issue #9771: htmlArea RTE spellchecker not working in TYPO3 4.3-dev
[Packages/TYPO3.CMS.git] / typo3 / sysext / rtehtmlarea / pi1 / class.tx_rtehtmlarea_pi1.php
1 <?php
2 /***************************************************************
3 * Copyright notice
4 *
5 * (c) 2003-2008 Stanislas Rolland <typo3(arobas)sjbr.ca>
6 * All rights reserved
7 *
8 * This script is part of the Typo3 project. The Typo3 project is
9 * free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * The GNU General Public License can be found at
15 * http://www.gnu.org/copyleft/gpl.html.
16 *
17 * This script is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * This copyright notice MUST APPEAR in all copies of the script!
23 ***************************************************************/
24 /**
25 * Spell checking plugin 'tx_rtehtmlarea_pi1' for the htmlArea RTE extension.
26 *
27 * @author Stanislas Rolland <typo3(arobas)sjbr.ca>
28 *
29 * TYPO3 SVN ID: $Id$
30 *
31 */
32
class tx_rtehtmlarea_pi1 {

	// Character set conversion helper (instance of t3lib_cs), created in main()
	protected $csConvObj;
	public $extKey = 'rtehtmlarea';	// The extension key.
	public $siteUrl;
	// Charset announced by the client (POST parameter pspell_charset)
	public $charset = 'utf-8';
	// Charset used by the XML parser and for the response; only 'utf-8' or 'iso-8859-1'
	public $parserCharset = 'utf-8';
	public $defaultAspellEncoding = 'utf-8';
	public $aspellEncoding;
	// Complete response document assembled by the spell check request
	public $result;
	// Re-serialized input markup, with misspelled words wrapped in span elements
	public $text;
	// Distinct misspelled words found so far
	public $misspelled = array();
	// Body of the JavaScript object literal mapping misspelled words to suggestions
	public $suggestedWords;
	public $wordCount = 0;
	public $suggestionCount = 0;
	public $suggestedWordCount = 0;
	public $pspell_link;
	public $pspellMode = 'normal';
	public $dictionary;
	// Despite its name, this is used as the aspell executable in the shell commands
	public $AspellDirectory;
	public $pspell_is_available;
	public $forceCommandMode = 0;
	public $filePrefix = 'rtehtmlarea_';
	public $uploadFolder = 'uploads/tx_rtehtmlarea/';
	public $userUid;
	// Absolute path of the personal dictionary folder of the current BE user, if enabled
	public $personalDictPath;
	// Already shell-escaped ' --home-dir=...' argument, or an empty string
	public $personalDictsArg = '';
	// Character data collected between two element events of the XML parser
	public $xmlCharacterData = '';

	/**
	 * Main method of the Spell Checker plugin for TYPO3 CMS.
	 *
	 * Reads the request from POST parameters (dictionary, pspell_mode, pspell_charset,
	 * enablePersonalDicts, cmd, to_p_dict, to_r_list, content) and either updates the
	 * personal word list (cmd=learn) or spell checks the posted content.
	 * The response is sent directly with header()/echo.
	 *
	 * @return	void
	 */
	public function main() {

		require_once(PATH_t3lib . 'class.t3lib_cs.php');
		$this->csConvObj = t3lib_div::makeInstance('t3lib_cs');

			// Setting start time
		$time_start = microtime(true);
		$this->pspell_is_available = in_array('pspell', get_loaded_extensions());
		$configuredAspell = $this->getExtensionSetting('AspellDirectory');
		$this->AspellDirectory = $configuredAspell ? $configuredAspell : '/usr/bin/aspell';
		$configuredForceCommandMode = $this->getExtensionSetting('forceCommandMode');
		$this->forceCommandMode = $configuredForceCommandMode ? $configuredForceCommandMode : 0;
		$safe_mode_is_enabled = ini_get('safe_mode');
		if ($safe_mode_is_enabled && !$this->pspell_is_available) {
			echo('Configuration problem: Spell checking cannot be performed');
		}
		if ($safe_mode_is_enabled && $this->forceCommandMode) {
			echo('Configuration problem: Spell checking cannot be performed in command mode');
		}
			// Command mode: aspell is called through the shell instead of the pspell extension
		$commandMode = !$safe_mode_is_enabled && (!$this->pspell_is_available || $this->forceCommandMode);
		if ($commandMode) {
				// shell_exec() yields NULL when the command fails, hence the casts and the isset() guard
			$versionParts = explode('Aspell', (string) shell_exec($this->AspellDirectory . ' -v'));
			$AspellVersion = isset($versionParts[1]) ? substr($versionParts[1], 0, 4) : '';
			if (doubleval($AspellVersion) < doubleval('0.5')) {
				echo('Configuration problem: Aspell version ' . $AspellVersion . ' too old. Spell checking cannot be performed in command mode');
			}
			$this->defaultAspellEncoding = trim((string) shell_exec($this->AspellDirectory . ' config encoding'));
		}

			// Setting the list of dictionaries
		$dictionaryList = '';
		if ($commandMode) {
			$dictionaryList = (string) shell_exec($this->AspellDirectory . ' dump dicts');
			$dictionaryList = implode(',', t3lib_div::trimExplode(chr(10), $dictionaryList, 1));
		}
		if (empty($dictionaryList)) {
			$dictionaryList = $this->getExtensionSetting('dictionaryList');
		}
		if (empty($dictionaryList)) {
			$dictionaryList = 'en';
		}
		$dictionaryArray = t3lib_div::trimExplode(',', $dictionaryList, 1);

		$defaultDictionary = $this->getExtensionSetting('defaultDictionary');
		if (!$defaultDictionary || !in_array($defaultDictionary, $dictionaryArray)) {
			$defaultDictionary = 'en';
		}

			// Get the defined sys_language codes
		$languageArray = $this->getSystemLanguageCodes();
		if (!in_array($defaultDictionary, $languageArray)) {
			$languageArray[] = $defaultDictionary;
		}
			// Keep only dictionaries whose two-letter prefix is a known language code and
			// whose name has no '-' separated suffix
		foreach ($dictionaryArray as $key => $dict) {
			$lang = explode('-', $dict);
			if (!in_array(substr($dict, 0, 2), $languageArray) || !empty($lang[1])) {
				unset($dictionaryArray[$key]);
			} else {
				$dictionaryArray[$key] = $lang[0];
			}
		}
		uasort($dictionaryArray, 'strcoll');

			// Setting the dictionary
		$this->dictionary = t3lib_div::_POST('dictionary');
		if (empty($this->dictionary) || !in_array($this->dictionary, $dictionaryArray)) {
			$this->dictionary = $defaultDictionary;
		}
			// Comma list of dictionaries with the active one prefixed by '@'.
			// The entry is located by exact match on the array: a substring search on the
			// joined list could mark the wrong entry, or corrupt the start of the list
			// when the active dictionary is not part of it.
		$dictionaryLabels = array();
		$activeIsMarked = false;
		foreach ($dictionaryArray as $dict) {
			if (!$activeIsMarked && (string) $dict === (string) $this->dictionary) {
				$dictionaryLabels[] = '@' . $dict;
				$activeIsMarked = true;
			} else {
				$dictionaryLabels[] = $dict;
			}
		}
		$dictionaries = implode(',', $dictionaryLabels);

			// Setting the pspell suggestion mode
		$this->pspellMode = t3lib_div::_POST('pspell_mode') ? t3lib_div::_POST('pspell_mode') : $this->pspellMode;
			// Now sanitize $this->pspellMode
		$this->pspellMode = t3lib_div::inList('ultra,fast,normal,bad-spellers', $this->pspellMode) ? $this->pspellMode : 'normal';

			// Setting the charset
		if (t3lib_div::_POST('pspell_charset')) {
			$this->charset = trim(t3lib_div::_POST('pspell_charset'));
		}
		if (strtolower($this->charset) == 'iso-8859-1') {
			$this->parserCharset = strtolower($this->charset);
		}

			// In some configurations, Aspell uses 'iso8859-1' instead of 'iso-8859-1'
		$this->aspellEncoding = $this->parserCharset;
		if ($this->parserCharset == 'iso-8859-1' && strstr($this->defaultAspellEncoding, '8859-1')) {
			$this->aspellEncoding = $this->defaultAspellEncoding;
		}

			// However, we are going to work only in the parser charset.
			// The PSPELL_* constants exist only when the pspell extension is loaded,
			// so the mode flag is resolved inside this branch only.
		if ($this->pspell_is_available && !$this->forceCommandMode) {
			$this->pspell_link = pspell_new($this->dictionary, '', '', $this->parserCharset, $this->getPspellModeFlag());
		}

			// Setting the path to user personal dicts, if any
		if (t3lib_div::_POST('enablePersonalDicts') == 'true' && TYPO3_MODE == 'BE' && is_object($GLOBALS['BE_USER'])) {
			$this->userUid = 'BE_' . $GLOBALS['BE_USER']->user['uid'];
			if ($this->userUid) {
				$this->personalDictPath = t3lib_div::getFileAbsFileName($this->uploadFolder . $this->userUid);
				if (!is_dir($this->personalDictPath)) {
					t3lib_div::mkdir($this->personalDictPath);
				}
					// escape here for later use
				$this->personalDictsArg = ' --home-dir=' . escapeshellarg($this->personalDictPath);
			}
		}

		$cmd = t3lib_div::_POST('cmd');
		if ($cmd == 'learn' && !$safe_mode_is_enabled) {
			$this->updatePersonalWordList();
		} else {
			$this->checkContent($dictionaries, $time_start);
		}
	}

	/**
	 * Returns a trimmed setting of this extension from $GLOBALS['TYPO3_CONF_VARS']['EXTCONF'].
	 *
	 * @param	string		$key: name of the setting
	 * @return	string		the trimmed value, or an empty string when the setting is missing or not scalar
	 */
	protected function getExtensionSetting($key) {
		if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF'][$this->extKey][$key])
			&& is_scalar($GLOBALS['TYPO3_CONF_VARS']['EXTCONF'][$this->extKey][$key])) {
			return trim($GLOBALS['TYPO3_CONF_VARS']['EXTCONF'][$this->extKey][$key]);
		}
		return '';
	}

	/**
	 * Builds the language codes of all non-hidden sys_language records.
	 *
	 * Each code is the lower-cased lg_iso_2 value of the joined static_languages record,
	 * followed by '_' and lg_country_iso_2 when the latter is set.
	 *
	 * @return	array		list of language codes
	 */
	protected function getSystemLanguageCodes() {
		$languageArray = array();
		$tableA = 'sys_language';
		$tableB = 'static_languages';
		$selectFields = $tableA . '.uid,' . $tableB . '.lg_iso_2,' . $tableB . '.lg_country_iso_2';
		$table = $tableA . ' LEFT JOIN ' . $tableB . ' ON ' . $tableA . '.static_lang_isocode=' . $tableB . '.uid';
		$whereClause = '1=1 ';
		$whereClause .= ' AND ' . $tableA . '.hidden != 1';
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($selectFields, $table, $whereClause);
		while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
			$languageArray[] = strtolower($row['lg_iso_2']) . ($row['lg_country_iso_2'] ? '_' . $row['lg_country_iso_2'] : '');
		}
		return $languageArray;
	}

	/**
	 * Maps the sanitized suggestion mode to the matching pspell constant.
	 * Must only be called when the pspell extension is loaded.
	 *
	 * @return	integer		one of PSPELL_FAST, PSPELL_BAD_SPELLERS, PSPELL_NORMAL
	 */
	protected function getPspellModeFlag() {
		switch ($this->pspellMode) {
			case 'ultra':
			case 'fast':
				return PSPELL_FAST;
			case 'bad-spellers':
				return PSPELL_BAD_SPELLERS;
			case 'normal':
			default:
				return PSPELL_NORMAL;
		}
	}

	/**
	 * Handles cmd=learn: pipes the posted words (to_p_dict) and replacement pairs
	 * (to_r_list) to aspell, echoes a plain text report and terminates the script.
	 *
	 * @return	void		never returns: ends with exit() or die()
	 */
	protected function updatePersonalWordList() {
			// Only available for BE_USERS, die silently if someone has gotten here by accident
		if (TYPO3_MODE != 'BE' || !is_object($GLOBALS['BE_USER'])) {
			die('');
		}
			// Updating the personal word list; anything but an array is treated as "nothing posted"
		$to_p_dict = t3lib_div::_POST('to_p_dict');
		$to_p_dict = is_array($to_p_dict) ? $to_p_dict : array();
		$to_r_list = t3lib_div::_POST('to_r_list');
		$to_r_list = is_array($to_r_list) ? $to_r_list : array();
		header('Content-Type: text/plain; charset=' . strtoupper($this->parserCharset));
		header('Pragma: no-cache');
		if ($to_p_dict || $to_r_list) {
			$tmpFileName = t3lib_div::tempnam($this->filePrefix);
			if ($filehandle = fopen($tmpFileName, 'wb')) {
					// Every line written to the file is also echoed to the client
				foreach ($to_p_dict as $personal_word) {
					$line = '&' . $personal_word . "\n";
					echo $line;
					fwrite($filehandle, $line, strlen($line));
				}
				foreach ($to_r_list as $replace_pair) {
					$line = '$$ra ' . $replace_pair[0] . ' , ' . $replace_pair[1] . "\n";
					echo $line;
					fwrite($filehandle, $line, strlen($line));
				}
				$line = "#\n";
				echo $line;
				fwrite($filehandle, $line, strlen($line));
				fclose($filehandle);
					// $this->personalDictsArg has already been escapeshellarg()'ed in main(), it is an optional parameter and might be empty here
				$AspellCommand = 'cat ' . escapeshellarg($tmpFileName) . ' | ' . $this->AspellDirectory . ' -a --mode=none' . $this->personalDictsArg . ' --lang=' . escapeshellarg($this->dictionary) . ' --encoding=' . escapeshellarg($this->aspellEncoding) . ' 2>&1';
				print $AspellCommand . "\n";
				print shell_exec($AspellCommand);
				t3lib_div::unlink_tempfile($tmpFileName);
				echo('Personal word list was updated.');
			} else {
				echo('SpellChecker tempfile open error.');
			}
		} else {
			echo('Nothing to add to the personal word list.');
		}
		flush();
		exit();
	}

	/**
	 * Spell checks the posted content and outputs the XHTML response document.
	 *
	 * @param	string		$dictionaries: comma list of dictionaries, the active one prefixed by '@'
	 * @param	float		$time_start: microtime(true) value taken when the request started
	 * @return	void
	 */
	protected function checkContent($dictionaries, $time_start) {
		$lf = chr(10);
		$regexModifier = ($this->parserCharset == 'utf-8') ? 'u' : '';
		$languageCode = substr($this->dictionary, 0, 2);

			// Initialize output (a single html element: the former duplicate <html> tag was invalid markup)
		$this->result = '<?xml version="1.0" encoding="' . $this->parserCharset . '"?>' . $lf
			. '<!DOCTYPE html' . $lf
			. 'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' . $lf
			. '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' . $lf
			. '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="' . $languageCode . '" lang="' . $languageCode . '">' . $lf
			. '<head>' . $lf
			. '<meta http-equiv="Content-Type" content="text/html; charset=' . $this->parserCharset . '" />' . $lf
			. '<link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />' . $lf
			. '<script type="text/javascript">' . $lf
			. '/*<![CDATA[*/' . $lf
			. '<!--' . $lf;

			// Getting the input content
		$content = (string) t3lib_div::_POST('content');

			// Parsing the input HTML.
			// The handlers are registered as callables bound to this object; the former
			// xml_set_object($parser, &$this) is a fatal error since PHP 5.4.
		$parser = xml_parser_create(strtoupper($this->parserCharset));
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
		if (!xml_set_element_handler($parser, array($this, 'startHandler'), array($this, 'endHandler'))) {
			echo('Bad xml handler setting');
		}
		if (!xml_set_character_data_handler($parser, array($this, 'collectDataHandler'))) {
			echo('Bad xml handler setting');
		}
		if (!xml_set_default_handler($parser, array($this, 'defaultHandler'))) {
			echo('Bad xml handler setting');
		}
		$document = '<?xml version="1.0" encoding="' . $this->parserCharset . '"?><spellchecker> ' . preg_replace('/&nbsp;/' . $regexModifier, ' ', $content) . ' </spellchecker>';
		if (!xml_parse($parser, $document)) {
			echo('Bad parsing');
		}
		if (xml_get_error_code($parser)) {
			die('Line ' . xml_get_current_line_number($parser) . ': ' . xml_error_string(xml_get_error_code($parser)));
		}
		xml_parser_free($parser);
		if ($this->pspell_is_available && !$this->forceCommandMode) {
			pspell_clear_session($this->pspell_link);
		}
		$this->result .= 'var suggested_words = {' . $this->suggestedWords . '};' . $lf;

			// Calculating parsing and spell checking time
		$time = number_format(microtime(true) - $time_start, 2, ',', ' ');

			// Insert spellcheck info
		$this->result .= 'var spellcheck_info = { "Total words":"' . $this->wordCount . '","Misspelled words":"' . sizeof($this->misspelled) . '","Total suggestions":"' . $this->suggestionCount . '","Total words suggested":"' . $this->suggestedWordCount . '","Spelling checked in":"' . $time . '" };' . $lf
			. '// -->' . $lf
			. '/*]]>*/' . $lf
			. '</script>' . $lf
			. '</head>' . $lf;
		$this->result .= '<body onload="window.parent.finishedSpellChecking();">';
			// The XML declaration passed through by defaultHandler() is removed from the body
		$this->result .= preg_replace('/' . preg_quote('<?xml') . '.*' . preg_quote('?>') . '[' . preg_quote(chr(10) . chr(13) . chr(32)) . ']*/' . $regexModifier, '', $this->text);
		$this->result .= '<div id="HA-spellcheck-dictionaries">' . $dictionaries . '</div>';

			// Closing
		$this->result .= $lf . '</body></html>';

			// Outputting
		header('Content-Type: text/html; charset=' . strtoupper($this->parserCharset));
		echo $this->result;
	}

	/**
	 * Spell checks the character data collected so far, if any, and resets the buffer.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @return	void
	 */
	protected function flushCharacterData($xml_parser) {
		if (strlen($this->xmlCharacterData)) {
			$this->spellCheckHandler($xml_parser, $this->xmlCharacterData);
			$this->xmlCharacterData = '';
		}
	}

	/**
	 * Lower-cases an element name, using the charset conversion object when it is available.
	 *
	 * @param	string		$tag: element name as reported by the XML parser
	 * @return	string		lower-cased element name
	 */
	protected function normalizeTagName($tag) {
		if (is_object($this->csConvObj)) {
			return $this->csConvObj->conv_case($this->parserCharset, $tag, 'toLower');
		}
			// $this->csConvObj is only set by main()
		return strtolower($tag);
	}

	/**
	 * Tells whether an element is written as an empty element (no end tag).
	 * The same list is used for start and end tags so that both stay in sync.
	 *
	 * @param	string		$tag: element name as reported by the XML parser
	 * @return	boolean		TRUE for br, img, hr, input and area
	 */
	protected function isEmptyElement($tag) {
		return in_array(strtolower($tag), array('br', 'img', 'hr', 'input', 'area'), true);
	}

	/**
	 * XML start element handler: writes the start tag to $this->text.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @param	string		$tag: element name
	 * @param	array		$attributes: attribute values indexed by attribute name
	 * @return	void
	 */
	public function startHandler($xml_parser, $tag, $attributes) {
		$this->flushCharacterData($xml_parser);

			// The wrapper element added by the spell checker itself is not output
		if ($tag === 'spellchecker') {
			return;
		}
		$this->text .= '<' . $this->normalizeTagName($tag) . ' ';
		foreach ($attributes as $key => $val) {
				// The parser delivers attribute values with entities decoded: encode them again,
				// otherwise a quote or an ampersand in a value breaks the generated markup
			$this->text .= $key . '="' . htmlspecialchars($val, ENT_COMPAT, $this->parserCharset) . '" ';
		}
		$this->text .= $this->isEmptyElement($tag) ? ' />' : '>';
	}

	/**
	 * XML end element handler: writes the end tag to $this->text, except for empty elements.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @param	string		$tag: element name
	 * @return	void
	 */
	public function endHandler($xml_parser, $tag) {
		$this->flushCharacterData($xml_parser);

		if ($tag === 'spellchecker' || $this->isEmptyElement($tag)) {
			return;
		}
			// Same normalization as for the start tag, so that both tags match
		$this->text .= '</' . $this->normalizeTagName($tag) . '>';
	}

	/**
	 * Records a newly found misspelled word together with its suggestions.
	 *
	 * @param	string		$word: the misspelled word
	 * @param	mixed		$suggest: array of suggested words; anything else counts as no suggestion
	 * @return	void
	 */
	protected function registerMisspelledWord($word, $suggest) {
		if (!is_array($suggest)) {
			$suggest = array();
		}
			// Entries of the JavaScript object literal are separated by commas
		if (sizeof($this->misspelled) != 0) {
			$this->suggestedWords .= ',';
		}
		if (sizeof($suggest) != 0) {
			$this->suggestionCount++;
			$this->suggestedWordCount += sizeof($suggest);
		}
		$this->suggestedWords .= '"' . $word . '":"' . implode(',', $suggest) . '"';
		$this->misspelled[] = $word;
	}

	/**
	 * Wraps every whole-word occurrence of a word into the spell check error span.
	 *
	 * @param	string		$word: the misspelled word
	 * @param	string		$text: the text to mark up
	 * @return	string		the marked up text
	 */
	protected function highlightWord($word, $text) {
		$regexModifier = ($this->parserCharset == 'utf-8') ? 'u' : '';
		return preg_replace('/\b' . preg_quote($word, '/') . '\b/' . $regexModifier, '<span class="HA-spellcheck-error">' . $word . '</span>', $text);
	}

	/**
	 * Spell checks a run of character data and appends it to $this->text with the
	 * misspelled words marked up.
	 *
	 * NOTE(review): the character data arrives with entities decoded and is appended
	 * without being encoded again; confirm whether the client relies on this.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @param	string		$string: the character data
	 * @return	void
	 */
	public function spellCheckHandler($xml_parser, $string) {
			// Words already highlighted in this run of character data
		$incurrent = array();
		$stringText = $string;
		$isUtf8 = ($this->parserCharset == 'utf-8');
		$words = preg_split($isUtf8 ? '/\P{L}+/u' : '/\W+/', $stringText);
		if (!is_array($words)) {
				// preg_split() failed, e.g. on invalid UTF-8: nothing to check
			$words = array();
		}
			// foreach replaces the former each() loop: each() was removed in PHP 8
		foreach ($words as $word) {
			$word = preg_replace('/ /' . ($isUtf8 ? 'u' : ''), '', $word);
			if (!$word || is_numeric($word)) {
				continue;
			}
			if ($this->pspell_is_available && !$this->forceCommandMode) {
				if (!pspell_check($this->pspell_link, $word)) {
					if (!in_array($word, $this->misspelled)) {
						$this->registerMisspelledWord($word, pspell_suggest($this->pspell_link, $word));
					}
					if (!in_array($word, $incurrent)) {
						$stringText = $this->highlightWord($word, $stringText);
						$incurrent[] = $word;
					}
				}
			} else {
				$tmpFileName = t3lib_div::tempnam($this->filePrefix);
				$filehandle = fopen($tmpFileName, 'wb');
				if (!$filehandle) {
					echo('SpellChecker tempfile open error');
				} else {
						// Only touch the handle when it could be opened
					if (!fwrite($filehandle, $word)) {
						echo('SpellChecker tempfile write error');
					}
					if (!fclose($filehandle)) {
						echo('SpellChecker tempfile close error');
					}
				}
				$AspellCommand = 'cat ' . escapeshellarg($tmpFileName) . ' | ' . $this->AspellDirectory . ' -a check --mode=none --sug-mode=' . escapeshellarg($this->pspellMode) . $this->personalDictsArg . ' --lang=' . escapeshellarg($this->dictionary) . ' --encoding=' . escapeshellarg($this->aspellEncoding) . ' 2>&1';
				$AspellAnswer = (string) shell_exec($AspellCommand);
				$AspellResultLines = t3lib_div::trimExplode(chr(10), $AspellAnswer, 1);
					// Line 0 is checked for an error message, line 1 carries the result for the word
				$firstLine = isset($AspellResultLines[0]) ? $AspellResultLines[0] : '';
				$resultLine = isset($AspellResultLines[1]) ? $AspellResultLines[1] : '';
				if (substr($firstLine, 0, 6) == 'Error:') {
					echo("{$AspellAnswer}");
				}
				t3lib_div::unlink_tempfile($tmpFileName);
					// Any result line not starting with '*' is treated as a misspelling
				if (substr($resultLine, 0, 1) != '*') {
					if (!in_array($word, $this->misspelled)) {
						$suggest = array();
							// Suggestions are read from a '&' line: comma list after the first ':'
						if (substr($resultLine, 0, 1) == '&') {
							$suggestions = t3lib_div::trimExplode(':', $resultLine, 1);
							if (isset($suggestions[1])) {
								$suggest = t3lib_div::trimExplode(',', $suggestions[1], 1);
							}
						}
						$this->registerMisspelledWord($word, $suggest);
					}
					if (!in_array($word, $incurrent)) {
						$stringText = $this->highlightWord($word, $stringText);
						$incurrent[] = $word;
					}
				}
			}
			$this->wordCount++;
		}
		$this->text .= $stringText;
	}

	/**
	 * XML character data handler: buffers the data until the next element event.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @param	string		$string: the character data
	 * @return	void
	 */
	public function collectDataHandler($xml_parser, $string) {
		$this->xmlCharacterData .= $string;
	}

	/**
	 * XML default handler: copies anything not handled elsewhere to $this->text unchanged.
	 *
	 * @param	resource	$xml_parser: the XML parser
	 * @param	string		$string: the unhandled data
	 * @return	void
	 */
	public function defaultHandler($xml_parser, $string) {
		$this->text .= $string;
	}

}
438
// When this script runs in frontend mode, the class instantiates and runs itself:
// tslib_eidtools is loaded, tslib_eidtools::connectDB() is called (presumably to
// open the database connection - verify against tslib_eidtools), then main() is run.
if ('FE' == TYPO3_MODE) {
	require_once(PATH_tslib . 'class.tslib_eidtools.php');
	tslib_eidtools::connectDB();

	$spellChecker = t3lib_div::makeInstance('tx_rtehtmlarea_pi1');
	$spellChecker->main();
}
445
446 ?>