[TASK] Remove superfluous parenthesis in sysexts
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Utility / DoubleMetaPhoneUtility.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Utility;
3
4 /***************************************************************
5 * VERSION DoubleMetaphone Class 1.01
6 *
7 * DESCRIPTION
8 *
9 * This class implements a "sounds like" algorithm developed
10 * by Lawrence Philips which he published in the June, 2000 issue
11 * of C/C++ Users Journal. Double Metaphone is an improved
12 * version of Philips' original Metaphone algorithm.
13 *
14 * COPYRIGHT
15 *
16 * Copyright 2001, Stephen Woodbridge <woodbri@swoodbridge.com>
17 * All rights reserved.
18 *
19 * http://swoodbridge.com/DoubleMetaPhone/
20 *
21 * This PHP translation is based heavily on the C implementation
22 * by Maurice Aubrey <maurice@hevanet.com>, which in turn
23 * is based heavily on the C++ implementation by
24 * Lawrence Philips and incorporates several bug fixes courtesy
25 * of Kevin Atkinson <kevina@users.sourceforge.net>.
26 *
27 * This module is free software; you may redistribute it and/or
28 * modify it under the same terms as Perl itself.
29 *
30 * CONTRIBUTIONS
31 *
32 * 17-May-2002 Geoff Caplan http://www.advantae.com
33 * Bug fix: added code to return class object which I forgot to do
34 * Created a functional callable version instead of the class version
35 * which is faster if you are calling this a lot.
36 *
37 ***************************************************************/
38
39 /**
40 * TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
41 * TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration
42 * TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
43 */
class DoubleMetaPhoneUtility {

    /**
     * Upper-cased copy of the word being encoded, with trailing space padding
     * so that look-ahead reads past the last letter hit a blank.
     *
     * @var string
     */
    public $original = '';

    /**
     * Primary phonetic code built so far (cut to 4 characters on return).
     *
     * @var string
     */
    public $primary = '';

    /**
     * Secondary (alternative) phonetic code built so far (cut to 4 characters on return).
     *
     * @var string
     */
    public $secondary = '';

    /**
     * Byte length of the input word, without the padding.
     *
     * @var integer
     */
    public $length = 0;

    /**
     * Index of the last byte of the input word ($length - 1).
     *
     * @var integer
     */
    public $last = 0;

    /**
     * Index of the byte currently being examined.
     *
     * @var integer
     */
    public $current = 0;

    /**
     * TYPO3 specific API to this class: returns only the primary code of a word.
     *
     * @param string $string Word to encode
     * @param integer $sys_language_uid Not used by this implementation
     * @return string Primary Double Metaphone code (at most 4 characters)
     */
    public function metaphone($string, $sys_language_uid = 0) {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }

    /**
     * Computes the primary and secondary Double Metaphone codes of a word.
     *
     * The word is upper-cased with strtoupper() and scanned byte by byte; each
     * branch appends to $this->primary / $this->secondary and advances
     * $this->current. Scanning stops at the end of the word or as soon as both
     * codes hold at least 4 characters.
     *
     * @param string $string Word to encode
     * @return array Array with the keys 'primary' and 'secondary', each at most 4 characters long
     */
    public function DoubleMetaPhone($string) {
        $this->primary = '';
        $this->secondary = '';
        $this->current = 0;
        $this->length = strlen($string);
        $this->last = $this->length - 1;
        $this->original = strtoupper($string . ' ');
        // skip this at beginning of word
        if ($this->StringAt($this->original, 0, 2, array('GN', 'KN', 'PN', 'WR', 'PS'))) {
            $this->current++;
        }
        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if (substr($this->original, 0, 1) === 'X') {
            // 'Z' maps to 'S'
            $this->primary .= 'S';
            $this->secondary .= 'S';
            $this->current++;
        }
        // main loop: run until the word is consumed or BOTH codes have 4 characters.
        // (The former condition applied strlen() to the result of "$this->secondary < 4".)
        while ($this->current < $this->length && (strlen($this->primary) < 4 || strlen($this->secondary) < 4)) {
            switch (substr($this->original, $this->current, 1)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($this->current === 0) {
                        // all init vowels now map to 'A'
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                    $this->current += 1;
                    break;
                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    if (substr($this->original, $this->current + 1, 1) === 'B') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                // NOTE(review): the switch subject is a single byte; if this file is stored
                // as UTF-8 the two-byte literals 'Ç' and 'Ñ' can never match - confirm encoding.
                case 'Ç':
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                case 'C':
                    // various germanic
                    if (
                        $this->current > 1
                        && !$this->IsVowel($this->original, $this->current - 2)
                        && $this->StringAt($this->original, $this->current - 1, 3, array('ACH'))
                        && substr($this->original, $this->current + 2, 1) !== 'I'
                        && (
                            substr($this->original, $this->current + 2, 1) !== 'E'
                            || $this->StringAt($this->original, $this->current - 2, 6, array('BACHER', 'MACHER'))
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // special case 'caesar'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 6, array('CAESAR'))) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 2;
                        break;
                    }
                    // italian 'chianti'
                    if ($this->StringAt($this->original, $this->current, 4, array('CHIA'))) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CH'))) {
                        // find 'michael'
                        if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, array('CHAE'))) {
                            $this->primary .= 'K';
                            $this->secondary .= 'X';
                            $this->current += 2;
                            break;
                        }
                        // greek roots e.g. 'chemistry', 'chorus'
                        if (
                            $this->current === 0
                            && (
                                $this->StringAt($this->original, $this->current + 1, 5, array('HARAC', 'HARIS'))
                                || $this->StringAt($this->original, $this->current + 1, 3, array('HOR', 'HYM', 'HIA', 'HEM'))
                            )
                            && !$this->StringAt($this->original, 0, 5, array('CHORE'))
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if (
                            $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                            || $this->StringAt($this->original, $this->current - 2, 6, array('ORCHES', 'ARCHIT', 'ORCHID'))
                            || $this->StringAt($this->original, $this->current + 2, 1, array('T', 'S'))
                            || (
                                (
                                    $this->StringAt($this->original, $this->current - 1, 1, array('A', 'O', 'U', 'E'))
                                    || $this->current === 0
                                )
                                && $this->StringAt($this->original, $this->current + 2, 1, array('L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '))
                            )
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            if ($this->current > 0) {
                                if ($this->StringAt($this->original, 0, 2, array('MC'))) {
                                    // e.g. 'McHugh'
                                    $this->primary .= 'K';
                                    $this->secondary .= 'K';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'K';
                                }
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'czerny'
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('CZ'))
                        && !$this->StringAt($this->original, $this->current - 2, 4, array('WICZ'))
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'focaccia'
                    if ($this->StringAt($this->original, $this->current + 1, 3, array('CIA'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    // double 'C', but not McClellan'
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('CC'))
                        && !($this->current === 1 && substr($this->original, 0, 1) === 'M')
                    ) {
                        // 'bellocchio' but not 'bacchus'
                        if (
                            $this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'H'))
                            && !$this->StringAt($this->original, $this->current + 2, 2, array('HU'))
                        ) {
                            // 'accident', 'accede', 'succeed'
                            if (
                                ($this->current === 1 && substr($this->original, $this->current - 1, 1) === 'A')
                                || $this->StringAt($this->original, $this->current - 1, 5, array('UCCEE', 'UCCES'))
                            ) {
                                $this->primary .= 'KS';
                                $this->secondary .= 'KS';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        } else {
                            // Pierce's rule
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CK', 'CG', 'CQ'))) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CI', 'CE', 'CY'))) {
                        // italian vs. english
                        if ($this->StringAt($this->original, $this->current, 3, array('CIO', 'CIE', 'CIA'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->StringAt($this->original, $this->current + 1, 2, array(' C', ' Q', ' G'))) {
                        $this->current += 3;
                    } else {
                        if (
                            $this->StringAt($this->original, $this->current + 1, 1, array('C', 'K', 'Q'))
                            && !$this->StringAt($this->original, $this->current + 1, 2, array('CE', 'CI'))
                        ) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                    }
                    break;
                case 'D':
                    if ($this->StringAt($this->original, $this->current, 2, array('DG'))) {
                        if ($this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'Y'))) {
                            // e.g. 'edge'
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                            $this->current += 3;
                            break;
                        } else {
                            // e.g. 'edgar'
                            $this->primary .= 'TK';
                            $this->secondary .= 'TK';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('DT', 'DD'))) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 1;
                    break;
                case 'F':
                    if (substr($this->original, $this->current + 1, 1) === 'F') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'G':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        if ($this->current > 0 && !$this->IsVowel($this->original, $this->current - 1)) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // 'ghislane', 'ghiradelli'
                        if ($this->current === 0) {
                            if (substr($this->original, $this->current + 2, 1) === 'I') {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if (
                            ($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, array('B', 'H', 'D')))
                            || ($this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, array('B', 'H', 'D')))
                            || ($this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, array('B', 'H')))
                        ) {
                            $this->current += 2;
                            break;
                        } else {
                            // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                            if (
                                $this->current > 2
                                && substr($this->original, $this->current - 1, 1) === 'U'
                                && $this->StringAt($this->original, $this->current - 3, 1, array('C', 'G', 'L', 'R', 'T'))
                            ) {
                                $this->primary .= 'F';
                                $this->secondary .= 'F';
                            } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        if ($this->current === 1 && $this->IsVowel($this->original, 0) && !$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'KN';
                            $this->secondary .= 'N';
                        } else {
                            // not e.g. 'cagney'
                            // (the single-character check now passes a length of 1 to substr();
                            // inside this branch that character is always 'N', so the result is unchanged)
                            if (
                                !$this->StringAt($this->original, $this->current + 2, 2, array('EY'))
                                && substr($this->original, $this->current + 1, 1) !== 'Y'
                                && !$this->SlavoGermanic($this->original)
                            ) {
                                $this->primary .= 'N';
                                $this->secondary .= 'KN';
                            } else {
                                $this->primary .= 'KN';
                                $this->secondary .= 'KN';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // 'tagliaro'
                    if ($this->StringAt($this->original, $this->current + 1, 2, array('LI')) && !$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'KL';
                        $this->secondary .= 'L';
                        $this->current += 2;
                        break;
                    }
                    // -ges-, -gep-, -gel- at beginning
                    if (
                        $this->current === 0
                        && (
                            substr($this->original, $this->current + 1, 1) === 'Y'
                            || $this->StringAt($this->original, $this->current + 1, 2, array(
                                'ES',
                                'EP',
                                'EB',
                                'EL',
                                'EY',
                                'IB',
                                'IL',
                                'IN',
                                'IE',
                                'EI',
                                'ER'
                            ))
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // -ger-, -gy-
                    if (
                        (
                            $this->StringAt($this->original, $this->current + 1, 2, array('ER'))
                            || substr($this->original, $this->current + 1, 1) === 'Y'
                        )
                        && !$this->StringAt($this->original, 0, 6, array('DANGER', 'RANGER', 'MANGER'))
                        && !$this->StringAt($this->original, $this->current - 1, 1, array('E', 'I'))
                        && !$this->StringAt($this->original, $this->current - 1, 3, array('RGY', 'OGY'))
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // italian e.g. 'biaggi'
                    if (
                        $this->StringAt($this->original, $this->current + 1, 1, array('E', 'I', 'Y'))
                        || $this->StringAt($this->original, $this->current - 1, 4, array('AGGI', 'OGGI'))
                    ) {
                        // obvious germanic
                        if (
                            $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                            || $this->StringAt($this->original, $this->current + 1, 2, array('ET'))
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            // always soft if french ending
                            if ($this->StringAt($this->original, $this->current + 1, 4, array('IER '))) {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'J';
                                $this->secondary .= 'K';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'G') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if (
                        ($this->current === 0 || $this->IsVowel($this->original, $this->current - 1))
                        && $this->IsVowel($this->original, $this->current + 1)
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if (
                        $this->StringAt($this->original, $this->current, 4, array('JOSE'))
                        || $this->StringAt($this->original, 0, 4, array('SAN '))
                    ) {
                        if (
                            ($this->current === 0 && substr($this->original, $this->current + 4, 1) === ' ')
                            || $this->StringAt($this->original, 0, 4, array('SAN '))
                        ) {
                            $this->primary .= 'H';
                            $this->secondary .= 'H';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        }
                        $this->current += 1;
                        break;
                    }
                    if ($this->current === 0 && !$this->StringAt($this->original, $this->current, 4, array('JOSE'))) {
                        $this->primary .= 'J';
                        // Yankelovich/Jankelowicz
                        $this->secondary .= 'A';
                    } else {
                        // spanish pron. of .e.g. 'bajador'
                        if (
                            $this->IsVowel($this->original, $this->current - 1)
                            && !$this->SlavoGermanic($this->original)
                            && (
                                substr($this->original, $this->current + 1, 1) === 'A'
                                || substr($this->original, $this->current + 1, 1) === 'O'
                            )
                        ) {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        } else {
                            if ($this->current === $this->last) {
                                // final 'J': primary code only, nothing is added to the secondary code
                                $this->primary .= 'J';
                            } else {
                                if (
                                    !$this->StringAt($this->original, $this->current + 1, 1, array('L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'))
                                    && !$this->StringAt($this->original, $this->current - 1, 1, array('S', 'K', 'L'))
                                ) {
                                    $this->primary .= 'J';
                                    $this->secondary .= 'J';
                                }
                            }
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'J') {
                        // it could happen
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'K':
                    if (substr($this->original, $this->current + 1, 1) === 'K') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'L':
                    if (substr($this->original, $this->current + 1, 1) === 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if (
                            (
                                $this->current === $this->length - 3
                                && $this->StringAt($this->original, $this->current - 1, 4, array('ILLO', 'ILLA', 'ALLE'))
                            )
                            || (
                                (
                                    $this->StringAt($this->original, $this->last - 1, 2, array('AS', 'OS'))
                                    || $this->StringAt($this->original, $this->last, 1, array('A', 'O'))
                                )
                                && $this->StringAt($this->original, $this->current - 1, 4, array('ALLE'))
                            )
                        ) {
                            // primary code only, nothing is added to the secondary code
                            $this->primary .= 'L';
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'L';
                    $this->secondary .= 'L';
                    break;
                case 'M':
                    if (
                        (
                            $this->StringAt($this->original, $this->current - 1, 3, array('UMB'))
                            && (
                                $this->current + 1 === $this->last
                                || $this->StringAt($this->original, $this->current + 2, 2, array('ER'))
                            )
                        )
                        || substr($this->original, $this->current + 1, 1) === 'M'
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'M';
                    $this->secondary .= 'M';
                    break;
                case 'N':
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                case 'Ñ':
                    $this->current += 1;
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                case 'P':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->current += 2;
                        $this->primary .= 'F';
                        $this->secondary .= 'F';
                        break;
                    }
                    // also account for "campbell" and "raspberry"
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('P', 'B'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    break;
                case 'Q':
                    if (substr($this->original, $this->current + 1, 1) === 'Q') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if (
                        $this->current === $this->last
                        && !$this->SlavoGermanic($this->original)
                        && $this->StringAt($this->original, $this->current - 2, 2, array('IE'))
                        && !$this->StringAt($this->original, $this->current - 4, 2, array('ME', 'MA'))
                    ) {
                        // secondary code only, nothing is added to the primary code
                        $this->secondary .= 'R';
                    } else {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'R') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->StringAt($this->original, $this->current - 1, 3, array('ISL', 'YSL'))) {
                        $this->current += 1;
                        break;
                    }
                    // special case 'sugar-'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 5, array('SUGAR'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'S';
                        $this->current += 1;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('SH'))) {
                        // germanic
                        if ($this->StringAt($this->original, $this->current + 1, 4, array('HEIM', 'HOEK', 'HOLM', 'HOLZ'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 2;
                        break;
                    }
                    // italian & armenian
                    if (
                        $this->StringAt($this->original, $this->current, 3, array('SIO', 'SIA'))
                        || $this->StringAt($this->original, $this->current, 4, array('SIAN'))
                    ) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 3;
                        break;
                    }
                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                    if (
                        ($this->current === 0 && $this->StringAt($this->original, $this->current + 1, 1, array('M', 'N', 'L', 'W')))
                        || $this->StringAt($this->original, $this->current + 1, 1, array('Z'))
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        if ($this->StringAt($this->original, $this->current + 1, 1, array('Z'))) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('SC'))) {
                        // Schlesinger's rule
                        if (substr($this->original, $this->current + 2, 1) === 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->StringAt($this->original, $this->current + 3, 2, array('OO', 'ER', 'EN', 'UY', 'ED', 'EM'))) {
                                // 'schermerhorn', 'schenker'
                                if ($this->StringAt($this->original, $this->current + 3, 2, array('ER', 'EN'))) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'SK';
                                } else {
                                    $this->primary .= 'SK';
                                    $this->secondary .= 'SK';
                                }
                                $this->current += 3;
                                break;
                            } else {
                                if (
                                    $this->current === 0
                                    && !$this->IsVowel($this->original, 3)
                                    && substr($this->original, $this->current + 3, 1) !== 'W'
                                ) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'S';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'X';
                                }
                                $this->current += 3;
                                break;
                            }
                        }
                        if ($this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'Y'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                            $this->current += 3;
                            break;
                        }
                        // else
                        $this->primary .= 'SK';
                        $this->secondary .= 'SK';
                        $this->current += 3;
                        break;
                    }
                    // french e.g. 'resnais', 'artois'
                    if (
                        $this->current === $this->last
                        && $this->StringAt($this->original, $this->current - 2, 2, array('AI', 'OI'))
                    ) {
                        // secondary code only, nothing is added to the primary code
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('S', 'Z'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'T':
                    if ($this->StringAt($this->original, $this->current, 4, array('TION'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 3, array('TIA', 'TCH'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('TH'))
                        || $this->StringAt($this->original, $this->current, 3, array('TTH'))
                    ) {
                        // special case 'thomas', 'thames' or germanic
                        if (
                            $this->StringAt($this->original, $this->current + 2, 2, array('OM', 'AM'))
                            || $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                        ) {
                            $this->primary .= 'T';
                            $this->secondary .= 'T';
                        } else {
                            // the digit zero is the code emitted for 'th' in the primary code
                            $this->primary .= '0';
                            $this->secondary .= 'T';
                        }
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('T', 'D'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    break;
                case 'V':
                    if (substr($this->original, $this->current + 1, 1) === 'V') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'W':
                    // can also be in middle of word
                    if ($this->StringAt($this->original, $this->current, 2, array('WR'))) {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                        $this->current += 2;
                        break;
                    }
                    if (
                        $this->current === 0
                        && (
                            $this->IsVowel($this->original, $this->current + 1)
                            || $this->StringAt($this->original, $this->current, 2, array('WH'))
                        )
                    ) {
                        // Wasserman should match Vasserman
                        if ($this->IsVowel($this->original, $this->current + 1)) {
                            $this->primary .= 'A';
                            $this->secondary .= 'F';
                        } else {
                            // need Uomo to match Womo
                            $this->primary .= 'A';
                            $this->secondary .= 'A';
                        }
                    }
                    // Arnow should match Arnoff
                    if (
                        ($this->current === $this->last && $this->IsVowel($this->original, $this->current - 1))
                        || $this->StringAt($this->original, $this->current - 1, 5, array('EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'))
                        || $this->StringAt($this->original, 0, 3, array('SCH'))
                    ) {
                        // secondary code only, nothing is added to the primary code
                        $this->secondary .= 'F';
                        $this->current += 1;
                        break;
                    }
                    // polish e.g. 'filipowicz'
                    if ($this->StringAt($this->original, $this->current, 4, array('WICZ', 'WITZ'))) {
                        $this->primary .= 'TS';
                        $this->secondary .= 'FX';
                        $this->current += 4;
                        break;
                    }
                    // else skip it
                    $this->current += 1;
                    break;
                case 'X':
                    // french e.g. breaux
                    if (
                        !(
                            $this->current === $this->last
                            && (
                                $this->StringAt($this->original, $this->current - 3, 3, array('IAU', 'EAU'))
                                || $this->StringAt($this->original, $this->current - 2, 2, array('AU', 'OU'))
                            )
                        )
                    ) {
                        $this->primary .= 'KS';
                        $this->secondary .= 'KS';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('C', 'X'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    } elseif (
                        $this->StringAt($this->original, $this->current + 1, 2, array('ZO', 'ZI', 'ZA'))
                        || (
                            $this->SlavoGermanic($this->original)
                            && $this->current > 0
                            && substr($this->original, $this->current - 1, 1) !== 'T'
                        )
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'TS';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'Z') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                default:
                    $this->current += 1;
            }
        }
        // Both codes are limited to 4 characters
        $this->primary = substr($this->primary, 0, 4);
        $this->secondary = substr($this->secondary, 0, 4);
        return array(
            'primary' => $this->primary,
            'secondary' => $this->secondary
        );
    }

    /**
     * Tests whether the fragment of $string at $start with $length bytes equals one of the candidates.
     *
     * @param string $string String to look into
     * @param integer $start Zero-based offset of the fragment; offsets outside the string never match
     * @param integer $length Length of the fragment
     * @param array $list Candidate strings
     * @return integer 1 if the fragment is one of the candidates, 0 otherwise
     */
    public function StringAt($string, $start, $length, $list) {
        if ($start < 0 || $start >= strlen($string)) {
            return 0;
        }
        // Extract the fragment once instead of once per candidate
        $fragment = substr($string, $start, $length);
        return in_array($fragment, $list, TRUE) ? 1 : 0;
    }

    /**
     * Tests whether the character at $pos is one of A, E, I, O, U or Y (upper case only).
     *
     * @param string $string String to look into
     * @param integer $pos Zero-based offset; offsets outside the string are never vowels
     * @return integer 1 if the character is a vowel, 0 otherwise
     */
    public function IsVowel($string, $pos) {
        // Without this guard a negative offset would make substr() count from the end of the string
        if ($pos < 0 || $pos >= strlen($string)) {
            return 0;
        }
        return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
    }

    /**
     * Tests whether the string contains 'W', 'K', 'CZ' or 'WITZ' anywhere.
     *
     * @param string $string Upper-cased string to test
     * @return integer 1 if one of the markers is found, 0 otherwise
     */
    public function SlavoGermanic($string) {
        return preg_match('/W|K|CZ|WITZ/', $string);
    }

}
835
836
837 ?>