Replace all TINYTEXT columns with VARCHAR(255) and change index "parent" of fe_user...
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / class.doublemetaphone.php
1 <?php
2 // VERSION DoubleMetaphone Class 1.01
3 //
4 // DESCRIPTION
5 //
6 // This class implements a "sounds like" algorithm developed
7 // by Lawrence Philips which he published in the June, 2000 issue
8 // of C/C++ Users Journal. Double Metaphone is an improved
9 // version of Philips' original Metaphone algorithm.
10 //
11 // COPYRIGHT
12 //
13 // Copyright 2001, Stephen Woodbridge <woodbri@swoodbridge.com>
14 // All rights reserved.
15 //
16 // http://swoodbridge.com/DoubleMetaPhone/
17 //
18 // This PHP translation is based heavily on the C implementation
19 // by Maurice Aubrey <maurice@hevanet.com>, which in turn
20 // is based heavily on the C++ implementation by
21 // Lawrence Philips and incorporates several bug fixes courtesy
22 // of Kevin Atkinson <kevina@users.sourceforge.net>.
23 //
24 // This module is free software; you may redistribute it and/or
25 // modify it under the same terms as Perl itself.
26 //
27 // CONTRIBUTIONS
28 //
29 // 17-May-2002 Geoff Caplan http://www.advantae.com
30 // Bug fix: added code to return class object which I forgot to do
31 // Created a functional callable version instead of the class version
32 // which is faster if you are calling this a lot.
33 //
34 // ------------------------------------------------------------------
35
36
37
38 // TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
39 // TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file:
40 // TYPO3: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
41 // TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example.
42
/**
 * Double Metaphone "sounds like" encoder.
 *
 * Produces a primary and a secondary phonetic key (each at most four
 * characters long) for a word. The TYPO3 entry point is metaphone(), which
 * returns only the primary key.
 */
class user_DoubleMetaPhone
{
    // properties

    // Upper-cased, space-padded copy of the word being encoded.
    var $original  = "";
    // Primary phonetic key built so far.
    var $primary   = "";
    // Secondary (alternative pronunciation) phonetic key built so far.
    var $secondary = "";
    // Byte length of the word passed in (without padding).
    var $length    = 0;
    // Index of the last character of the word (length - 1).
    var $last      = 0;
    // Index of the character currently being examined.
    var $current   = 0;

    // methods

    // TYPO3 specific API to this class. BEGIN
    /**
     * Returns the primary Double Metaphone key of a string.
     *
     * @param string  $string            Word to encode.
     * @param integer $sys_language_uid  Accepted for the hook signature; not used here.
     * @return string Primary key, at most four characters.
     */
    function metaphone($string, $sys_language_uid = 0) {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }
    // TYPO3 specific API to this class. END


    // Public method

    /**
     * Computes the primary and secondary Double Metaphone keys of a string.
     *
     * The word is upper-cased with strtoupper() and scanned byte by byte with
     * substr(); scanning stops once both keys hold at least four characters
     * or the end of the word is reached.
     *
     * @param string $string Word to encode.
     * @return array Array with the keys "primary" and "secondary", each a
     *               string truncated to four characters.
     */
    function DoubleMetaPhone($string) {

        $this->primary   = "";
        $this->secondary = "";
        $this->current   = 0;

        $this->length   = strlen($string);
        $this->last     = $this->length - 1;
        // Trailing padding lets the look-ahead checks read past the last letter.
        $this->original = strtoupper($string . " ");

        // skip this at beginning of word
        if ($this->StringAt($this->original, 0, 2,
                            array('GN', 'KN', 'PN', 'WR', 'PS'))) {
            $this->current++;
        }

        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if (substr($this->original, 0, 1) == 'X') {
            $this->primary   .= "S";   // 'Z' maps to 'S'
            $this->secondary .= "S";
            $this->current++;
        }

        // main loop: keep going until BOTH keys have four characters.
        // (The length of the secondary key is compared to 4; the comparison
        // must sit outside the strlen() call.)
        while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
            if ($this->current >= $this->length) {
                break;
            }

            switch (substr($this->original, $this->current, 1)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($this->current == 0) {
                        // all init vowels now map to 'A'
                        $this->primary   .= 'A';
                        $this->secondary .= 'A';
                    }
                    $this->current += 1;
                    break;

                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->primary   .= 'P';
                    $this->secondary .= 'P';

                    if (substr($this->original, $this->current + 1, 1) == 'B') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                // NOTE(review): the switch subject is a single byte, so this
                // label can only match when this file and the input use a
                // single-byte encoding (e.g. ISO-8859-1) - confirm.
                case 'Ç':
                    $this->primary   .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;

                case 'C':
                    // various germanic
                    if (($this->current > 1)
                        && !$this->IsVowel($this->original, $this->current - 2)
                        && $this->StringAt($this->original, $this->current - 1, 3,
                                           array("ACH"))
                        && ((substr($this->original, $this->current + 2, 1) != 'I')
                            && ((substr($this->original, $this->current + 2, 1) != 'E')
                                || $this->StringAt($this->original, $this->current - 2, 6,
                                                   array("BACHER", "MACHER"))))) {

                        $this->primary   .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }

                    // special case 'caesar'
                    if (($this->current == 0)
                        && $this->StringAt($this->original, $this->current, 6,
                                           array("CAESAR"))) {
                        $this->primary   .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 2;
                        break;
                    }

                    // italian 'chianti'
                    if ($this->StringAt($this->original, $this->current, 4,
                                        array("CHIA"))) {
                        $this->primary   .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("CH"))) {

                        // find 'michael'
                        if (($this->current > 0)
                            && $this->StringAt($this->original, $this->current, 4,
                                               array("CHAE"))) {
                            $this->primary   .= 'K';
                            $this->secondary .= 'X';
                            $this->current += 2;
                            break;
                        }

                        // greek roots e.g. 'chemistry', 'chorus'
                        if (($this->current == 0)
                            && ($this->StringAt($this->original, $this->current + 1, 5,
                                                array("HARAC", "HARIS"))
                                || $this->StringAt($this->original, $this->current + 1, 3,
                                                   array("HOR", "HYM", "HIA", "HEM")))
                            && !$this->StringAt($this->original, 0, 5, array("CHORE"))) {
                            $this->primary   .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }

                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if (($this->StringAt($this->original, 0, 4, array("VAN ", "VON "))
                                || $this->StringAt($this->original, 0, 3, array("SCH")))
                            // 'architect' but not 'arch', orchestra', 'orchid'
                            || $this->StringAt($this->original, $this->current - 2, 6,
                                               array("ORCHES", "ARCHIT", "ORCHID"))
                            || $this->StringAt($this->original, $this->current + 2, 1,
                                               array("T", "S"))
                            || (($this->StringAt($this->original, $this->current - 1, 1,
                                                 array("A", "O", "U", "E"))
                                    || ($this->current == 0))
                                // e.g. 'wachtler', 'weschsler', but not 'tichner'
                                && $this->StringAt($this->original, $this->current + 2, 1,
                                                   array("L", "R", "N", "M", "B", "H", "F", "V", "W", " ")))) {
                            $this->primary   .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            if ($this->current > 0) {
                                if ($this->StringAt($this->original, 0, 2, array("MC"))) {
                                    // e.g. 'McHugh'
                                    $this->primary   .= 'K';
                                    $this->secondary .= 'K';
                                } else {
                                    $this->primary   .= 'X';
                                    $this->secondary .= 'K';
                                }
                            } else {
                                $this->primary   .= 'X';
                                $this->secondary .= 'X';
                            }
                        }
                        $this->current += 2;
                        break;
                    }

                    // e.g. 'czerny'
                    if ($this->StringAt($this->original, $this->current, 2, array("CZ"))
                        && !$this->StringAt($this->original, $this->current - 2, 4,
                                            array("WICZ"))) {
                        $this->primary   .= 'S';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }

                    // e.g. 'focaccia'
                    if ($this->StringAt($this->original, $this->current + 1, 3,
                                        array("CIA"))) {
                        $this->primary   .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }

                    // double 'C', but not McClellan'
                    if ($this->StringAt($this->original, $this->current, 2, array("CC"))
                        && !(($this->current == 1)
                             && (substr($this->original, 0, 1) == 'M'))) {
                        // 'bellocchio' but not 'bacchus'
                        if ($this->StringAt($this->original, $this->current + 2, 1,
                                            array("I", "E", "H"))
                            && !$this->StringAt($this->original, $this->current + 2, 2,
                                                array("HU"))) {
                            // 'accident', 'accede', 'succeed'
                            if ((($this->current == 1)
                                    && (substr($this->original, $this->current - 1, 1) == 'A'))
                                || $this->StringAt($this->original, $this->current - 1, 5,
                                                   array("UCCEE", "UCCES"))) {
                                $this->primary   .= "KS";
                                $this->secondary .= "KS";
                            } else {
                                // 'bacci', 'bertucci', other italian
                                $this->primary   .= "X";
                                $this->secondary .= "X";
                            }
                            $this->current += 3;
                            break;
                        } else {
                            // Pierce's rule
                            $this->primary   .= "K";
                            $this->secondary .= "K";
                            $this->current += 2;
                            break;
                        }
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("CK", "CG", "CQ"))) {
                        $this->primary   .= "K";
                        $this->secondary .= "K";
                        $this->current += 2;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("CI", "CE", "CY"))) {
                        // italian vs. english
                        if ($this->StringAt($this->original, $this->current, 3,
                                            array("CIO", "CIE", "CIA"))) {
                            $this->primary   .= "S";
                            $this->secondary .= "X";
                        } else {
                            $this->primary   .= "S";
                            $this->secondary .= "S";
                        }
                        $this->current += 2;
                        break;
                    }

                    // else
                    $this->primary   .= "K";
                    $this->secondary .= "K";

                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->StringAt($this->original, $this->current + 1, 2,
                                        array(" C", " Q", " G"))) {
                        $this->current += 3;
                    } else {
                        if ($this->StringAt($this->original, $this->current + 1, 1,
                                            array("C", "K", "Q"))
                            && !$this->StringAt($this->original, $this->current + 1, 2,
                                                array("CE", "CI"))) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                    }
                    break;

                case 'D':
                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("DG"))) {
                        if ($this->StringAt($this->original, $this->current + 2, 1,
                                            array("I", "E", "Y"))) {
                            // e.g. 'edge'
                            $this->primary   .= "J";
                            $this->secondary .= "J";
                            $this->current += 3;
                            break;
                        } else {
                            // e.g. 'edgar'
                            $this->primary   .= "TK";
                            $this->secondary .= "TK";
                            $this->current += 2;
                            break;
                        }
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("DT", "DD"))) {
                        $this->primary   .= "T";
                        $this->secondary .= "T";
                        $this->current += 2;
                        break;
                    }

                    // else
                    $this->primary   .= "T";
                    $this->secondary .= "T";
                    $this->current += 1;
                    break;

                case 'F':
                    if (substr($this->original, $this->current + 1, 1) == 'F') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "F";
                    $this->secondary .= "F";
                    break;

                case 'G':
                    if (substr($this->original, $this->current + 1, 1) == 'H') {
                        if (($this->current > 0)
                            && !$this->IsVowel($this->original, $this->current - 1)) {
                            $this->primary   .= "K";
                            $this->secondary .= "K";
                            $this->current += 2;
                            break;
                        }

                        // 'ghislane', 'ghiradelli'
                        if ($this->current == 0) {
                            if (substr($this->original, $this->current + 2, 1) == 'I') {
                                $this->primary   .= "J";
                                $this->secondary .= "J";
                            } else {
                                $this->primary   .= "K";
                                $this->secondary .= "K";
                            }
                            $this->current += 2;
                            break;
                        }

                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if ((($this->current > 1)
                                && $this->StringAt($this->original, $this->current - 2, 1,
                                                   array("B", "H", "D")))
                            // e.g. 'bough'
                            || (($this->current > 2)
                                && $this->StringAt($this->original, $this->current - 3, 1,
                                                   array("B", "H", "D")))
                            // e.g. 'broughton'
                            || (($this->current > 3)
                                && $this->StringAt($this->original, $this->current - 4, 1,
                                                   array("B", "H")))) {
                            // silent 'gh': nothing is added to either key
                            $this->current += 2;
                            break;
                        } else {
                            // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                            if (($this->current > 2)
                                && (substr($this->original, $this->current - 1, 1) == 'U')
                                && $this->StringAt($this->original, $this->current - 3, 1,
                                                   array("C", "G", "L", "R", "T"))) {
                                $this->primary   .= "F";
                                $this->secondary .= "F";
                            } elseif (($this->current > 0)
                                      && substr($this->original, $this->current - 1, 1) != 'I') {
                                $this->primary   .= "K";
                                $this->secondary .= "K";
                            }
                            $this->current += 2;
                            break;
                        }
                    }

                    if (substr($this->original, $this->current + 1, 1) == 'N') {
                        if (($this->current == 1) && $this->IsVowel($this->original, 0)
                            && !$this->SlavoGermanic($this->original)) {
                            $this->primary   .= "KN";
                            $this->secondary .= "N";
                        } else {
                            // not e.g. 'cagney'
                            // The single-character test on current + 1 is always
                            // true in this branch (that character is 'N'); it
                            // previously compared the whole remainder of the word.
                            if (!$this->StringAt($this->original, $this->current + 2, 2,
                                                 array("EY"))
                                && (substr($this->original, $this->current + 1, 1) != "Y")
                                && !$this->SlavoGermanic($this->original)) {
                                $this->primary   .= "N";
                                $this->secondary .= "KN";
                            } else {
                                $this->primary   .= "KN";
                                $this->secondary .= "KN";
                            }
                        }
                        $this->current += 2;
                        break;
                    }

                    // 'tagliaro'
                    if ($this->StringAt($this->original, $this->current + 1, 2,
                                        array("LI"))
                        && !$this->SlavoGermanic($this->original)) {
                        $this->primary   .= "KL";
                        $this->secondary .= "L";
                        $this->current += 2;
                        break;
                    }

                    // -ges-, -gep-, -gel- at beginning
                    if (($this->current == 0)
                        && ((substr($this->original, $this->current + 1, 1) == 'Y')
                            || $this->StringAt($this->original, $this->current + 1, 2,
                                               array("ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE",
                                                     "EI", "ER")))) {
                        $this->primary   .= "K";
                        $this->secondary .= "J";
                        $this->current += 2;
                        break;
                    }

                    // -ger-, -gy-
                    if (($this->StringAt($this->original, $this->current + 1, 2,
                                         array("ER"))
                            || (substr($this->original, $this->current + 1, 1) == 'Y'))
                        && !$this->StringAt($this->original, 0, 6,
                                            array("DANGER", "RANGER", "MANGER"))
                        && !$this->StringAt($this->original, $this->current - 1, 1,
                                            array("E", "I"))
                        && !$this->StringAt($this->original, $this->current - 1, 3,
                                            array("RGY", "OGY"))) {
                        $this->primary   .= "K";
                        $this->secondary .= "J";
                        $this->current += 2;
                        break;
                    }

                    // italian e.g. 'biaggi'
                    if ($this->StringAt($this->original, $this->current + 1, 1,
                                        array("E", "I", "Y"))
                        || $this->StringAt($this->original, $this->current - 1, 4,
                                           array("AGGI", "OGGI"))) {
                        // obvious germanic
                        if (($this->StringAt($this->original, 0, 4, array("VAN ", "VON "))
                                || $this->StringAt($this->original, 0, 3, array("SCH")))
                            || $this->StringAt($this->original, $this->current + 1, 2,
                                               array("ET"))) {
                            $this->primary   .= "K";
                            $this->secondary .= "K";
                        } else {
                            // always soft if french ending
                            if ($this->StringAt($this->original, $this->current + 1, 4,
                                                array("IER "))) {
                                $this->primary   .= "J";
                                $this->secondary .= "J";
                            } else {
                                $this->primary   .= "J";
                                $this->secondary .= "K";
                            }
                        }
                        $this->current += 2;
                        break;
                    }

                    if (substr($this->original, $this->current + 1, 1) == 'G') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }

                    $this->primary   .= 'K';
                    $this->secondary .= 'K';
                    break;

                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if ((($this->current == 0)
                            || $this->IsVowel($this->original, $this->current - 1))
                        && $this->IsVowel($this->original, $this->current + 1)) {
                        $this->primary   .= 'H';
                        $this->secondary .= 'H';
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if ($this->StringAt($this->original, $this->current, 4,
                                        array("JOSE"))
                        || $this->StringAt($this->original, 0, 4, array("SAN "))) {
                        if ((($this->current == 0)
                                && (substr($this->original, $this->current + 4, 1) == ' '))
                            || $this->StringAt($this->original, 0, 4, array("SAN "))) {
                            $this->primary   .= 'H';
                            $this->secondary .= 'H';
                        } else {
                            $this->primary   .= "J";
                            $this->secondary .= 'H';
                        }
                        $this->current += 1;
                        break;
                    }

                    if (($this->current == 0)
                        && !$this->StringAt($this->original, $this->current, 4,
                                            array("JOSE"))) {
                        $this->primary   .= 'J';  // Yankelovich/Jankelowicz
                        $this->secondary .= 'A';
                    } else {
                        // spanish pron. of .e.g. 'bajador'
                        if ($this->IsVowel($this->original, $this->current - 1)
                            && !$this->SlavoGermanic($this->original)
                            && ((substr($this->original, $this->current + 1, 1) == 'A')
                                || (substr($this->original, $this->current + 1, 1) == 'O'))) {
                            $this->primary   .= "J";
                            $this->secondary .= "H";
                        } else {
                            if ($this->current == $this->last) {
                                // final 'J': only the primary key gets a code
                                $this->primary .= "J";
                            } else {
                                if (!$this->StringAt($this->original, $this->current + 1, 1,
                                                     array("L", "T", "K", "S", "N", "M", "B", "Z"))
                                    && !$this->StringAt($this->original, $this->current - 1, 1,
                                                        array("S", "K", "L"))) {
                                    $this->primary   .= "J";
                                    $this->secondary .= "J";
                                }
                            }
                        }
                    }

                    if (substr($this->original, $this->current + 1, 1) == 'J') { // it could happen
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'K':
                    if (substr($this->original, $this->current + 1, 1) == 'K') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "K";
                    $this->secondary .= "K";
                    break;

                case 'L':
                    if (substr($this->original, $this->current + 1, 1) == 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if ((($this->current == ($this->length - 3))
                                && $this->StringAt($this->original, $this->current - 1, 4,
                                                   array("ILLO", "ILLA", "ALLE")))
                            || (($this->StringAt($this->original, $this->last - 1, 2,
                                                 array("AS", "OS"))
                                    || $this->StringAt($this->original, $this->last, 1,
                                                       array("A", "O")))
                                && $this->StringAt($this->original, $this->current - 1, 4,
                                                   array("ALLE")))) {
                            // only the primary key gets a code
                            $this->primary .= "L";
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "L";
                    $this->secondary .= "L";
                    break;

                case 'M':
                    if (($this->StringAt($this->original, $this->current - 1, 3,
                                         array("UMB"))
                            && ((($this->current + 1) == $this->last)
                                || $this->StringAt($this->original, $this->current + 2, 2,
                                                   array("ER"))))
                        // 'dumb', 'thumb'
                        || (substr($this->original, $this->current + 1, 1) == 'M')) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "M";
                    $this->secondary .= "M";
                    break;

                case 'N':
                    if (substr($this->original, $this->current + 1, 1) == 'N') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "N";
                    $this->secondary .= "N";
                    break;

                // NOTE(review): same single-byte caveat as the cedilla case
                // above - confirm the encoding this file is saved in.
                case 'Ñ':
                    $this->current += 1;
                    $this->primary   .= "N";
                    $this->secondary .= "N";
                    break;

                case 'P':
                    if (substr($this->original, $this->current + 1, 1) == 'H') {
                        $this->current += 2;
                        $this->primary   .= "F";
                        $this->secondary .= "F";
                        break;
                    }

                    // also account for "campbell" and "raspberry"
                    if ($this->StringAt($this->original, $this->current + 1, 1,
                                        array("P", "B"))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "P";
                    $this->secondary .= "P";
                    break;

                case 'Q':
                    if (substr($this->original, $this->current + 1, 1) == 'Q') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "K";
                    $this->secondary .= "K";
                    break;

                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if (($this->current == $this->last)
                        && !$this->SlavoGermanic($this->original)
                        && $this->StringAt($this->original, $this->current - 2, 2,
                                           array("IE"))
                        && !$this->StringAt($this->original, $this->current - 4, 2,
                                            array("ME", "MA"))) {
                        // only the secondary key gets a code
                        $this->secondary .= "R";
                    } else {
                        $this->primary   .= "R";
                        $this->secondary .= "R";
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'R') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->StringAt($this->original, $this->current - 1, 3,
                                        array("ISL", "YSL"))) {
                        $this->current += 1;
                        break;
                    }

                    // special case 'sugar-'
                    if (($this->current == 0)
                        && $this->StringAt($this->original, $this->current, 5,
                                           array("SUGAR"))) {
                        $this->primary   .= "X";
                        $this->secondary .= "S";
                        $this->current += 1;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("SH"))) {
                        // germanic
                        if ($this->StringAt($this->original, $this->current + 1, 4,
                                            array("HEIM", "HOEK", "HOLM", "HOLZ"))) {
                            $this->primary   .= "S";
                            $this->secondary .= "S";
                        } else {
                            $this->primary   .= "X";
                            $this->secondary .= "X";
                        }
                        $this->current += 2;
                        break;
                    }

                    // italian & armenian
                    if ($this->StringAt($this->original, $this->current, 3,
                                        array("SIO", "SIA"))
                        || $this->StringAt($this->original, $this->current, 4,
                                           array("SIAN"))) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->primary   .= "S";
                            $this->secondary .= "X";
                        } else {
                            $this->primary   .= "S";
                            $this->secondary .= "S";
                        }
                        $this->current += 3;
                        break;
                    }

                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                    if ((($this->current == 0)
                            && $this->StringAt($this->original, $this->current + 1, 1,
                                               array("M", "N", "L", "W")))
                        || $this->StringAt($this->original, $this->current + 1, 1,
                                           array("Z"))) {
                        $this->primary   .= "S";
                        $this->secondary .= "X";
                        if ($this->StringAt($this->original, $this->current + 1, 1,
                                            array("Z"))) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("SC"))) {
                        // Schlesinger's rule
                        if (substr($this->original, $this->current + 2, 1) == 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->StringAt($this->original, $this->current + 3, 2,
                                                array("OO", "ER", "EN", "UY", "ED", "EM"))) {
                                // 'schermerhorn', 'schenker'
                                if ($this->StringAt($this->original, $this->current + 3, 2,
                                                    array("ER", "EN"))) {
                                    $this->primary   .= "X";
                                    $this->secondary .= "SK";
                                } else {
                                    $this->primary   .= "SK";
                                    $this->secondary .= "SK";
                                }
                                $this->current += 3;
                                break;
                            } else {
                                if (($this->current == 0)
                                    && !$this->IsVowel($this->original, 3)
                                    && (substr($this->original, $this->current + 3, 1) != 'W')) {
                                    $this->primary   .= "X";
                                    $this->secondary .= "S";
                                } else {
                                    $this->primary   .= "X";
                                    $this->secondary .= "X";
                                }
                                $this->current += 3;
                                break;
                            }
                        }

                        if ($this->StringAt($this->original, $this->current + 2, 1,
                                            array("I", "E", "Y"))) {
                            $this->primary   .= "S";
                            $this->secondary .= "S";
                            $this->current += 3;
                            break;
                        }

                        // else
                        $this->primary   .= "SK";
                        $this->secondary .= "SK";
                        $this->current += 3;
                        break;
                    }

                    // french e.g. 'resnais', 'artois'
                    if (($this->current == $this->last)
                        && $this->StringAt($this->original, $this->current - 2, 2,
                                           array("AI", "OI"))) {
                        // only the secondary key gets a code
                        $this->secondary .= "S";
                    } else {
                        $this->primary   .= "S";
                        $this->secondary .= "S";
                    }

                    if ($this->StringAt($this->original, $this->current + 1, 1,
                                        array("S", "Z"))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'T':
                    if ($this->StringAt($this->original, $this->current, 4,
                                        array("TION"))) {
                        $this->primary   .= "X";
                        $this->secondary .= "X";
                        $this->current += 3;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 3,
                                        array("TIA", "TCH"))) {
                        $this->primary   .= "X";
                        $this->secondary .= "X";
                        $this->current += 3;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current, 2,
                                        array("TH"))
                        || $this->StringAt($this->original, $this->current, 3,
                                           array("TTH"))) {
                        // special case 'thomas', 'thames' or germanic
                        if ($this->StringAt($this->original, $this->current + 2, 2,
                                            array("OM", "AM"))
                            || $this->StringAt($this->original, 0, 4, array("VAN ", "VON "))
                            || $this->StringAt($this->original, 0, 3, array("SCH"))) {
                            $this->primary   .= "T";
                            $this->secondary .= "T";
                        } else {
                            // the digit zero is the code emitted for 'th'
                            $this->primary   .= "0";
                            $this->secondary .= "T";
                        }
                        $this->current += 2;
                        break;
                    }

                    if ($this->StringAt($this->original, $this->current + 1, 1,
                                        array("T", "D"))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "T";
                    $this->secondary .= "T";
                    break;

                case 'V':
                    if (substr($this->original, $this->current + 1, 1) == 'V') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary   .= "F";
                    $this->secondary .= "F";
                    break;

                case 'W':
                    // can also be in middle of word
                    if ($this->StringAt($this->original, $this->current, 2, array("WR"))) {
                        $this->primary   .= "R";
                        $this->secondary .= "R";
                        $this->current += 2;
                        break;
                    }

                    if (($this->current == 0)
                        && ($this->IsVowel($this->original, $this->current + 1)
                            || $this->StringAt($this->original, $this->current, 2,
                                               array("WH")))) {
                        // Wasserman should match Vasserman
                        if ($this->IsVowel($this->original, $this->current + 1)) {
                            $this->primary   .= "A";
                            $this->secondary .= "F";
                        } else {
                            // need Uomo to match Womo
                            $this->primary   .= "A";
                            $this->secondary .= "A";
                        }
                        // no break here: the checks below still run and the
                        // final "skip it" advances past the 'W'
                    }

                    // Arnow should match Arnoff
                    if ((($this->current == $this->last)
                            && $this->IsVowel($this->original, $this->current - 1))
                        || $this->StringAt($this->original, $this->current - 1, 5,
                                           array("EWSKI", "EWSKY", "OWSKI", "OWSKY"))
                        || $this->StringAt($this->original, 0, 3, array("SCH"))) {
                        // only the secondary key gets a code
                        $this->secondary .= "F";
                        $this->current += 1;
                        break;
                    }

                    // polish e.g. 'filipowicz'
                    if ($this->StringAt($this->original, $this->current, 4,
                                        array("WICZ", "WITZ"))) {
                        $this->primary   .= "TS";
                        $this->secondary .= "FX";
                        $this->current += 4;
                        break;
                    }

                    // else skip it
                    $this->current += 1;
                    break;

                case 'X':
                    // french e.g. breaux
                    if (!(($this->current == $this->last)
                          && ($this->StringAt($this->original, $this->current - 3, 3,
                                              array("IAU", "EAU"))
                              || $this->StringAt($this->original, $this->current - 2, 2,
                                                 array("AU", "OU"))))) {
                        $this->primary   .= "KS";
                        $this->secondary .= "KS";
                    }

                    if ($this->StringAt($this->original, $this->current + 1, 1,
                                        array("C", "X"))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if (substr($this->original, $this->current + 1, 1) == "H") {
                        $this->primary   .= "J";
                        $this->secondary .= "J";
                        $this->current += 2;
                        break;
                    } elseif ($this->StringAt($this->original, $this->current + 1, 2,
                                              array("ZO", "ZI", "ZA"))
                              || ($this->SlavoGermanic($this->original)
                                  && (($this->current > 0)
                                      && substr($this->original, $this->current - 1, 1) != 'T'))) {
                        $this->primary   .= "S";
                        $this->secondary .= "TS";
                    } else {
                        $this->primary   .= "S";
                        $this->secondary .= "S";
                    }

                    if (substr($this->original, $this->current + 1, 1) == 'Z') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                default:
                    // any other byte contributes nothing to the keys
                    $this->current += 1;

            } // end switch

        } // end while

        // A rule may append two characters at once, so cut both keys to four.
        $this->primary   = substr($this->primary, 0, 4);
        $this->secondary = substr($this->secondary, 0, 4);

        return array(
            "primary"   => $this->primary,
            "secondary" => $this->secondary
        );

    } // end of function DoubleMetaPhone


    // Private methods

    /**
     * Tests whether the substring of $string at $start with $length bytes
     * equals one of the candidates in $list.
     *
     * @param string  $string Text to look into.
     * @param integer $start  Zero-based start offset; negative or past-the-end offsets never match.
     * @param integer $length Number of bytes to compare.
     * @param array   $list   Sequentially indexed candidate strings.
     * @return integer 1 on a match, 0 otherwise.
     */
    function StringAt($string, $start, $length, $list) {
        if (($start < 0) || ($start >= strlen($string))) {
            return 0;
        }

        // The slice does not depend on the candidate, so take it once.
        $slice = substr($string, $start, $length);
        $count = count($list);
        for ($i = 0; $i < $count; $i++) {
            if ($list[$i] == $slice) {
                return 1;
            }
        }
        return 0;
    }

    /**
     * Tests whether the character at $pos is one of A, E, I, O, U or Y.
     *
     * Uses preg_match() because ereg() no longer exists in PHP 7 and later.
     *
     * @param string  $string Upper-cased text.
     * @param integer $pos    Offset of the character to test, passed straight to substr().
     * @return integer 1 if the character is a vowel, 0 otherwise.
     */
    function IsVowel($string, $pos) {
        return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
    }

    /**
     * Tests whether the text contains "W", "K", "CZ" or "WITZ" anywhere.
     *
     * Uses preg_match() because ereg() no longer exists in PHP 7 and later.
     *
     * @param string $string Upper-cased text.
     * @return integer 1 if one of the markers occurs, 0 otherwise.
     */
    function SlavoGermanic($string) {
        return preg_match('/W|K|CZ|WITZ/', $string);
    }
} // end of class user_DoubleMetaPhone
1027 ?>