16a4fb539bf167f41ea0918eba333dd767399a10
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Utility / DoubleMetaPhoneUtility.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Utility;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
/**
 * Double Metaphone phonetic encoder.
 *
 * Produces a primary and a secondary phonetic key (each at most four characters) for a word,
 * so that words which sound alike can be matched even if they are spelled differently.
 *
 * TYPO3: The class had to be renamed from plain "DoubleMetaPhone" to
 * "\TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility" because TYPO3 requires a user class to be prefixed.
 * TYPO3: If you want to use this metaphone method instead of the default in the indexer you can enable it in the extension configuration
 * TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
 */
class DoubleMetaPhoneUtility
{
    /**
     * Upper-cased input word followed by one blank (sentinel for look-ahead checks)
     *
     * @var string
     */
    public $original = '';

    /**
     * Primary phonetic key built by the last DoubleMetaPhone() call
     *
     * @var string
     */
    public $primary = '';

    /**
     * Secondary (alternative pronunciation) key built by the last DoubleMetaPhone() call
     *
     * @var string
     */
    public $secondary = '';

    /**
     * Byte length of the input word (without the sentinel blank)
     *
     * @var int
     */
    public $length = 0;

    /**
     * Index of the last byte of the input word ($length - 1)
     *
     * @var int
     */
    public $last = 0;

    /**
     * Index of the byte currently being examined by the main loop
     *
     * @var int
     */
    public $current = 0;

    // TYPO3 specific API to this class. BEGIN
    /**
     * Returns the primary Double Metaphone key of a word.
     *
     * @param string $string Word to encode
     * @param int $sys_language_uid Not used by this implementation; kept so the method signature stays unchanged
     * @return string Primary key, at most four characters
     */
    public function metaphone($string, $sys_language_uid = 0)
    {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }

    // TYPO3 specific API to this class. END
    /**
     * Computes both Double Metaphone keys of a word.
     *
     * The object properties ($original, $primary, $secondary, $length, $last, $current) are
     * reset and used as working state, so the values of a previous call are overwritten.
     *
     * @param string $string Word to encode
     * @return array Array with the keys 'primary' and 'secondary', each a string of at most four characters
     */
    public function DoubleMetaPhone($string)
    {
        $string = (string)$string;
        $this->primary = '';
        $this->secondary = '';
        $this->current = 0;
        $this->length = strlen($string);
        $this->last = $this->length - 1;
        // The appended blank guarantees that look-ahead reads past the last letter see ' '
        $this->original = strtoupper($string . ' ');
        // skip this at beginning of word
        if ($this->StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
            $this->current++;
        }
        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if ($this->original[0] === 'X') {
            $this->primary .= 'S';
            // 'Z' maps to 'S'
            $this->secondary .= 'S';
            $this->current++;
        }
        // main loop: run until BOTH keys have four characters or the input is exhausted
        while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
            if ($this->current >= $this->length) {
                break;
            }
            switch (substr($this->original, $this->current, 1)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($this->current === 0) {
                        // all init vowels now map to 'A'
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                    $this->current += 1;
                    break;
                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    if (substr($this->original, $this->current + 1, 1) === 'B') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                // NOTE(review): the switch subject is a single byte (substr(..., 1)); if this file is stored
                // in a multi-byte encoding such as UTF-8 this label cannot match - confirm the intended encoding.
                case 'Ç':
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                case 'C':
                    // various germanic
                    if ($this->current > 1
                        && !$this->IsVowel($this->original, $this->current - 2)
                        && $this->StringAt($this->original, $this->current - 1, 3, ['ACH'])
                        && (
                            substr($this->original, $this->current + 2, 1) !== 'I'
                            && (
                                substr($this->original, $this->current + 2, 1) !== 'E'
                                || $this->StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])
                            )
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // special case 'caesar'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 6, ['CAESAR'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 2;
                        break;
                    }
                    // italian 'chianti'
                    if ($this->StringAt($this->original, $this->current, 4, ['CHIA'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CH'])) {
                        // find 'michael'
                        if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, ['CHAE'])) {
                            $this->primary .= 'K';
                            $this->secondary .= 'X';
                            $this->current += 2;
                            break;
                        }
                        // greek roots e.g. 'chemistry', 'chorus'
                        if ($this->current === 0
                            && (
                                $this->StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS'])
                                || $this->StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])
                            )
                            && !$this->StringAt($this->original, 0, 5, ['CHORE'])
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                            || $this->StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID'])
                            || $this->StringAt($this->original, $this->current + 2, 1, ['T', 'S'])
                            || (
                                (
                                    $this->StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E'])
                                    || $this->current === 0
                                )
                                && $this->StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])
                            )
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            if ($this->current > 0) {
                                if ($this->StringAt($this->original, 0, 2, ['MC'])) {
                                    // e.g. 'McHugh'
                                    $this->primary .= 'K';
                                    $this->secondary .= 'K';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'K';
                                }
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'czerny'
                    if ($this->StringAt($this->original, $this->current, 2, ['CZ'])
                        && !$this->StringAt($this->original, $this->current - 2, 4, ['WICZ'])
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'focaccia'
                    if ($this->StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    // double 'C', but not McClellan'
                    if ($this->StringAt($this->original, $this->current, 2, ['CC'])
                        && !($this->current === 1 && $this->original[0] === 'M')
                    ) {
                        // 'bellocchio' but not 'bacchus'
                        if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H'])
                            && !$this->StringAt($this->original, $this->current + 2, 2, ['HU'])
                        ) {
                            // 'accident', 'accede', 'succeed'
                            if (($this->current === 1 && substr($this->original, $this->current - 1, 1) === 'A')
                                || $this->StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])
                            ) {
                                $this->primary .= 'KS';
                                $this->secondary .= 'KS';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        }
                        // Pierce's rule
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
                        // italian vs. english
                        if ($this->StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
                        $this->current += 3;
                    } else {
                        if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q'])
                            && !$this->StringAt($this->original, $this->current + 1, 2, ['CE', 'CI'])
                        ) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                    }
                    break;
                case 'D':
                    if ($this->StringAt($this->original, $this->current, 2, ['DG'])) {
                        if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                            // e.g. 'edge'
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                            $this->current += 3;
                            break;
                        }
                        // e.g. 'edgar'
                        $this->primary .= 'TK';
                        $this->secondary .= 'TK';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 1;
                    break;
                case 'F':
                    if (substr($this->original, $this->current + 1, 1) === 'F') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'G':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        if ($this->current > 0 && !$this->IsVowel($this->original, $this->current - 1)) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // 'ghislane', 'ghiradelli'
                        if ($this->current === 0) {
                            if (substr($this->original, $this->current + 2, 1) === 'I') {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if (($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D']))
                            || ($this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D']))
                            || ($this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, ['B', 'H']))
                        ) {
                            $this->current += 2;
                            break;
                        }
                        // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ($this->current > 2
                            && substr($this->original, $this->current - 1, 1) === 'U'
                            && $this->StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])
                        ) {
                            $this->primary .= 'F';
                            $this->secondary .= 'F';
                        } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        }
                        $this->current += 2;
                        break;
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        if ($this->current === 1
                            && $this->IsVowel($this->original, 0)
                            && !$this->SlavoGermanic($this->original)
                        ) {
                            $this->primary .= 'KN';
                            $this->secondary .= 'N';
                        } else {
                            // not e.g. 'cagney'
                            if (!$this->StringAt($this->original, $this->current + 2, 2, ['EY'])
                                && substr($this->original, $this->current + 1, 1) !== 'Y'
                                && !$this->SlavoGermanic($this->original)
                            ) {
                                $this->primary .= 'N';
                                $this->secondary .= 'KN';
                            } else {
                                $this->primary .= 'KN';
                                $this->secondary .= 'KN';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // 'tagliaro'
                    if ($this->StringAt($this->original, $this->current + 1, 2, ['LI'])
                        && !$this->SlavoGermanic($this->original)
                    ) {
                        $this->primary .= 'KL';
                        $this->secondary .= 'L';
                        $this->current += 2;
                        break;
                    }
                    // -ges-, -gep-, -gel- at beginning
                    if ($this->current === 0
                        && (
                            substr($this->original, $this->current + 1, 1) === 'Y'
                            || $this->StringAt($this->original, $this->current + 1, 2, [
                                'ES',
                                'EP',
                                'EB',
                                'EL',
                                'EY',
                                'IB',
                                'IL',
                                'IN',
                                'IE',
                                'EI',
                                'ER'
                            ])
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // -ger-, -gy-
                    if ((
                            $this->StringAt($this->original, $this->current + 1, 2, ['ER'])
                            || substr($this->original, $this->current + 1, 1) === 'Y'
                        )
                        && !$this->StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER'])
                        && !$this->StringAt($this->original, $this->current - 1, 1, ['E', 'I'])
                        && !$this->StringAt($this->original, $this->current - 1, 3, ['RGY', 'OGY'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // italian e.g. 'biaggi'
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y'])
                        || $this->StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])
                    ) {
                        // obvious germanic
                        if ($this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                            || $this->StringAt($this->original, $this->current + 1, 2, ['ET'])
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            // always soft if french ending
                            if ($this->StringAt($this->original, $this->current + 1, 4, ['IER '])) {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'J';
                                $this->secondary .= 'K';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'G') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if (($this->current === 0 || $this->IsVowel($this->original, $this->current - 1))
                        && $this->IsVowel($this->original, $this->current + 1)
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if ($this->StringAt($this->original, $this->current, 4, ['JOSE'])
                        || $this->StringAt($this->original, 0, 4, ['SAN '])
                    ) {
                        if (($this->current === 0 && substr($this->original, $this->current + 4, 1) === ' ')
                            || $this->StringAt($this->original, 0, 4, ['SAN '])
                        ) {
                            $this->primary .= 'H';
                            $this->secondary .= 'H';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        }
                        $this->current += 1;
                        break;
                    }
                    if ($this->current === 0 && !$this->StringAt($this->original, $this->current, 4, ['JOSE'])) {
                        $this->primary .= 'J';
                        // Yankelovich/Jankelowicz
                        $this->secondary .= 'A';
                    } else {
                        // spanish pron. of .e.g. 'bajador'
                        if ($this->IsVowel($this->original, $this->current - 1)
                            && !$this->SlavoGermanic($this->original)
                            && (
                                substr($this->original, $this->current + 1, 1) === 'A'
                                || substr($this->original, $this->current + 1, 1) === 'O'
                            )
                        ) {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        } else {
                            if ($this->current === $this->last) {
                                $this->primary .= 'J';
                                $this->secondary .= '';
                            } else {
                                if (!$this->StringAt($this->original, $this->current + 1, 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'])
                                    && !$this->StringAt($this->original, $this->current - 1, 1, ['S', 'K', 'L'])
                                ) {
                                    $this->primary .= 'J';
                                    $this->secondary .= 'J';
                                }
                            }
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'J') {
                        // it could happen
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'K':
                    if (substr($this->original, $this->current + 1, 1) === 'K') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'L':
                    if (substr($this->original, $this->current + 1, 1) === 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if (($this->current === $this->length - 3
                                && $this->StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE']))
                            || (
                                (
                                    $this->StringAt($this->original, $this->last - 1, 2, ['AS', 'OS'])
                                    || $this->StringAt($this->original, $this->last, 1, ['A', 'O'])
                                )
                                && $this->StringAt($this->original, $this->current - 1, 4, ['ALLE'])
                            )
                        ) {
                            $this->primary .= 'L';
                            $this->secondary .= '';
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'L';
                    $this->secondary .= 'L';
                    break;
                case 'M':
                    if (($this->StringAt($this->original, $this->current - 1, 3, ['UMB'])
                            && (
                                $this->current + 1 === $this->last
                                || $this->StringAt($this->original, $this->current + 2, 2, ['ER'])
                            ))
                        || substr($this->original, $this->current + 1, 1) === 'M'
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'M';
                    $this->secondary .= 'M';
                    break;
                case 'N':
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                // NOTE(review): same single-byte caveat as for the cedilla case above - confirm the intended encoding.
                case 'Ñ':
                    $this->current += 1;
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                case 'P':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->current += 2;
                        $this->primary .= 'F';
                        $this->secondary .= 'F';
                        break;
                    }
                    // also account for "campbell" and "raspberry"
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    break;
                case 'Q':
                    if (substr($this->original, $this->current + 1, 1) === 'Q') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if ($this->current === $this->last
                        && !$this->SlavoGermanic($this->original)
                        && $this->StringAt($this->original, $this->current - 2, 2, ['IE'])
                        && !$this->StringAt($this->original, $this->current - 4, 2, ['ME', 'MA'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'R';
                    } else {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'R') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
                        $this->current += 1;
                        break;
                    }
                    // special case 'sugar-'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 5, ['SUGAR'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'S';
                        $this->current += 1;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['SH'])) {
                        // germanic
                        if ($this->StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 2;
                        break;
                    }
                    // italian & armenian
                    if ($this->StringAt($this->original, $this->current, 3, ['SIO', 'SIA'])
                        || $this->StringAt($this->original, $this->current, 4, ['SIAN'])
                    ) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 3;
                        break;
                    }
                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language although in hungarian it is pronounced 's'
                    if (($this->current === 0 && $this->StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W']))
                        || $this->StringAt($this->original, $this->current + 1, 1, ['Z'])
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        if ($this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['SC'])) {
                        // Schlesinger's rule
                        if (substr($this->original, $this->current + 2, 1) === 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
                                // 'schermerhorn', 'schenker'
                                if ($this->StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'SK';
                                } else {
                                    $this->primary .= 'SK';
                                    $this->secondary .= 'SK';
                                }
                                $this->current += 3;
                                break;
                            }
                            if ($this->current === 0
                                && !$this->IsVowel($this->original, 3)
                                && substr($this->original, $this->current + 3, 1) !== 'W'
                            ) {
                                $this->primary .= 'X';
                                $this->secondary .= 'S';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        }
                        if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                            $this->current += 3;
                            break;
                        }
                        // else
                        $this->primary .= 'SK';
                        $this->secondary .= 'SK';
                        $this->current += 3;
                        break;
                    }
                    // french e.g. 'resnais', 'artois'
                    if ($this->current === $this->last
                        && $this->StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'T':
                    if ($this->StringAt($this->original, $this->current, 4, ['TION'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['TH'])
                        || $this->StringAt($this->original, $this->current, 3, ['TTH'])
                    ) {
                        // special case 'thomas', 'thames' or germanic
                        if ($this->StringAt($this->original, $this->current + 2, 2, ['OM', 'AM'])
                            || $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                        ) {
                            $this->primary .= 'T';
                            $this->secondary .= 'T';
                        } else {
                            // the digit zero is the conventional metaphone symbol for the 'th' sound
                            $this->primary .= '0';
                            $this->secondary .= 'T';
                        }
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    break;
                case 'V':
                    if (substr($this->original, $this->current + 1, 1) === 'V') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'W':
                    // can also be in middle of word
                    if ($this->StringAt($this->original, $this->current, 2, ['WR'])) {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                        $this->current += 2;
                        break;
                    }
                    if ($this->current === 0
                        && (
                            $this->IsVowel($this->original, $this->current + 1)
                            || $this->StringAt($this->original, $this->current, 2, ['WH'])
                        )
                    ) {
                        // Wasserman should match Vasserman
                        if ($this->IsVowel($this->original, $this->current + 1)) {
                            $this->primary .= 'A';
                            $this->secondary .= 'F';
                        } else {
                            // need Uomo to match Womo
                            $this->primary .= 'A';
                            $this->secondary .= 'A';
                        }
                    }
                    // Arnow should match Arnoff
                    if (($this->current === $this->last && $this->IsVowel($this->original, $this->current - 1))
                        || $this->StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'F';
                        $this->current += 1;
                        break;
                    }
                    // polish e.g. 'filipowicz'
                    if ($this->StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
                        $this->primary .= 'TS';
                        $this->secondary .= 'FX';
                        $this->current += 4;
                        break;
                    }
                    // else skip it
                    $this->current += 1;
                    break;
                case 'X':
                    // french e.g. breaux
                    if (!(
                        $this->current === $this->last
                        && (
                            $this->StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU'])
                            || $this->StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])
                        )
                    )) {
                        $this->primary .= 'KS';
                        $this->secondary .= 'KS';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA'])
                        || (
                            $this->SlavoGermanic($this->original)
                            && ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'T')
                        )
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'TS';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'Z') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                default:
                    // anything else (digits, blanks, unknown bytes) contributes nothing
                    $this->current += 1;
            }
        }
        // end while; a rule may append two characters at once, so cut both keys to four
        $this->primary = substr($this->primary, 0, 4);
        $this->secondary = substr($this->secondary, 0, 4);
        return [
            'primary' => $this->primary,
            'secondary' => $this->secondary
        ];
    }

    // Helper methods (public for backwards compatibility)
    /**
     * Checks whether the substring of $string at $start with $length bytes equals one of the candidates.
     *
     * @param string $string Haystack
     * @param int $start Zero-based start offset; offsets outside the string never match
     * @param int $length Number of bytes to compare
     * @param array $list Candidate strings
     * @return int 1 if one candidate matches, otherwise 0
     */
    public function StringAt($string, $start, $length, $list)
    {
        if ($start < 0 || $start >= strlen($string)) {
            return 0;
        }
        // the slice does not depend on the candidate, so it is taken only once
        $slice = substr($string, $start, $length);
        return in_array($slice, $list, true) ? 1 : 0;
    }

    /**
     * Checks whether the character at $pos is one of A, E, I, O, U, Y (upper case only).
     *
     * @param string $string String to inspect
     * @param int $pos Zero-based offset; offsets outside the string are never vowels
     * @return bool|int 1 for a vowel, 0 otherwise (FALSE only if preg_match() fails)
     */
    public function IsVowel($string, $pos)
    {
        // substr() would silently count a negative offset from the end of the string
        if ($pos < 0 || $pos >= strlen($string)) {
            return 0;
        }
        return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
    }

    /**
     * Checks whether the string contains one of the markers W, K, CZ or WITZ,
     * which the encoder treats as a hint for a slavic or germanic word.
     *
     * @param string $string Upper-cased string to inspect
     * @return bool|int 1 if a marker is found, 0 otherwise (FALSE only if preg_match() fails)
     */
    public function SlavoGermanic($string)
    {
        return preg_match('/W|K|CZ|WITZ/', $string);
    }
}