[TASK] Use strict comparison for strings
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Utility / DoubleMetaPhoneUtility.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Utility;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
/**
 * Double Metaphone phonetic encoder.
 *
 * Produces a primary and a secondary phonetic key (each at most four
 * characters long) for a word, so that similarly sounding words get the
 * same key.
 *
 * TYPO3: Had to change name to "\TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
 * TYPO3: If you want to use this metaphone method instead of the default in the indexer you can enable it in the extension configuration
 * TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
 */
class DoubleMetaPhoneUtility
{
    /**
     * Upper-cased input word with a trailing blank appended as padding.
     *
     * @var string
     */
    public $original = '';

    /**
     * Primary phonetic key built so far.
     *
     * @var string
     */
    public $primary = '';

    /**
     * Secondary (alternative pronunciation) phonetic key built so far.
     *
     * @var string
     */
    public $secondary = '';

    /**
     * Byte length of the input word (without the padding).
     *
     * @var int
     */
    public $length = 0;

    /**
     * Index of the last character of the input word ($length - 1).
     *
     * @var int
     */
    public $last = 0;

    /**
     * Index of the character currently being encoded.
     *
     * @var int
     */
    public $current = 0;

    // methods
    // TYPO3 specific API to this class. BEGIN
    /**
     * Returns the primary double metaphone key of a word.
     *
     * @param string $string Word to encode
     * @param int $sys_language_uid Not used by this implementation
     * @return string Primary key, at most four characters
     */
    public function metaphone($string, $sys_language_uid = 0)
    {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }

    // TYPO3 specific API to this class. END
    // Public method
    /**
     * Computes the double metaphone keys of a word.
     *
     * The word is scanned left to right; every rule appends zero or more
     * characters to the primary and secondary key and advances the cursor
     * by the number of input characters it consumed. Scanning stops once
     * both keys hold four characters or the input is exhausted. The object
     * properties are reset on every call and hold the state of the last run.
     *
     * @param string $string Word to encode
     * @return array Array with the keys 'primary' and 'secondary', each a string of at most four characters
     */
    public function DoubleMetaPhone($string)
    {
        $this->primary = '';
        $this->secondary = '';
        $this->current = 0;
        $this->length = strlen($string);
        $this->last = $this->length - 1;
        // Padding lets the look-ahead checks read one position past the last letter.
        $this->original = $string . ' ';
        $this->original = strtoupper($this->original);
        // skip this at beginning of word
        if ($this->StringAt($this->original, 0, 2, ['GN', 'KN', 'PN', 'WR', 'PS'])) {
            $this->current++;
        }
        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if ($this->original[0] === 'X') {
            $this->primary .= 'S';
            // 'Z' maps to 'S'
            $this->secondary .= 'S';
            $this->current++;
        }
        // main loop: keep going until BOTH keys have reached four characters
        while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
            if ($this->current >= $this->length) {
                break;
            }
            // NOTE: every "break" inside the switch leaves the switch only, not the while loop.
            switch (substr($this->original, $this->current, 1)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($this->current === 0) {
                        // all init vowels now map to 'A'
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                    $this->current += 1;
                    break;
                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    if (substr($this->original, $this->current + 1, 1) === 'B') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                // NOTE(review): the switch subject is a single byte; if this file is stored as
                // UTF-8 this multi-byte literal presumably never matches - confirm.
                case 'Ç':
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                case 'C':
                    // various germanic
                    if (
                        $this->current > 1
                        && !$this->IsVowel($this->original, ($this->current - 2))
                        && $this->StringAt($this->original, $this->current - 1, 3, ['ACH'])
                        && (
                            substr($this->original, $this->current + 2, 1) !== 'I'
                            && (
                                substr($this->original, $this->current + 2, 1) !== 'E'
                                || $this->StringAt($this->original, $this->current - 2, 6, ['BACHER', 'MACHER'])
                            )
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // special case 'caesar'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 6, ['CAESAR'])) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 2;
                        break;
                    }
                    // italian 'chianti'
                    if ($this->StringAt($this->original, $this->current, 4, ['CHIA'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CH'])) {
                        // find 'michael'
                        if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, ['CHAE'])) {
                            $this->primary .= 'K';
                            $this->secondary .= 'X';
                            $this->current += 2;
                            break;
                        }
                        // greek roots e.g. 'chemistry', 'chorus'
                        if (
                            $this->current === 0
                            && (
                                $this->StringAt($this->original, $this->current + 1, 5, ['HARAC', 'HARIS'])
                                || $this->StringAt($this->original, $this->current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])
                            )
                            && !$this->StringAt($this->original, 0, 5, ['CHORE'])
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if (
                            $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                            || $this->StringAt($this->original, $this->current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID'])
                            || $this->StringAt($this->original, $this->current + 2, 1, ['T', 'S'])
                            || (
                                (
                                    $this->StringAt($this->original, $this->current - 1, 1, ['A', 'O', 'U', 'E'])
                                    || $this->current === 0
                                )
                                && $this->StringAt($this->original, $this->current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '])
                            )
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } elseif ($this->current > 0) {
                            if ($this->StringAt($this->original, 0, 2, ['MC'])) {
                                // e.g. 'McHugh'
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'K';
                            }
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'czerny'
                    if (
                        $this->StringAt($this->original, $this->current, 2, ['CZ'])
                        && !$this->StringAt($this->original, ($this->current - 2), 4, ['WICZ'])
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'focaccia'
                    if ($this->StringAt($this->original, $this->current + 1, 3, ['CIA'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    // double 'C', but not McClellan'
                    if (
                        $this->StringAt($this->original, $this->current, 2, ['CC'])
                        && !($this->current === 1 && $this->original[0] === 'M')
                    ) {
                        // 'bellocchio' but not 'bacchus'
                        if (
                            $this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'H'])
                            && !$this->StringAt($this->original, ($this->current + 2), 2, ['HU'])
                        ) {
                            // 'accident', 'accede', 'succeed'
                            if (
                                ($this->current === 1 && substr($this->original, $this->current - 1, 1) === 'A')
                                || $this->StringAt($this->original, $this->current - 1, 5, ['UCCEE', 'UCCES'])
                            ) {
                                $this->primary .= 'KS';
                                $this->secondary .= 'KS';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        } else {
                            // Pierce's rule
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CK', 'CG', 'CQ'])) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['CI', 'CE', 'CY'])) {
                        // italian vs. english
                        if ($this->StringAt($this->original, $this->current, 3, ['CIO', 'CIE', 'CIA'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->StringAt($this->original, $this->current + 1, 2, [' C', ' Q', ' G'])) {
                        $this->current += 3;
                    } elseif (
                        $this->StringAt($this->original, $this->current + 1, 1, ['C', 'K', 'Q'])
                        && !$this->StringAt($this->original, ($this->current + 1), 2, ['CE', 'CI'])
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'D':
                    if ($this->StringAt($this->original, $this->current, 2, ['DG'])) {
                        if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                            // e.g. 'edge'
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                            $this->current += 3;
                            break;
                        } else {
                            // e.g. 'edgar'
                            $this->primary .= 'TK';
                            $this->secondary .= 'TK';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['DT', 'DD'])) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 1;
                    break;
                case 'F':
                    if (substr($this->original, $this->current + 1, 1) === 'F') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'G':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        if ($this->current > 0 && !$this->IsVowel($this->original, ($this->current - 1))) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        if ($this->current < 3) {
                            // 'ghislane', 'ghiradelli'
                            if ($this->current === 0) {
                                if (substr($this->original, $this->current + 2, 1) === 'I') {
                                    $this->primary .= 'J';
                                    $this->secondary .= 'J';
                                } else {
                                    $this->primary .= 'K';
                                    $this->secondary .= 'K';
                                }
                                $this->current += 2;
                                break;
                            }
                        }
                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if (
                            ($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, ['B', 'H', 'D']))
                            || ($this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, ['B', 'H', 'D']))
                            || ($this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, ['B', 'H']))
                        ) {
                            $this->current += 2;
                            break;
                        } else {
                            // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                            if (
                                $this->current > 2
                                && substr($this->original, $this->current - 1, 1) === 'U'
                                && $this->StringAt($this->original, $this->current - 3, 1, ['C', 'G', 'L', 'R', 'T'])
                            ) {
                                $this->primary .= 'F';
                                $this->secondary .= 'F';
                            } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'I') {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        if (
                            $this->current === 1
                            && $this->IsVowel($this->original, 0)
                            && !$this->SlavoGermanic($this->original)
                        ) {
                            $this->primary .= 'KN';
                            $this->secondary .= 'N';
                        } elseif (
                            // not e.g. 'cagney'
                            !$this->StringAt($this->original, ($this->current + 2), 2, ['EY'])
                            // Single-character look-ahead; it cannot fail here because the next
                            // character is already known to be 'N'.
                            && substr($this->original, $this->current + 1, 1) !== 'Y'
                            && !$this->SlavoGermanic($this->original)
                        ) {
                            $this->primary .= 'N';
                            $this->secondary .= 'KN';
                        } else {
                            $this->primary .= 'KN';
                            $this->secondary .= 'KN';
                        }
                        $this->current += 2;
                        break;
                    }
                    // 'tagliaro'
                    if (
                        $this->StringAt($this->original, $this->current + 1, 2, ['LI'])
                        && !$this->SlavoGermanic($this->original)
                    ) {
                        $this->primary .= 'KL';
                        $this->secondary .= 'L';
                        $this->current += 2;
                        break;
                    }
                    // -ges-, -gep-, -gel- at beginning
                    if (
                        $this->current === 0
                        && (
                            substr($this->original, $this->current + 1, 1) === 'Y'
                            || $this->StringAt($this->original, $this->current + 1, 2, [
                                'ES',
                                'EP',
                                'EB',
                                'EL',
                                'EY',
                                'IB',
                                'IL',
                                'IN',
                                'IE',
                                'EI',
                                'ER'
                            ])
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // -ger-, -gy-
                    if (
                        (
                            $this->StringAt($this->original, $this->current + 1, 2, ['ER'])
                            || substr($this->original, $this->current + 1, 1) === 'Y'
                        )
                        && !$this->StringAt($this->original, 0, 6, ['DANGER', 'RANGER', 'MANGER'])
                        && !$this->StringAt($this->original, ($this->current - 1), 1, ['E', 'I'])
                        && !$this->StringAt($this->original, ($this->current - 1), 3, ['RGY', 'OGY'])
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // italian e.g. 'biaggi'
                    if (
                        $this->StringAt($this->original, $this->current + 1, 1, ['E', 'I', 'Y'])
                        || $this->StringAt($this->original, $this->current - 1, 4, ['AGGI', 'OGGI'])
                    ) {
                        // obvious germanic
                        if (
                            $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                            || $this->StringAt($this->original, $this->current + 1, 2, ['ET'])
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } elseif ($this->StringAt($this->original, $this->current + 1, 4, ['IER '])) {
                            // always soft if french ending
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'K';
                        }
                        $this->current += 2;
                        break;
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'G') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if (
                        ($this->current === 0 || $this->IsVowel($this->original, $this->current - 1))
                        && $this->IsVowel($this->original, $this->current + 1)
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if (
                        $this->StringAt($this->original, $this->current, 4, ['JOSE'])
                        || $this->StringAt($this->original, 0, 4, ['SAN '])
                    ) {
                        if (
                            ($this->current === 0 && substr($this->original, $this->current + 4, 1) === ' ')
                            || $this->StringAt($this->original, 0, 4, ['SAN '])
                        ) {
                            $this->primary .= 'H';
                            $this->secondary .= 'H';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        }
                        $this->current += 1;
                        break;
                    }
                    if ($this->current === 0 && !$this->StringAt($this->original, $this->current, 4, ['JOSE'])) {
                        $this->primary .= 'J';
                        // Yankelovich/Jankelowicz
                        $this->secondary .= 'A';
                    } elseif (
                        // spanish pron. of .e.g. 'bajador'
                        $this->IsVowel($this->original, $this->current - 1)
                        && !$this->SlavoGermanic($this->original)
                        && (
                            substr($this->original, $this->current + 1, 1) === 'A'
                            || substr($this->original, $this->current + 1, 1) === 'O'
                        )
                    ) {
                        $this->primary .= 'J';
                        $this->secondary .= 'H';
                    } elseif ($this->current === $this->last) {
                        $this->primary .= 'J';
                        $this->secondary .= '';
                    } elseif (
                        !$this->StringAt($this->original, ($this->current + 1), 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'])
                        && !$this->StringAt($this->original, ($this->current - 1), 1, ['S', 'K', 'L'])
                    ) {
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'J') {
                        // it could happen
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'K':
                    if (substr($this->original, $this->current + 1, 1) === 'K') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'L':
                    if (substr($this->original, $this->current + 1, 1) === 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if (
                            (
                                $this->current === $this->length - 3
                                && $this->StringAt($this->original, $this->current - 1, 4, ['ILLO', 'ILLA', 'ALLE'])
                            )
                            || (
                                (
                                    $this->StringAt($this->original, $this->last - 1, 2, ['AS', 'OS'])
                                    || $this->StringAt($this->original, $this->last, 1, ['A', 'O'])
                                )
                                && $this->StringAt($this->original, $this->current - 1, 4, ['ALLE'])
                            )
                        ) {
                            $this->primary .= 'L';
                            $this->secondary .= '';
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'L';
                    $this->secondary .= 'L';
                    break;
                case 'M':
                    if (
                        (
                            $this->StringAt($this->original, $this->current - 1, 3, ['UMB'])
                            && (
                                $this->current + 1 === $this->last
                                || $this->StringAt($this->original, $this->current + 2, 2, ['ER'])
                            )
                        )
                        || substr($this->original, $this->current + 1, 1) === 'M'
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'M';
                    $this->secondary .= 'M';
                    break;
                case 'N':
                    if (substr($this->original, $this->current + 1, 1) === 'N') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                // NOTE(review): same single-byte caveat as for the cedilla case above - confirm.
                case 'Ñ':
                    $this->current += 1;
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                case 'P':
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->current += 2;
                        $this->primary .= 'F';
                        $this->secondary .= 'F';
                        break;
                    }
                    // also account for "campbell" and "raspberry"
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['P', 'B'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    break;
                case 'Q':
                    if (substr($this->original, $this->current + 1, 1) === 'Q') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if (
                        $this->current === $this->last
                        && !$this->SlavoGermanic($this->original)
                        && $this->StringAt($this->original, $this->current - 2, 2, ['IE'])
                        && !$this->StringAt($this->original, ($this->current - 4), 2, ['ME', 'MA'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'R';
                    } else {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'R') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->StringAt($this->original, $this->current - 1, 3, ['ISL', 'YSL'])) {
                        $this->current += 1;
                        break;
                    }
                    // special case 'sugar-'
                    if ($this->current === 0 && $this->StringAt($this->original, $this->current, 5, ['SUGAR'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'S';
                        $this->current += 1;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['SH'])) {
                        // germanic
                        if ($this->StringAt($this->original, $this->current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 2;
                        break;
                    }
                    // italian & armenian
                    if (
                        $this->StringAt($this->original, $this->current, 3, ['SIO', 'SIA'])
                        || $this->StringAt($this->original, $this->current, 4, ['SIAN'])
                    ) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 3;
                        break;
                    }
                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                    if (
                        (
                            $this->current === 0
                            && $this->StringAt($this->original, $this->current + 1, 1, ['M', 'N', 'L', 'W'])
                        )
                        || $this->StringAt($this->original, $this->current + 1, 1, ['Z'])
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        if ($this->StringAt($this->original, $this->current + 1, 1, ['Z'])) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, ['SC'])) {
                        // Schlesinger's rule
                        if (substr($this->original, $this->current + 2, 1) === 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->StringAt($this->original, $this->current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM'])) {
                                // 'schermerhorn', 'schenker'
                                if ($this->StringAt($this->original, $this->current + 3, 2, ['ER', 'EN'])) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'SK';
                                } else {
                                    $this->primary .= 'SK';
                                    $this->secondary .= 'SK';
                                }
                                $this->current += 3;
                                break;
                            } else {
                                if (
                                    $this->current === 0
                                    && !$this->IsVowel($this->original, 3)
                                    && substr($this->original, $this->current + 3, 1) !== 'W'
                                ) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'S';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'X';
                                }
                                $this->current += 3;
                                break;
                            }
                        }
                        if ($this->StringAt($this->original, $this->current + 2, 1, ['I', 'E', 'Y'])) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                            $this->current += 3;
                            break;
                        }
                        // else
                        $this->primary .= 'SK';
                        $this->secondary .= 'SK';
                        $this->current += 3;
                        break;
                    }
                    // french e.g. 'resnais', 'artois'
                    if (
                        $this->current === $this->last
                        && $this->StringAt($this->original, $this->current - 2, 2, ['AI', 'OI'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['S', 'Z'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'T':
                    if ($this->StringAt($this->original, $this->current, 4, ['TION'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 3, ['TIA', 'TCH'])) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if (
                        $this->StringAt($this->original, $this->current, 2, ['TH'])
                        || $this->StringAt($this->original, $this->current, 3, ['TTH'])
                    ) {
                        // special case 'thomas', 'thames' or germanic
                        if (
                            $this->StringAt($this->original, $this->current + 2, 2, ['OM', 'AM'])
                            || $this->StringAt($this->original, 0, 4, ['VAN ', 'VON '])
                            || $this->StringAt($this->original, 0, 3, ['SCH'])
                        ) {
                            $this->primary .= 'T';
                            $this->secondary .= 'T';
                        } else {
                            // '0' (zero) is the code used for the 'th' sound
                            $this->primary .= '0';
                            $this->secondary .= 'T';
                        }
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['T', 'D'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    break;
                case 'V':
                    if (substr($this->original, $this->current + 1, 1) === 'V') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'W':
                    // can also be in middle of word
                    if ($this->StringAt($this->original, $this->current, 2, ['WR'])) {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                        $this->current += 2;
                        break;
                    }
                    if (
                        $this->current === 0
                        && (
                            $this->IsVowel($this->original, $this->current + 1)
                            || $this->StringAt($this->original, $this->current, 2, ['WH'])
                        )
                    ) {
                        // Wasserman should match Vasserman
                        if ($this->IsVowel($this->original, $this->current + 1)) {
                            $this->primary .= 'A';
                            $this->secondary .= 'F';
                        } else {
                            // need Uomo to match Womo
                            $this->primary .= 'A';
                            $this->secondary .= 'A';
                        }
                        // no break here: the checks below still decide how far to advance
                    }
                    // Arnow should match Arnoff
                    if (
                        ($this->current === $this->last && $this->IsVowel($this->original, $this->current - 1))
                        || $this->StringAt($this->original, $this->current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'])
                        || $this->StringAt($this->original, 0, 3, ['SCH'])
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'F';
                        $this->current += 1;
                        break;
                    }
                    // polish e.g. 'filipowicz'
                    if ($this->StringAt($this->original, $this->current, 4, ['WICZ', 'WITZ'])) {
                        $this->primary .= 'TS';
                        $this->secondary .= 'FX';
                        $this->current += 4;
                        break;
                    }
                    // else skip it
                    $this->current += 1;
                    break;
                case 'X':
                    // french e.g. breaux
                    if (
                        !(
                            $this->current === $this->last
                            && (
                                $this->StringAt($this->original, $this->current - 3, 3, ['IAU', 'EAU'])
                                || $this->StringAt($this->original, $this->current - 2, 2, ['AU', 'OU'])
                            )
                        )
                    ) {
                        $this->primary .= 'KS';
                        $this->secondary .= 'KS';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, ['C', 'X'])) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if (substr($this->original, $this->current + 1, 1) === 'H') {
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    } elseif (
                        $this->StringAt($this->original, $this->current + 1, 2, ['ZO', 'ZI', 'ZA'])
                        || (
                            $this->SlavoGermanic($this->original)
                            && ($this->current > 0 && substr($this->original, $this->current - 1, 1) !== 'T')
                        )
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'TS';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if (substr($this->original, $this->current + 1, 1) === 'Z') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                default:
                    // anything else (digits, blanks, punctuation, ...) is skipped
                    $this->current += 1;
            }
        }
        // end while
        // Rules may append two characters at once, so cut both keys back to four.
        $this->primary = substr($this->primary, 0, 4);
        $this->secondary = substr($this->secondary, 0, 4);
        return [
            'primary' => $this->primary,
            'secondary' => $this->secondary,
        ];
    }

    // end of function MetaPhone
    // Private methods
    /**
     * Checks whether the substring of $string starting at $start with the
     * given $length is identical to one of the candidates in $list.
     *
     * The comparison is strict, so numeric-looking strings such as '10' and
     * '1e1' are not considered equal.
     *
     * @param string $string Haystack
     * @param int $start Zero-based start offset; offsets outside the string never match
     * @param int $length Number of bytes to compare
     * @param array $list Candidate strings
     * @return int 1 if one of the candidates matches, 0 otherwise
     */
    public function StringAt($string, $start, $length, $list)
    {
        if ($start < 0 || $start >= strlen($string)) {
            return 0;
        }
        // Extract the slice once and look it up with strict comparison.
        return in_array(substr($string, $start, $length), $list, true) ? 1 : 0;
    }

    /**
     * Tells whether the character at position $pos is one of A, E, I, O, U, Y.
     *
     * Only upper-case letters are recognised.
     *
     * @param string $string
     * @param int $pos Zero-based position of the character to test
     * @return bool|int Result of preg_match(): 1 for a vowel, 0 otherwise
     */
    public function IsVowel($string, $pos)
    {
        return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
    }

    /**
     * Tells whether the string contains one of the letter groups W, K, CZ
     * or WITZ, which the encoder treats as a hint for a slavic or germanic word.
     *
     * @param string $string Upper-cased word
     * @return bool|int Result of preg_match(): 1 if a group was found, 0 otherwise
     */
    public function SlavoGermanic($string)
    {
        return preg_match('/W|K|CZ|WITZ/', $string);
    }
}