[TASK] Fix dummy parameter and return texts in phpDoc
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Utility / DoubleMetaPhoneUtility.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Utility;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
/**
 * Double Metaphone phonetic encoder.
 *
 * Produces a primary and a secondary phonetic code (at most four characters
 * each) for a word, so that similarly pronounced words can be matched.
 *
 * TYPO3: The class was renamed from plain "DoubleMetaPhone" to
 * "\TYPO3\CMS\IndexedSearch\Utility\DoubleMetaPhoneUtility" because TYPO3
 * requires user classes to be prefixed.
 * TYPO3: To use this metaphone method instead of the default one in the
 * indexer, enable it in the extension configuration.
 * TYPO3: You can write your own metaphone hook methods by taking this class
 * and its configuration as an example (also see ext_localconf.php).
 */
class DoubleMetaPhoneUtility {

    /**
     * Upper-cased input word with one trailing space appended.
     *
     * @var string
     */
    public $original = '';

    /**
     * Primary phonetic code built by the last DoubleMetaPhone() call.
     *
     * @var string
     */
    public $primary = '';

    /**
     * Secondary (alternative) phonetic code built by the last DoubleMetaPhone() call.
     *
     * @var string
     */
    public $secondary = '';

    /**
     * Byte length of the input word (without the appended space).
     *
     * @var int
     */
    public $length = 0;

    /**
     * Index of the last character of the input word ($length - 1).
     *
     * @var int
     */
    public $last = 0;

    /**
     * Index of the character currently being encoded.
     *
     * @var int
     */
    public $current = 0;

    // TYPO3 specific API to this class. BEGIN
    /**
     * Returns the primary Double Metaphone code of a word.
     *
     * @param string $string Word to encode
     * @param int $sys_language_uid Language uid; accepted for API compatibility, not used by this implementation
     * @return string Primary phonetic code, at most four characters
     */
    public function metaphone($string, $sys_language_uid = 0) {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }

    // TYPO3 specific API to this class. END

    /**
     * Computes the primary and secondary Double Metaphone codes of a word.
     *
     * Overwrites the public state properties of this object ($original,
     * $primary, $secondary, $length, $last, $current) on every call.
     *
     * @param string $string Word to encode
     * @return array Array with the keys 'primary' and 'secondary', each a code of at most four characters
     */
    public function DoubleMetaPhone($string) {
        $this->primary = '';
        $this->secondary = '';
        $this->current = 0;
        $this->length = strlen($string);
        $this->last = $this->length - 1;
        // A trailing space is appended; several patterns below (e.g. 'IER ', 'VAN ')
        // match against it, and it keeps $this->original[0] valid for empty input.
        $this->original = strtoupper($string . ' ');
        // skip this at beginning of word
        if ($this->StringAt($this->original, 0, 2, array('GN', 'KN', 'PN', 'WR', 'PS'))) {
            $this->current++;
        }
        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if ($this->original[0] === 'X') {
            $this->primary .= 'S';
            // 'Z' maps to 'S'
            $this->secondary .= 'S';
            $this->current++;
        }
        // main loop: continue until both codes have four characters or the input is consumed
        while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
            if ($this->current >= $this->length) {
                break;
            }
            switch (substr($this->original, $this->current, 1)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($this->current == 0) {
                        // all init vowels now map to 'A'
                        $this->primary .= 'A';
                        $this->secondary .= 'A';
                    }
                    $this->current += 1;
                    break;
                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    if (substr($this->original, $this->current + 1, 1) == 'B') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                // NOTE(review): the switch subject is a single byte. If this file is stored as
                // UTF-8, 'Ç' is a two-byte literal and this case cannot match - confirm the
                // intended source encoding.
                case 'Ç':
                    $this->primary .= 'S';
                    $this->secondary .= 'S';
                    $this->current += 1;
                    break;
                case 'C':
                    // various germanic
                    if (
                        $this->current > 1
                        && !$this->IsVowel($this->original, $this->current - 2)
                        && $this->StringAt($this->original, $this->current - 1, 3, array('ACH'))
                        && (
                            substr($this->original, $this->current + 2, 1) != 'I'
                            && (
                                substr($this->original, $this->current + 2, 1) != 'E'
                                || $this->StringAt($this->original, $this->current - 2, 6, array('BACHER', 'MACHER'))
                            )
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    // special case 'caesar'
                    if ($this->current == 0 && $this->StringAt($this->original, $this->current, 6, array('CAESAR'))) {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                        $this->current += 2;
                        break;
                    }
                    // italian 'chianti'
                    if ($this->StringAt($this->original, $this->current, 4, array('CHIA'))) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CH'))) {
                        // find 'michael'
                        if ($this->current > 0 && $this->StringAt($this->original, $this->current, 4, array('CHAE'))) {
                            $this->primary .= 'K';
                            $this->secondary .= 'X';
                            $this->current += 2;
                            break;
                        }
                        // greek roots e.g. 'chemistry', 'chorus'
                        if (
                            $this->current == 0
                            && (
                                $this->StringAt($this->original, $this->current + 1, 5, array('HARAC', 'HARIS'))
                                || $this->StringAt($this->original, $this->current + 1, 3, array('HOR', 'HYM', 'HIA', 'HEM'))
                            )
                            && !$this->StringAt($this->original, 0, 5, array('CHORE'))
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if (
                            $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                            || $this->StringAt($this->original, $this->current - 2, 6, array('ORCHES', 'ARCHIT', 'ORCHID'))
                            || $this->StringAt($this->original, $this->current + 2, 1, array('T', 'S'))
                            || (
                                (
                                    $this->StringAt($this->original, $this->current - 1, 1, array('A', 'O', 'U', 'E'))
                                    || $this->current == 0
                                )
                                && $this->StringAt($this->original, $this->current + 2, 1, array('L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' '))
                            )
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            if ($this->current > 0) {
                                if ($this->StringAt($this->original, 0, 2, array('MC'))) {
                                    // e.g. 'McHugh'
                                    $this->primary .= 'K';
                                    $this->secondary .= 'K';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'K';
                                }
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'czerny'
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('CZ'))
                        && !$this->StringAt($this->original, $this->current - 2, 4, array('WICZ'))
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        $this->current += 2;
                        break;
                    }
                    // e.g. 'focaccia'
                    if ($this->StringAt($this->original, $this->current + 1, 3, array('CIA'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    // double 'C', but not 'McClellan'
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('CC'))
                        && !($this->current == 1 && $this->original[0] === 'M')
                    ) {
                        // 'bellocchio' but not 'bacchus'
                        if (
                            $this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'H'))
                            && !$this->StringAt($this->original, $this->current + 2, 2, array('HU'))
                        ) {
                            // 'accident', 'accede', 'succeed'
                            if (
                                ($this->current == 1 && substr($this->original, $this->current - 1, 1) == 'A')
                                || $this->StringAt($this->original, $this->current - 1, 5, array('UCCEE', 'UCCES'))
                            ) {
                                $this->primary .= 'KS';
                                $this->secondary .= 'KS';
                            } else {
                                $this->primary .= 'X';
                                $this->secondary .= 'X';
                            }
                            $this->current += 3;
                            break;
                        } else {
                            // Pierce's rule
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CK', 'CG', 'CQ'))) {
                        $this->primary .= 'K';
                        $this->secondary .= 'K';
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('CI', 'CE', 'CY'))) {
                        // italian vs. english
                        if ($this->StringAt($this->original, $this->current, 3, array('CIO', 'CIE', 'CIA'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->StringAt($this->original, $this->current + 1, 2, array(' C', ' Q', ' G'))) {
                        $this->current += 3;
                    } else {
                        if (
                            $this->StringAt($this->original, $this->current + 1, 1, array('C', 'K', 'Q'))
                            && !$this->StringAt($this->original, $this->current + 1, 2, array('CE', 'CI'))
                        ) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                    }
                    break;
                case 'D':
                    if ($this->StringAt($this->original, $this->current, 2, array('DG'))) {
                        if ($this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'Y'))) {
                            // e.g. 'edge'
                            $this->primary .= 'J';
                            $this->secondary .= 'J';
                            $this->current += 3;
                            break;
                        } else {
                            // e.g. 'edgar'
                            $this->primary .= 'TK';
                            $this->secondary .= 'TK';
                            $this->current += 2;
                            break;
                        }
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('DT', 'DD'))) {
                        $this->primary .= 'T';
                        $this->secondary .= 'T';
                        $this->current += 2;
                        break;
                    }
                    // else
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    $this->current += 1;
                    break;
                case 'F':
                    if (substr($this->original, $this->current + 1, 1) == 'F') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'G':
                    if (substr($this->original, $this->current + 1, 1) == 'H') {
                        if ($this->current > 0 && !$this->IsVowel($this->original, $this->current - 1)) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                            $this->current += 2;
                            break;
                        }
                        if ($this->current < 3) {
                            // 'ghislane', 'ghiradelli'
                            if ($this->current == 0) {
                                if (substr($this->original, $this->current + 2, 1) == 'I') {
                                    $this->primary .= 'J';
                                    $this->secondary .= 'J';
                                } else {
                                    $this->primary .= 'K';
                                    $this->secondary .= 'K';
                                }
                                $this->current += 2;
                                break;
                            }
                        }
                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if (
                            ($this->current > 1 && $this->StringAt($this->original, $this->current - 2, 1, array('B', 'H', 'D')))
                            || ($this->current > 2 && $this->StringAt($this->original, $this->current - 3, 1, array('B', 'H', 'D')))
                            || ($this->current > 3 && $this->StringAt($this->original, $this->current - 4, 1, array('B', 'H')))
                        ) {
                            $this->current += 2;
                            break;
                        } else {
                            // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                            if (
                                $this->current > 2
                                && substr($this->original, $this->current - 1, 1) == 'U'
                                && $this->StringAt($this->original, $this->current - 3, 1, array('C', 'G', 'L', 'R', 'T'))
                            ) {
                                $this->primary .= 'F';
                                $this->secondary .= 'F';
                            } elseif ($this->current > 0 && substr($this->original, $this->current - 1, 1) != 'I') {
                                $this->primary .= 'K';
                                $this->secondary .= 'K';
                            }
                            $this->current += 2;
                            break;
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'N') {
                        if ($this->current == 1 && $this->IsVowel($this->original, 0) && !$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'KN';
                            $this->secondary .= 'N';
                        } else {
                            // not e.g. 'cagney'
                            if (
                                !$this->StringAt($this->original, $this->current + 2, 2, array('EY'))
                                && substr($this->original, $this->current + 1, 1) != 'Y'
                                && !$this->SlavoGermanic($this->original)
                            ) {
                                $this->primary .= 'N';
                                $this->secondary .= 'KN';
                            } else {
                                $this->primary .= 'KN';
                                $this->secondary .= 'KN';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    // 'tagliaro'
                    if ($this->StringAt($this->original, $this->current + 1, 2, array('LI')) && !$this->SlavoGermanic($this->original)) {
                        $this->primary .= 'KL';
                        $this->secondary .= 'L';
                        $this->current += 2;
                        break;
                    }
                    // -ges-, -gep-, -gel- at beginning
                    if (
                        $this->current == 0
                        && (
                            substr($this->original, $this->current + 1, 1) == 'Y'
                            || $this->StringAt($this->original, $this->current + 1, 2, array(
                                'ES',
                                'EP',
                                'EB',
                                'EL',
                                'EY',
                                'IB',
                                'IL',
                                'IN',
                                'IE',
                                'EI',
                                'ER'
                            ))
                        )
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // -ger-, -gy-
                    if (
                        (
                            $this->StringAt($this->original, $this->current + 1, 2, array('ER'))
                            || substr($this->original, $this->current + 1, 1) == 'Y'
                        )
                        && !$this->StringAt($this->original, 0, 6, array('DANGER', 'RANGER', 'MANGER'))
                        && !$this->StringAt($this->original, $this->current - 1, 1, array('E', 'I'))
                        && !$this->StringAt($this->original, $this->current - 1, 3, array('RGY', 'OGY'))
                    ) {
                        $this->primary .= 'K';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    }
                    // italian e.g. 'biaggi'
                    if (
                        $this->StringAt($this->original, $this->current + 1, 1, array('E', 'I', 'Y'))
                        || $this->StringAt($this->original, $this->current - 1, 4, array('AGGI', 'OGGI'))
                    ) {
                        // obvious germanic
                        if (
                            $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                            || $this->StringAt($this->original, $this->current + 1, 2, array('ET'))
                        ) {
                            $this->primary .= 'K';
                            $this->secondary .= 'K';
                        } else {
                            // always soft if french ending
                            if ($this->StringAt($this->original, $this->current + 1, 4, array('IER '))) {
                                $this->primary .= 'J';
                                $this->secondary .= 'J';
                            } else {
                                $this->primary .= 'J';
                                $this->secondary .= 'K';
                            }
                        }
                        $this->current += 2;
                        break;
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'G') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if (
                        ($this->current == 0 || $this->IsVowel($this->original, $this->current - 1))
                        && $this->IsVowel($this->original, $this->current + 1)
                    ) {
                        $this->primary .= 'H';
                        $this->secondary .= 'H';
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if (
                        $this->StringAt($this->original, $this->current, 4, array('JOSE'))
                        || $this->StringAt($this->original, 0, 4, array('SAN '))
                    ) {
                        if (
                            ($this->current == 0 && substr($this->original, $this->current + 4, 1) == ' ')
                            || $this->StringAt($this->original, 0, 4, array('SAN '))
                        ) {
                            $this->primary .= 'H';
                            $this->secondary .= 'H';
                        } else {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        }
                        $this->current += 1;
                        break;
                    }
                    if ($this->current == 0 && !$this->StringAt($this->original, $this->current, 4, array('JOSE'))) {
                        $this->primary .= 'J';
                        // Yankelovich/Jankelowicz
                        $this->secondary .= 'A';
                    } else {
                        // spanish pron. of e.g. 'bajador'
                        if (
                            $this->IsVowel($this->original, $this->current - 1)
                            && !$this->SlavoGermanic($this->original)
                            && (
                                substr($this->original, $this->current + 1, 1) == 'A'
                                || substr($this->original, $this->current + 1, 1) == 'O'
                            )
                        ) {
                            $this->primary .= 'J';
                            $this->secondary .= 'H';
                        } else {
                            if ($this->current == $this->last) {
                                $this->primary .= 'J';
                                $this->secondary .= '';
                            } else {
                                if (
                                    !$this->StringAt($this->original, $this->current + 1, 1, array('L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z'))
                                    && !$this->StringAt($this->original, $this->current - 1, 1, array('S', 'K', 'L'))
                                ) {
                                    $this->primary .= 'J';
                                    $this->secondary .= 'J';
                                }
                            }
                        }
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'J') {
                        // it could happen
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'K':
                    if (substr($this->original, $this->current + 1, 1) == 'K') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'L':
                    if (substr($this->original, $this->current + 1, 1) == 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if (
                            (
                                $this->current == $this->length - 3
                                && $this->StringAt($this->original, $this->current - 1, 4, array('ILLO', 'ILLA', 'ALLE'))
                            )
                            || (
                                (
                                    $this->StringAt($this->original, $this->last - 1, 2, array('AS', 'OS'))
                                    || $this->StringAt($this->original, $this->last, 1, array('A', 'O'))
                                )
                                && $this->StringAt($this->original, $this->current - 1, 4, array('ALLE'))
                            )
                        ) {
                            $this->primary .= 'L';
                            $this->secondary .= '';
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'L';
                    $this->secondary .= 'L';
                    break;
                case 'M':
                    if (
                        (
                            $this->StringAt($this->original, $this->current - 1, 3, array('UMB'))
                            && (
                                $this->current + 1 == $this->last
                                || $this->StringAt($this->original, $this->current + 2, 2, array('ER'))
                            )
                        )
                        || substr($this->original, $this->current + 1, 1) == 'M'
                    ) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'M';
                    $this->secondary .= 'M';
                    break;
                case 'N':
                    if (substr($this->original, $this->current + 1, 1) == 'N') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                // NOTE(review): same single-byte caveat as for 'Ç' above - confirm source encoding.
                case 'Ñ':
                    $this->current += 1;
                    $this->primary .= 'N';
                    $this->secondary .= 'N';
                    break;
                case 'P':
                    if (substr($this->original, $this->current + 1, 1) == 'H') {
                        $this->current += 2;
                        $this->primary .= 'F';
                        $this->secondary .= 'F';
                        break;
                    }
                    // also account for "campbell" and "raspberry"
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('P', 'B'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'P';
                    $this->secondary .= 'P';
                    break;
                case 'Q':
                    if (substr($this->original, $this->current + 1, 1) == 'Q') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'K';
                    $this->secondary .= 'K';
                    break;
                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if (
                        $this->current == $this->last
                        && !$this->SlavoGermanic($this->original)
                        && $this->StringAt($this->original, $this->current - 2, 2, array('IE'))
                        && !$this->StringAt($this->original, $this->current - 4, 2, array('ME', 'MA'))
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'R';
                    } else {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'R') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->StringAt($this->original, $this->current - 1, 3, array('ISL', 'YSL'))) {
                        $this->current += 1;
                        break;
                    }
                    // special case 'sugar-'
                    if ($this->current == 0 && $this->StringAt($this->original, $this->current, 5, array('SUGAR'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'S';
                        $this->current += 1;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('SH'))) {
                        // germanic
                        if ($this->StringAt($this->original, $this->current + 1, 4, array('HEIM', 'HOEK', 'HOLM', 'HOLZ'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        } else {
                            $this->primary .= 'X';
                            $this->secondary .= 'X';
                        }
                        $this->current += 2;
                        break;
                    }
                    // italian & armenian
                    if (
                        $this->StringAt($this->original, $this->current, 3, array('SIO', 'SIA'))
                        || $this->StringAt($this->original, $this->current, 4, array('SIAN'))
                    ) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->primary .= 'S';
                            $this->secondary .= 'X';
                        } else {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                        }
                        $this->current += 3;
                        break;
                    }
                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language although in hungarian it is pronounced 's'
                    if (
                        ($this->current == 0 && $this->StringAt($this->original, $this->current + 1, 1, array('M', 'N', 'L', 'W')))
                        || $this->StringAt($this->original, $this->current + 1, 1, array('Z'))
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'X';
                        if ($this->StringAt($this->original, $this->current + 1, 1, array('Z'))) {
                            $this->current += 2;
                        } else {
                            $this->current += 1;
                        }
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 2, array('SC'))) {
                        // Schlesinger's rule
                        if (substr($this->original, $this->current + 2, 1) == 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->StringAt($this->original, $this->current + 3, 2, array('OO', 'ER', 'EN', 'UY', 'ED', 'EM'))) {
                                // 'schermerhorn', 'schenker'
                                if ($this->StringAt($this->original, $this->current + 3, 2, array('ER', 'EN'))) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'SK';
                                } else {
                                    $this->primary .= 'SK';
                                    $this->secondary .= 'SK';
                                }
                                $this->current += 3;
                                break;
                            } else {
                                if (
                                    $this->current == 0
                                    && !$this->IsVowel($this->original, 3)
                                    && substr($this->original, $this->current + 3, 1) != 'W'
                                ) {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'S';
                                } else {
                                    $this->primary .= 'X';
                                    $this->secondary .= 'X';
                                }
                                $this->current += 3;
                                break;
                            }
                        }
                        if ($this->StringAt($this->original, $this->current + 2, 1, array('I', 'E', 'Y'))) {
                            $this->primary .= 'S';
                            $this->secondary .= 'S';
                            $this->current += 3;
                            break;
                        }
                        // else
                        $this->primary .= 'SK';
                        $this->secondary .= 'SK';
                        $this->current += 3;
                        break;
                    }
                    // french e.g. 'resnais', 'artois'
                    if ($this->current == $this->last && $this->StringAt($this->original, $this->current - 2, 2, array('AI', 'OI'))) {
                        $this->primary .= '';
                        $this->secondary .= 'S';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('S', 'Z'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'T':
                    if ($this->StringAt($this->original, $this->current, 4, array('TION'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current, 3, array('TIA', 'TCH'))) {
                        $this->primary .= 'X';
                        $this->secondary .= 'X';
                        $this->current += 3;
                        break;
                    }
                    if (
                        $this->StringAt($this->original, $this->current, 2, array('TH'))
                        || $this->StringAt($this->original, $this->current, 3, array('TTH'))
                    ) {
                        // special case 'thomas', 'thames' or germanic
                        if (
                            $this->StringAt($this->original, $this->current + 2, 2, array('OM', 'AM'))
                            || $this->StringAt($this->original, 0, 4, array('VAN ', 'VON '))
                            || $this->StringAt($this->original, 0, 3, array('SCH'))
                        ) {
                            $this->primary .= 'T';
                            $this->secondary .= 'T';
                        } else {
                            // the digit zero is the code emitted for the 'th' sound
                            $this->primary .= '0';
                            $this->secondary .= 'T';
                        }
                        $this->current += 2;
                        break;
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('T', 'D'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'T';
                    $this->secondary .= 'T';
                    break;
                case 'V':
                    if (substr($this->original, $this->current + 1, 1) == 'V') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->primary .= 'F';
                    $this->secondary .= 'F';
                    break;
                case 'W':
                    // can also be in middle of word
                    if ($this->StringAt($this->original, $this->current, 2, array('WR'))) {
                        $this->primary .= 'R';
                        $this->secondary .= 'R';
                        $this->current += 2;
                        break;
                    }
                    if (
                        $this->current == 0
                        && (
                            $this->IsVowel($this->original, $this->current + 1)
                            || $this->StringAt($this->original, $this->current, 2, array('WH'))
                        )
                    ) {
                        // Wasserman should match Vasserman
                        if ($this->IsVowel($this->original, $this->current + 1)) {
                            $this->primary .= 'A';
                            $this->secondary .= 'F';
                        } else {
                            // need Uomo to match Womo
                            $this->primary .= 'A';
                            $this->secondary .= 'A';
                        }
                    }
                    // Arnow should match Arnoff
                    if (
                        ($this->current == $this->last && $this->IsVowel($this->original, $this->current - 1))
                        || $this->StringAt($this->original, $this->current - 1, 5, array('EWSKI', 'EWSKY', 'OWSKI', 'OWSKY'))
                        || $this->StringAt($this->original, 0, 3, array('SCH'))
                    ) {
                        $this->primary .= '';
                        $this->secondary .= 'F';
                        $this->current += 1;
                        break;
                    }
                    // polish e.g. 'filipowicz'
                    if ($this->StringAt($this->original, $this->current, 4, array('WICZ', 'WITZ'))) {
                        $this->primary .= 'TS';
                        $this->secondary .= 'FX';
                        $this->current += 4;
                        break;
                    }
                    // else skip it
                    $this->current += 1;
                    break;
                case 'X':
                    // french e.g. breaux
                    if (
                        !(
                            $this->current == $this->last
                            && (
                                $this->StringAt($this->original, $this->current - 3, 3, array('IAU', 'EAU'))
                                || $this->StringAt($this->original, $this->current - 2, 2, array('AU', 'OU'))
                            )
                        )
                    ) {
                        $this->primary .= 'KS';
                        $this->secondary .= 'KS';
                    }
                    if ($this->StringAt($this->original, $this->current + 1, 1, array('C', 'X'))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if (substr($this->original, $this->current + 1, 1) == 'H') {
                        $this->primary .= 'J';
                        $this->secondary .= 'J';
                        $this->current += 2;
                        break;
                    } elseif (
                        $this->StringAt($this->original, $this->current + 1, 2, array('ZO', 'ZI', 'ZA'))
                        || (
                            $this->SlavoGermanic($this->original)
                            && ($this->current > 0 && substr($this->original, $this->current - 1, 1) != 'T')
                        )
                    ) {
                        $this->primary .= 'S';
                        $this->secondary .= 'TS';
                    } else {
                        $this->primary .= 'S';
                        $this->secondary .= 'S';
                    }
                    if (substr($this->original, $this->current + 1, 1) == 'Z') {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;
                default:
                    // any other character produces no code
                    $this->current += 1;
            }
        }
        // Rules may append two characters at once, so cut both codes to four characters.
        $this->primary = substr($this->primary, 0, 4);
        $this->secondary = substr($this->secondary, 0, 4);
        return array(
            'primary' => $this->primary,
            'secondary' => $this->secondary
        );
    }

    /**
     * Tests whether the substring of $string at $start with $length bytes equals one of the candidates.
     *
     * @param string $string String to look into
     * @param int $start Zero-based start offset; a negative offset or one at/after the end never matches
     * @param int $length Number of bytes to compare
     * @param array $list Candidate strings
     * @return int 1 if one of the candidates matches, 0 otherwise
     */
    public function StringAt($string, $start, $length, $list) {
        if ($start < 0 || $start >= strlen($string)) {
            return 0;
        }
        return in_array(substr($string, $start, $length), $list, TRUE) ? 1 : 0;
    }

    /**
     * Tests whether the character at $pos is one of the upper-case letters A, E, I, O, U or Y.
     *
     * @param string $string String to look into
     * @param int $pos Zero-based position of the character to test
     * @return bool|int Result of preg_match(): 1 for a vowel, 0 otherwise, FALSE on a regex error
     */
    public function IsVowel($string, $pos) {
        return preg_match('/[AEIOUY]/', substr($string, $pos, 1));
    }

    /**
     * Tests whether the string contains 'W', 'K', 'CZ' or 'WITZ'.
     *
     * @param string $string Upper-cased string to test
     * @return bool|int Result of preg_match(): 1 on a match, 0 otherwise, FALSE on a regex error
     */
    public function SlavoGermanic($string) {
        return preg_match('/W|K|CZ|WITZ/', $string);
    }

}