问题
I am working on an app where user's current playing song title is fetched and we look in the mysql database to see who else is playing a similar song.since the same song might be with many varied titles on everyone's phone , we need a way to effectively find as close results as possible.
The process that we are using right now gets all the songs from the table then do a foreach and compare each entry in the resultset with user's song. Here is a part of the function we have used:
$all_results = $db->select($sql);//all db entries from the songs table
foreach ( $all_results as $u ) {
$toTest = strtolower( $u['last_song'] );
$toTest = preg_replace('/[^A-Za-z0-9]/', ' ', $toTest);
$score = 0;
$score = $this->calSim( $user_last_song, $toTest, 0 ); //user last song is the current song being played by the user
if ( $score > 1 ) { //if at least there is some match then compare by another method
$score = $this->calMetaphone($user_last_song, $toTest, $score);
}
if ( $score > 5 ) {
//song matches.Push into a final array
}
}
=======================================
here are the 2 custom functions:
public function calMetaphone ( $x, $y, $pts ) {
$x = metaphone( $x );
$y = metaphone( $y );
$pts = $this->calSim( $x, $y, $pts );
return $pts;
}
public function calSim ( $x, $y, $pts ) {
similar_text($x, $y, $sim);
//echo "Similarity is :$sim<br>";
if ( $sim >= 90 ) {
$pts = $pts + 5;
} else if ( $sim >= 80 ) {
$pts = $pts + 4;
}
if ( $sim >= 70 ) {
$pts = $pts + 3;
}
if ( $sim >= 60 ) {
$pts = $pts + 2;
} else {
$pts = $pts + 0;
}
return $pts;
}
But I know this is a real bad way of doing it.If there are large number of entries in database it might take forever to compare them all 1 by 1.
Can anyone tell me the correct method that should be followed here.
Thanks Karam
回答1:
I can't take any credit for this but when I needed a similar function I found this (can't remember where though) for a metaphone:-
DROP FUNCTION `func_Double_Metaphone`//
CREATE DEFINER=`aaaa`@`%` FUNCTION `func_Double_Metaphone`(st VARCHAR(55)) RETURNS varchar(128) CHARSET utf8
NO SQL
BEGIN
DECLARE length, first, last, pos, prevpos, is_slavo_germanic SMALLINT;
DECLARE pri, sec VARCHAR(45) DEFAULT '';
DECLARE ch CHAR(1);
SET first = 3;
SET length = CHAR_LENGTH(st);
SET last = first + length -1;
SET st = CONCAT(REPEAT('-', first -1), UCASE(st), REPEAT(' ', 5)); SET is_slavo_germanic = (st LIKE '%W%' OR st LIKE '%K%' OR st LIKE '%CZ%'); SET pos = first; IF SUBSTRING(st, first, 2) IN ('GN', 'KN', 'PN', 'WR', 'PS') THEN
SET pos = pos + 1;
END IF;
IF SUBSTRING(st, first, 1) = 'X' THEN
SET pri = 'S', sec = 'S', pos = pos + 1; END IF;
WHILE pos <= last DO
SET prevpos = pos;
SET ch = SUBSTRING(st, pos, 1); CASE
WHEN ch IN ('A', 'E', 'I', 'O', 'U', 'Y') THEN
IF pos = first THEN SET pri = CONCAT(pri, 'A'), sec = CONCAT(sec, 'A'), pos = pos + 1; ELSE
SET pos = pos + 1;
END IF;
WHEN ch = 'B' THEN
IF SUBSTRING(st, pos+1, 1) = 'B' THEN
SET pri = CONCAT(pri, 'P'), sec = CONCAT(sec, 'P'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'P'), sec = CONCAT(sec, 'P'), pos = pos + 1; END IF;
WHEN ch = 'C' THEN
IF (pos > (first + 1) AND SUBSTRING(st, pos-2, 1) NOT IN ('A', 'E', 'I', 'O', 'U', 'Y') AND SUBSTRING(st, pos-1, 3) = 'ACH' AND
(SUBSTRING(st, pos+2, 1) NOT IN ('I', 'E') OR SUBSTRING(st, pos-2, 6) IN ('BACHER', 'MACHER'))) THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSEIF pos = first AND SUBSTRING(st, first, 6) = 'CAESAR' THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'), pos = pos + 2; ELSEIF SUBSTRING(st, pos, 4) = 'CHIA' THEN SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSEIF SUBSTRING(st, pos, 2) = 'CH' THEN
IF pos > first AND SUBSTRING(st, pos, 4) = 'CHAE' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'X'), pos = pos + 2; ELSEIF pos = first AND (SUBSTRING(st, pos+1, 5) IN ('HARAC', 'HARIS') OR
SUBSTRING(st, pos+1, 3) IN ('HOR', 'HYM', 'HIA', 'HEM')) AND SUBSTRING(st, first, 5) != 'CHORE' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSEIF SUBSTRING(st, first, 4) IN ('VAN ', 'VON ') OR SUBSTRING(st, first, 3) = 'SCH'
OR SUBSTRING(st, pos-2, 6) IN ('ORCHES', 'ARCHIT', 'ORCHID')
OR SUBSTRING(st, pos+2, 1) IN ('T', 'S')
OR ((SUBSTRING(st, pos-1, 1) IN ('A', 'O', 'U', 'E') OR pos = first)
AND SUBSTRING(st, pos+2, 1) IN ('L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' ')) THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
IF pos > first THEN
IF SUBSTRING(st, first, 2) = 'MC' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'K'), pos = pos + 2; END IF;
ELSE
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 2; END IF;
END IF;
ELSEIF SUBSTRING(st, pos, 2) = 'CZ' AND SUBSTRING(st, pos-2, 4) != 'WICZ' THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'X'), pos = pos + 2; ELSEIF SUBSTRING(st, pos+1, 3) = 'CIA' THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 3; ELSEIF SUBSTRING(st, pos, 2) = 'CC' AND NOT (pos = (first +1) AND SUBSTRING(st, first, 1) = 'M') THEN
IF SUBSTRING(st, pos+2, 1) IN ('I', 'E', 'H') AND SUBSTRING(st, pos+2, 2) != 'HU' THEN
IF (pos = first +1 AND SUBSTRING(st, first) = 'A') OR
SUBSTRING(st, pos-1, 5) IN ('UCCEE', 'UCCES') THEN
SET pri = CONCAT(pri, 'KS'), sec = CONCAT(sec, 'KS'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 3; END IF;
ELSE
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; END IF;
ELSEIF SUBSTRING(st, pos, 2) IN ('CK', 'CG', 'CQ') THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSEIF SUBSTRING(st, pos, 2) IN ('CI', 'CE', 'CY') THEN
IF SUBSTRING(st, pos, 3) IN ('CIO', 'CIE', 'CIA') THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'X'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'), pos = pos + 2; END IF;
ELSE
IF SUBSTRING(st, pos+1, 2) IN (' C', ' Q', ' G') THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 3; ELSE
IF SUBSTRING(st, pos+1, 1) IN ('C', 'K', 'Q') AND SUBSTRING(st, pos+1, 2) NOT IN ('CE', 'CI') THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 1; END IF;
END IF;
END IF;
WHEN ch = 'D' THEN
IF SUBSTRING(st, pos, 2) = 'DG' THEN
IF SUBSTRING(st, pos+2, 1) IN ('I', 'E', 'Y') THEN SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'J'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'TK'), sec = CONCAT(sec, 'TK'), pos = pos + 2; END IF;
ELSEIF SUBSTRING(st, pos, 2) IN ('DT', 'DD') THEN
SET pri = CONCAT(pri, 'T'), sec = CONCAT(sec, 'T'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'T'), sec = CONCAT(sec, 'T'), pos = pos + 1; END IF;
WHEN ch = 'F' THEN
IF SUBSTRING(st, pos+1, 1) = 'F' THEN
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 1; END IF;
WHEN ch = 'G' THEN
IF SUBSTRING(st, pos+1, 1) = 'H' THEN
IF (pos > first AND SUBSTRING(st, pos-1, 1) NOT IN ('A', 'E', 'I', 'O', 'U', 'Y'))
OR ( pos = first AND SUBSTRING(st, pos+2, 1) != 'I') THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSEIF pos = first AND SUBSTRING(st, pos+2, 1) = 'I' THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'J'), pos = pos + 2; ELSEIF (pos > (first + 1) AND SUBSTRING(st, pos-2, 1) IN ('B', 'H', 'D') )
OR (pos > (first + 2) AND SUBSTRING(st, pos-3, 1) IN ('B', 'H', 'D') )
OR (pos > (first + 3) AND SUBSTRING(st, pos-4, 1) IN ('B', 'H') ) THEN
SET pos = pos + 2; ELSE
IF pos > (first + 2) AND SUBSTRING(st, pos-1, 1) = 'U'
AND SUBSTRING(st, pos-3, 1) IN ('C', 'G', 'L', 'R', 'T') THEN
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 2; ELSEIF pos > first AND SUBSTRING(st, pos-1, 1) != 'I' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
SET pos = pos + 1;
END IF;
END IF;
ELSEIF SUBSTRING(st, pos+1, 1) = 'N' THEN
IF pos = (first +1) AND SUBSTRING(st, first, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y') AND NOT is_slavo_germanic THEN
SET pri = CONCAT(pri, 'KN'), sec = CONCAT(sec, 'N'), pos = pos + 2; ELSE
IF SUBSTRING(st, pos+2, 2) != 'EY' AND SUBSTRING(st, pos+1, 1) != 'Y'
AND NOT is_slavo_germanic THEN
SET pri = CONCAT(pri, 'N'), sec = CONCAT(sec, 'KN'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'KN'), sec = CONCAT(sec, 'KN'), pos = pos + 2; END IF;
END IF;
ELSEIF SUBSTRING(st, pos+1, 2) = 'LI' AND NOT is_slavo_germanic THEN
SET pri = CONCAT(pri, 'KL'), sec = CONCAT(sec, 'L'), pos = pos + 2; ELSEIF pos = first AND (SUBSTRING(st, pos+1, 1) = 'Y'
OR SUBSTRING(st, pos+1, 2) IN ('ES', 'EP', 'EB', 'EL', 'EY', 'IB', 'IL', 'IN', 'IE', 'EI', 'ER')) THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'J'), pos = pos + 2; ELSEIF (SUBSTRING(st, pos+1, 2) = 'ER' OR SUBSTRING(st, pos+1, 1) = 'Y')
AND SUBSTRING(st, first, 6) NOT IN ('DANGER', 'RANGER', 'MANGER')
AND SUBSTRING(st, pos-1, 1) not IN ('E', 'I') AND SUBSTRING(st, pos-1, 3) NOT IN ('RGY', 'OGY') THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'J'), pos = pos + 2; ELSEIF SUBSTRING(st, pos+1, 1) IN ('E', 'I', 'Y') OR SUBSTRING(st, pos-1, 4) IN ('AGGI', 'OGGI') THEN
IF SUBSTRING(st, first, 4) IN ('VON ', 'VAN ') OR SUBSTRING(st, first, 3) = 'SCH'
OR SUBSTRING(st, pos+1, 2) = 'ET' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
IF SUBSTRING(st, pos+1, 4) = 'IER ' THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'J'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'K'), pos = pos + 2; END IF;
END IF;
ELSEIF SUBSTRING(st, pos+1, 1) = 'G' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 1; END IF;
WHEN ch = 'H' THEN
IF (pos = first OR SUBSTRING(st, pos-1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y'))
AND SUBSTRING(st, pos+1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y') THEN
SET pri = CONCAT(pri, 'H'), sec = CONCAT(sec, 'H'), pos = pos + 2; ELSE SET pos = pos + 1; END IF;
WHEN ch = 'J' THEN
IF SUBSTRING(st, pos, 4) = 'JOSE' OR SUBSTRING(st, first, 4) = 'SAN ' THEN
IF (pos = first AND SUBSTRING(st, pos+4, 1) = ' ') OR SUBSTRING(st, first, 4) = 'SAN ' THEN
SET pri = CONCAT(pri, 'H'), sec = CONCAT(sec, 'H'); ELSE
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'H'); END IF;
ELSEIF pos = first AND SUBSTRING(st, pos, 4) != 'JOSE' THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'A'); ELSE
IF SUBSTRING(st, pos-1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y') AND NOT is_slavo_germanic
AND SUBSTRING(st, pos+1, 1) IN ('A', 'O') THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'H'); ELSE
IF pos = last THEN
SET pri = CONCAT(pri, 'J'); ELSE
IF SUBSTRING(st, pos+1, 1) not IN ('L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z')
AND SUBSTRING(st, pos-1, 1) not IN ('S', 'K', 'L') THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'J'); END IF;
END IF;
END IF;
END IF;
IF SUBSTRING(st, pos+1, 1) = 'J' THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
WHEN ch = 'K' THEN
IF SUBSTRING(st, pos+1, 1) = 'K' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 1; END IF;
WHEN ch = 'L' THEN
IF SUBSTRING(st, pos+1, 1) = 'L' THEN
IF (pos = (last - 2) AND SUBSTRING(st, pos-1, 4) IN ('ILLO', 'ILLA', 'ALLE'))
OR ((SUBSTRING(st, last-1, 2) IN ('AS', 'OS') OR SUBSTRING(st, last) IN ('A', 'O'))
AND SUBSTRING(st, pos-1, 4) = 'ALLE') THEN
SET pri = CONCAT(pri, 'L'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'L'), sec = CONCAT(sec, 'L'), pos = pos + 2; END IF;
ELSE
SET pri = CONCAT(pri, 'L'), sec = CONCAT(sec, 'L'), pos = pos + 1; END IF;
WHEN ch = 'M' THEN
IF SUBSTRING(st, pos-1, 3) = 'UMB'
AND (pos + 1 = last OR SUBSTRING(st, pos+2, 2) = 'ER')
OR SUBSTRING(st, pos+1, 1) = 'M' THEN
SET pri = CONCAT(pri, 'M'), sec = CONCAT(sec, 'M'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'M'), sec = CONCAT(sec, 'M'), pos = pos + 1; END IF;
WHEN ch = 'N' THEN
IF SUBSTRING(st, pos+1, 1) = 'N' THEN
SET pri = CONCAT(pri, 'N'), sec = CONCAT(sec, 'N'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'N'), sec = CONCAT(sec, 'N'), pos = pos + 1; END IF;
WHEN ch = 'P' THEN
IF SUBSTRING(st, pos+1, 1) = 'H' THEN
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 2; ELSEIF SUBSTRING(st, pos+1, 1) IN ('P', 'B') THEN SET pri = CONCAT(pri, 'P'), sec = CONCAT(sec, 'P'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'P'), sec = CONCAT(sec, 'P'), pos = pos + 1; END IF;
WHEN ch = 'Q' THEN
IF SUBSTRING(st, pos+1, 1) = 'Q' THEN
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'K'), sec = CONCAT(sec, 'K'), pos = pos + 1; END IF;
WHEN ch = 'R' THEN
IF pos = last AND not is_slavo_germanic
AND SUBSTRING(st, pos-2, 2) = 'IE' AND SUBSTRING(st, pos-4, 2) NOT IN ('ME', 'MA') THEN
SET sec = CONCAT(sec, 'R'); ELSE
SET pri = CONCAT(pri, 'R'), sec = CONCAT(sec, 'R'); END IF;
IF SUBSTRING(st, pos+1, 1) = 'R' THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
WHEN ch = 'S' THEN
IF SUBSTRING(st, pos-1, 3) IN ('ISL', 'YSL') THEN
SET pos = pos + 1;
ELSEIF pos = first AND SUBSTRING(st, first, 5) = 'SUGAR' THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'S'), pos = pos + 1; ELSEIF SUBSTRING(st, pos, 2) = 'SH' THEN
IF SUBSTRING(st, pos+1, 4) IN ('HEIM', 'HOEK', 'HOLM', 'HOLZ') THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 2; END IF;
ELSEIF SUBSTRING(st, pos, 3) IN ('SIO', 'SIA') OR SUBSTRING(st, pos, 4) = 'SIAN' THEN
IF NOT is_slavo_germanic THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'X'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'), pos = pos + 3; END IF;
ELSEIF (pos = first AND SUBSTRING(st, pos+1, 1) IN ('M', 'N', 'L', 'W')) OR SUBSTRING(st, pos+1, 1) = 'Z' THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'X'); IF SUBSTRING(st, pos+1, 1) = 'Z' THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
ELSEIF SUBSTRING(st, pos, 2) = 'SC' THEN
IF SUBSTRING(st, pos+2, 1) = 'H' THEN
IF SUBSTRING(st, pos+3, 2) IN ('OO', 'ER', 'EN', 'UY', 'ED', 'EM') THEN
IF SUBSTRING(st, pos+3, 2) IN ('ER', 'EN') THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'SK'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'SK'), sec = CONCAT(sec, 'SK'), pos = pos + 3; END IF;
ELSE
IF pos = first AND SUBSTRING(st, first+3, 1) not IN ('A', 'E', 'I', 'O', 'U', 'Y') AND SUBSTRING(st, first+3, 1) != 'W' THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'S'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 3; END IF;
END IF;
ELSEIF SUBSTRING(st, pos+2, 1) IN ('I', 'E', 'Y') THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'), pos = pos + 3; ELSE
SET pri = CONCAT(pri, 'SK'), sec = CONCAT(sec, 'SK'), pos = pos + 3; END IF;
ELSEIF pos = last AND SUBSTRING(st, pos-2, 2) IN ('AI', 'OI') THEN
SET sec = CONCAT(sec, 'S'), pos = pos + 1; ELSE
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'); IF SUBSTRING(st, pos+1, 1) IN ('S', 'Z') THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
END IF;
WHEN ch = 'T' THEN
IF SUBSTRING(st, pos, 4) = 'TION' THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 3; ELSEIF SUBSTRING(st, pos, 3) IN ('TIA', 'TCH') THEN
SET pri = CONCAT(pri, 'X'), sec = CONCAT(sec, 'X'), pos = pos + 3; ELSEIF SUBSTRING(st, pos, 2) = 'TH' OR SUBSTRING(st, pos, 3) = 'TTH' THEN
IF SUBSTRING(st, pos+2, 2) IN ('OM', 'AM') OR SUBSTRING(st, first, 4) IN ('VON ', 'VAN ')
OR SUBSTRING(st, first, 3) = 'SCH' THEN
SET pri = CONCAT(pri, 'T'), sec = CONCAT(sec, 'T'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, '0'), sec = CONCAT(sec, 'T'), pos = pos + 2; END IF;
ELSEIF SUBSTRING(st, pos+1, 1) IN ('T', 'D') THEN
SET pri = CONCAT(pri, 'T'), sec = CONCAT(sec, 'T'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'T'), sec = CONCAT(sec, 'T'), pos = pos + 1; END IF;
WHEN ch = 'V' THEN
IF SUBSTRING(st, pos+1, 1) = 'V' THEN
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 2; ELSE
SET pri = CONCAT(pri, 'F'), sec = CONCAT(sec, 'F'), pos = pos + 1; END IF;
WHEN ch = 'W' THEN
IF SUBSTRING(st, pos, 2) = 'WR' THEN
SET pri = CONCAT(pri, 'R'), sec = CONCAT(sec, 'R'), pos = pos + 2; ELSEIF pos = first AND (SUBSTRING(st, pos+1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y')
OR SUBSTRING(st, pos, 2) = 'WH') THEN
IF SUBSTRING(st, pos+1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y') THEN
SET pri = CONCAT(pri, 'A'), sec = CONCAT(sec, 'F'), pos = pos + 1; ELSE
SET pri = CONCAT(pri, 'A'), sec = CONCAT(sec, 'A'), pos = pos + 1; END IF;
ELSEIF (pos = last AND SUBSTRING(st, pos-1, 1) IN ('A', 'E', 'I', 'O', 'U', 'Y'))
OR SUBSTRING(st, pos-1, 5) IN ('EWSKI', 'EWSKY', 'OWSKI', 'OWSKY')
OR SUBSTRING(st, first, 3) = 'SCH' THEN
SET sec = CONCAT(sec, 'F'), pos = pos + 1; ELSEIF SUBSTRING(st, pos, 4) IN ('WICZ', 'WITZ') THEN
SET pri = CONCAT(pri, 'TS'), sec = CONCAT(sec, 'FX'), pos = pos + 4; ELSE SET pos = pos + 1;
END IF;
WHEN ch = 'X' THEN
IF not(pos = last AND (SUBSTRING(st, pos-3, 3) IN ('IAU', 'EAU')
OR SUBSTRING(st, pos-2, 2) IN ('AU', 'OU'))) THEN
SET pri = CONCAT(pri, 'KS'), sec = CONCAT(sec, 'KS'); END IF;
IF SUBSTRING(st, pos+1, 1) IN ('C', 'X') THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
WHEN ch = 'Z' THEN
IF SUBSTRING(st, pos+1, 1) = 'H' THEN
SET pri = CONCAT(pri, 'J'), sec = CONCAT(sec, 'J'), pos = pos + 1; ELSEIF SUBSTRING(st, pos+1, 3) IN ('ZO', 'ZI', 'ZA')
OR (is_slavo_germanic AND pos > first AND SUBSTRING(st, pos-1, 1) != 'T') THEN
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'TS'); ELSE
SET pri = CONCAT(pri, 'S'), sec = CONCAT(sec, 'S'); END IF;
IF SUBSTRING(st, pos+1, 1) = 'Z' THEN
SET pos = pos + 2;
ELSE
SET pos = pos + 1;
END IF;
ELSE
SET pos = pos + 1; END CASE;
IF pos = prevpos THEN
SET pos = pos +1;
SET pri = CONCAT(pri,'<didnt incr>'); END IF;
END WHILE;
IF pri != sec THEN
SET pri = CONCAT(pri, ';', sec);
END IF;
RETURN (pri);
END
I also needed a levenshtein function and managed to sort out one of those, but performance was such (when comparing values from many rows) that it was far faster to read the rows and use the php built in levenshtein function.
来源:https://stackoverflow.com/questions/19374514/mysql-equivalent-of-php-metaphone-and-soundex