You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@commons.apache.org by Voyer Henry <He...@loto-quebec.com> on 2004/10/18 19:44:41 UTC
RE : RE : RE: RE : Soundex Help
package org.apache.commons.codec;
import org.apache.commons.lang.StringUtils;
public class FrenchPhonex implements StringEncoder {
private static char[] VOYELLES = "aeiou".toCharArray();
private static char[] VOYELLES_SONORES = "aeiouy1234".toCharArray();
private static char[] TERMINAISONS = "txsz".toCharArray();
private static char[] CODES_NUMERIQUES = "".toCharArray();
public String encode(String arg0) throws EncoderException {
// Cas trivial
if (arg0 == null || arg0.length() == 0)
return "0.0";
String result = arg0.toLowerCase().trim();
// 1 - Remplacer les y par i
result = result.replace('y', 'i');
// 2 - Remplacer les lettres accentuées
result = normalisationDeCaractere(result);
// 3 - Remplacer les h muets
result = remplacerHMuets(result);
// 4 - Remplacer les ph par f
result = StringUtils.replace(result, "ph", "f");
// 5 - Remplacer les groupes de lettres suivantes
result = StringUtils.replace(result, "gan", "kan");
result = StringUtils.replace(result, "gam", "kam");
result = StringUtils.replace(result, "gain", "kain");
result = StringUtils.replace(result, "gaim", "kaim");
// 6 - Remplacer les son AI
result = remplacerSonAI(result);
// 7 - Remplacer le groupe de 3 lettres du son 'o', 'oua' et
'ein'
result = StringUtils.replace(result, "eau", "o");
result = StringUtils.replace(result, "oua", "2");
result = StringUtils.replace(result, "ein", "4");
result = StringUtils.replace(result, "ain", "4");
result = StringUtils.replace(result, "eim", "4");
result = StringUtils.replace(result, "aim", "4");
// 8 - Remplacer le son é
result = StringUtils.replace(result, "ai", "y");
result = StringUtils.replace(result, "ei", "y");
result = StringUtils.replace(result, "er", "yr");
result = StringUtils.replace(result, "ess", "yss");
result = StringUtils.replace(result, "et", "yt");
result = StringUtils.replace(result, "ez", "yz");
// 9 - Remplacer les groupes de 2 lettres du son "an"
// et "in" sauf si suivi par voyelle
result = SRSaufSuiviVoyelleSonore(result, "an", "1");
result = SRSaufSuiviVoyelleSonore(result, "am", "1");
result = SRSaufSuiviVoyelleSonore(result, "en", "1");
result = SRSaufSuiviVoyelleSonore(result, "em", "1");
result = SRSaufSuiviVoyelleSonore(result, "in", "4");
// 10 - Remplacer le son sch
result = StringUtils.replace(result, "sch", "5");
// 11 - Remplacer le s si precede et suivi d'une voyelle
sonore
result = SRSaufSuiviPrecedeVoyelleSonore(result, "s", "z");
// 12 - Remplacement des groupes de lettres suivantes
result = StringUtils.replace(result, "oe", "e");
result = StringUtils.replace(result, "ou", "e");
result = StringUtils.replace(result, "au", "o");
result = StringUtils.replace(result, "oi", "2");
result = StringUtils.replace(result, "oy", "2");
result = StringUtils.replace(result, "ou", "3");
result = StringUtils.replace(result, "ch", "5");
result = StringUtils.replace(result, "sh", "5");
result = StringUtils.replace(result, "ss", "s");
result = StringUtils.replace(result, "sc", "s");
// 13 - Remplacement du c par s s'il est suivi d'un e ou i
result = StringUtils.replace(result, "ce", "se");
result = StringUtils.replace(result, "ci", "si");
// 14 - Remplacement divers
result = StringUtils.replace(result, "c", "k");
result = StringUtils.replace(result, "q", "k");
result = StringUtils.replace(result, "qu", "k");
result = StringUtils.replace(result, "ga", "ka");
result = StringUtils.replace(result, "go", "ko");
result = StringUtils.replace(result, "gu", "ku");
result = StringUtils.replace(result, "gy", "ky");
result = StringUtils.replace(result, "g2", "k2");
result = StringUtils.replace(result, "g1", "k1");
result = StringUtils.replace(result, "g3", "k3");
result = StringUtils.replace(result, "a", "o");
result = StringUtils.replace(result, "d", "t");
result = StringUtils.replace(result, "p", "t");
result = StringUtils.replace(result, "j", "g");
result = StringUtils.replace(result, "b", "f");
result = StringUtils.replace(result, "v", "f");
result = StringUtils.replace(result, "m", "n");
// 15 - Suppression des lettres dupliques
result = suppressionDupliques(result);
// 16 - Suppression des terminaisons
result = suppressionTerminaisons(result);
// 17 - Transformations en codes
result = codesNumeriques(result);
// 18 - Conversion au code
result = conversionCode(result);
return result;
}
public Object encode(Object arg0) throws EncoderException {
Object result;
if (!(arg0 instanceof java.lang.String)) {
throw new EncoderException("Parameter supplied to
Soundex encode is not of type java.lang.String");
} else {
result = encode((String) arg0);
}
return result;
}
/**
* Elimine les accents, les non characteres
* @param result
* @return
*/
private String normalisationDeCaractere(String str) {
int count = 0;
int len = str.length();
StringBuffer buffer = new StringBuffer();
buffer.ensureCapacity(len);
for (int i = 0; i < len; i++) {
char character = str.charAt(i);
switch (character) {
case ('à') :
case ('ä') :
case ('â') :
character = 'a';
break;
case ('ç') :
character = 's';
break;
case ('ë') :
character = 'e';
break;
case ('ï') :
case ('î') :
character = 'i';
break;
case ('ô') :
case ('ö') :
character = 'o';
break;
case ('ù') :
case ('û') :
case ('ü') :
character = 'u';
break;
case ('é') :
case ('ê') :
character = 'y';
break;
}
if (Character.isLetter(character)) {
buffer.append(character);
}
}
return buffer.toString();
}
/**
* Remplace tout les h sauf ceux qui font partie de la combinaison
* ph, ch, sh
* @param result
* @return
*/
private String remplacerHMuets(String str) {
int len = str.length();
int count = 0;
StringBuffer buffer = new StringBuffer();
buffer.ensureCapacity(len);
for (int i = 0; i < len; i++) {
char character = str.charAt(i);
if (character == 'h' && i != 0) {
char precedent = str.charAt(i - 1);
if (precedent == 'c' || precedent == 'p' ||
precedent == 's') {
buffer.append(character);
}
} else if (character != 'h') {
buffer.append(character);
}
}
return buffer.toString();
}
/**
* Remplace le son AI
* @param result
* @return
*/
private String remplacerSonAI(String result) {
int len = VOYELLES.length;
for (int i = 0; i < len; i++) {
result =
StringUtils.replace(
result,
"ain" + VOYELLES[i],
"yn" + VOYELLES[i]);
result =
StringUtils.replace(
result,
"ein" + VOYELLES[i],
"yn" + VOYELLES[i]);
result =
StringUtils.replace(
result,
"aim" + VOYELLES[i],
"yn" + VOYELLES[i]);
result =
StringUtils.replace(
result,
"eim" + VOYELLES[i],
"yn" + VOYELLES[i]);
}
return result;
}
/**
* @param charPrecedent
* @return
*/
private boolean isInVoyelleSonore(char character) {
for (int i = 0; i < VOYELLES_SONORES.length; i++) {
if (character == VOYELLES_SONORES[i])
return true;
}
return false;
}
/**
* @param charPrecedent
* @return
*/
private boolean isInTerminaisons(char character) {
for (int i = 0; i < TERMINAISONS.length; i++) {
if (character == TERMINAISONS[i])
return true;
}
return false;
}
/**
* @param result
* @param string
* @param string2
* @return
*/
private String SRSaufSuiviVoyelleSonore(
String result,
String toReplace,
String withReplace) {
int sizeToReplace = toReplace.length();
int sizeWithReplace = withReplace.length();
int pos = result.indexOf(toReplace);
while (pos != -1)
{
int posNextChar = sizeToReplace + pos;
int resultLength = result.length();
if (posNextChar < resultLength)
{
char nextChar = result.charAt(posNextChar);
if (!isInVoyelleSonore(nextChar))
{
result = result.substring(0,pos)
+ withReplace
+
result.substring(posNextChar,resultLength);
pos = result.indexOf(toReplace, pos
+ sizeWithReplace);
}
else
{
pos = result.indexOf(toReplace, pos
+ sizeToReplace);
}
}
else // fin du texte
{
result = result.substring(0,pos)
+ withReplace
+
result.substring(posNextChar,resultLength);
pos = result.indexOf(toReplace, pos +
sizeWithReplace);
}
}
return result;
}
/**
* @param result
* @param string
* @param string2
* @return
*/
private String SRSaufSuiviPrecedeVoyelleSonore(
String result,
String string,
String string2) {
// TODO Auto-generated method stub
return result;
}
/**
* @param result
* @return
*/
private String suppressionDupliques(String result) {
// TODO Auto-generated method stub
return result;
}
/**
* @param result
* @return
*/
private String suppressionTerminaisons(String result) {
// TODO Auto-generated method stub
return result;
}
/**
* @param result
* @return
*/
private String conversionCode(String result) {
// TODO Auto-generated method stub
return result;
}
/**
* @param result
* @return
*/
private String codesNumeriques(String result) {
// TODO Auto-generated method stub
return result;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-user-help@jakarta.apache.org