You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@commons.apache.org by Voyer Henry <He...@loto-quebec.com> on 2004/10/18 19:44:41 UTC

RE : RE : RE: RE : Soundex Help

package org.apache.commons.codec;

import org.apache.commons.lang.StringUtils;

public class FrenchPhonex implements StringEncoder {

	private static char[] VOYELLES = "aeiou".toCharArray();
	private static char[] VOYELLES_SONORES = "aeiouy1234".toCharArray();
	private static char[] TERMINAISONS = "txsz".toCharArray();
	private static char[] CODES_NUMERIQUES = "".toCharArray();

	public String encode(String arg0) throws EncoderException {

		// Cas trivial
		if (arg0 == null || arg0.length() == 0)
			return "0.0";

		String result = arg0.toLowerCase().trim();

		// 1 - Remplacer les y par i
		result = result.replace('y', 'i');

		// 2 - Remplacer les lettres accentuées
		result = normalisationDeCaractere(result);

		// 3 - Remplacer les h muets
		result = remplacerHMuets(result);

		// 4 - Remplacer les ph par f
		result = StringUtils.replace(result, "ph", "f");

		// 5 - Remplacer les groupes de lettres suivantes
		result = StringUtils.replace(result, "gan", "kan");
		result = StringUtils.replace(result, "gam", "kam");
		result = StringUtils.replace(result, "gain", "kain");
		result = StringUtils.replace(result, "gaim", "kaim");

		// 6 - Remplacer les son AI
		result = remplacerSonAI(result);

		// 7 - Remplacer le groupe de 3 lettres du son 'o', 'oua' et
'ein'
		result = StringUtils.replace(result, "eau", "o");
		result = StringUtils.replace(result, "oua", "2");
		result = StringUtils.replace(result, "ein", "4");
		result = StringUtils.replace(result, "ain", "4");
		result = StringUtils.replace(result, "eim", "4");
		result = StringUtils.replace(result, "aim", "4");

		// 8 - Remplacer le son é
		result = StringUtils.replace(result, "ai", "y");
		result = StringUtils.replace(result, "ei", "y");
		result = StringUtils.replace(result, "er", "yr");
		result = StringUtils.replace(result, "ess", "yss");
		result = StringUtils.replace(result, "et", "yt");
		result = StringUtils.replace(result, "ez", "yz");

		// 9 - Remplacer les groupes de 2 lettres du son "an"
		// et "in" sauf si suivi par voyelle		
		result = SRSaufSuiviVoyelleSonore(result, "an", "1");
		result = SRSaufSuiviVoyelleSonore(result, "am", "1");
		result = SRSaufSuiviVoyelleSonore(result, "en", "1");
		result = SRSaufSuiviVoyelleSonore(result, "em", "1");
		result = SRSaufSuiviVoyelleSonore(result, "in", "4");

		// 10 - Remplacer le son sch
		result = StringUtils.replace(result, "sch", "5");

		// 11 - Remplacer le s si precede et suivi d'une voyelle
sonore
		result = SRSaufSuiviPrecedeVoyelleSonore(result, "s", "z");

		// 12 - Remplacement des groupes de lettres suivantes
		result = StringUtils.replace(result, "oe", "e");
		result = StringUtils.replace(result, "ou", "e");
		result = StringUtils.replace(result, "au", "o");
		result = StringUtils.replace(result, "oi", "2");
		result = StringUtils.replace(result, "oy", "2");
		result = StringUtils.replace(result, "ou", "3");
		result = StringUtils.replace(result, "ch", "5");
		result = StringUtils.replace(result, "sh", "5");
		result = StringUtils.replace(result, "ss", "s");
		result = StringUtils.replace(result, "sc", "s");

		// 13 - Remplacement du c par s s'il est suivi d'un e ou i
		result = StringUtils.replace(result, "ce", "se");
		result = StringUtils.replace(result, "ci", "si");

		// 14 - Remplacement divers
		result = StringUtils.replace(result, "c", "k");
		result = StringUtils.replace(result, "q", "k");
		result = StringUtils.replace(result, "qu", "k");

		result = StringUtils.replace(result, "ga", "ka");
		result = StringUtils.replace(result, "go", "ko");
		result = StringUtils.replace(result, "gu", "ku");
		result = StringUtils.replace(result, "gy", "ky");
		result = StringUtils.replace(result, "g2", "k2");
		result = StringUtils.replace(result, "g1", "k1");
		result = StringUtils.replace(result, "g3", "k3");

		result = StringUtils.replace(result, "a", "o");
		result = StringUtils.replace(result, "d", "t");
		result = StringUtils.replace(result, "p", "t");
		result = StringUtils.replace(result, "j", "g");
		result = StringUtils.replace(result, "b", "f");
		result = StringUtils.replace(result, "v", "f");
		result = StringUtils.replace(result, "m", "n");

		// 15 - Suppression des lettres dupliques
		result = suppressionDupliques(result);

		// 16 - Suppression des terminaisons
		result = suppressionTerminaisons(result);

		// 17 - Transformations en codes
		result = codesNumeriques(result);

		// 18 - Conversion au code
		result = conversionCode(result);

		return result;
	}

	public Object encode(Object arg0) throws EncoderException {

		Object result;

		if (!(arg0 instanceof java.lang.String)) {
			throw new EncoderException("Parameter supplied to
Soundex encode is not of type java.lang.String");
		} else {
			result = encode((String) arg0);
		}

		return result;
	}

	/**
	 * Elimine les accents, les non characteres 
	 * @param result
	 * @return
	 */
	private String normalisationDeCaractere(String str) {

		int count = 0;
		int len = str.length();

		StringBuffer buffer = new StringBuffer();
		buffer.ensureCapacity(len);

		for (int i = 0; i < len; i++) {

			char character = str.charAt(i);

			switch (character) {
				case ('à') :
				case ('ä') :
				case ('â') :
					character = 'a';
					break;
				case ('ç') :
					character = 's';
					break;
				case ('ë') :
					character = 'e';
					break;
				case ('ï') :
				case ('î') :
					character = 'i';
					break;
				case ('ô') :
				case ('ö') :
					character = 'o';
					break;
				case ('ù') :
				case ('û') :
				case ('ü') :
					character = 'u';
					break;
				case ('é') :
				case ('ê') :
					character = 'y';
					break;
			}

			if (Character.isLetter(character)) {
				buffer.append(character);
			}
		}

		return buffer.toString();
	}

	/**
	 * Remplace tout les h sauf ceux qui font partie de la combinaison
	 * ph, ch, sh
	 * @param result
	 * @return
	 */
	private String remplacerHMuets(String str) {

		int len = str.length();
		int count = 0;

		StringBuffer buffer = new StringBuffer();
		buffer.ensureCapacity(len);

		for (int i = 0; i < len; i++) {

			char character = str.charAt(i);

			if (character == 'h' && i != 0) {
				char precedent = str.charAt(i - 1);

				if (precedent == 'c' || precedent == 'p' ||
precedent == 's') {
					buffer.append(character);
				}
			} else if (character != 'h') {
				buffer.append(character);
			}
		}
		return buffer.toString();
	}

	/**
	 * Remplace le son AI
	 * @param result
	 * @return
	 */
	private String remplacerSonAI(String result) {
		int len = VOYELLES.length;

		for (int i = 0; i < len; i++) {
			result =
				StringUtils.replace(
					result,
					"ain" + VOYELLES[i],
					"yn" + VOYELLES[i]);
			result =
				StringUtils.replace(
					result,
					"ein" + VOYELLES[i],
					"yn" + VOYELLES[i]);
			result =
				StringUtils.replace(
					result,
					"aim" + VOYELLES[i],
					"yn" + VOYELLES[i]);
			result =
				StringUtils.replace(
					result,
					"eim" + VOYELLES[i],
					"yn" + VOYELLES[i]);
		}
		return result;
	}



	/**
	 * @param charPrecedent
	 * @return
	 */
	private boolean isInVoyelleSonore(char character) {

		for (int i = 0; i < VOYELLES_SONORES.length; i++) {
			if (character == VOYELLES_SONORES[i])
				return true;
		}
		return false;
	}

	/**
	 * @param charPrecedent
	 * @return
	 */
	private boolean isInTerminaisons(char character) {

		for (int i = 0; i < TERMINAISONS.length; i++) {
			if (character == TERMINAISONS[i])
				return true;
		}
		return false;
	}
	
	/**
	 * @param result
	 * @param string
	 * @param string2
	 * @return
	 */
	private String SRSaufSuiviVoyelleSonore(
		String result,
		String toReplace,
		String withReplace) {

        int sizeToReplace = toReplace.length();
        int sizeWithReplace = withReplace.length();
        
        int pos = result.indexOf(toReplace);
        
		while (pos != -1)
		{						
			int posNextChar = sizeToReplace + pos;
			int resultLength = result.length();
			
			if (posNextChar < resultLength)
			{

				char nextChar = result.charAt(posNextChar);
				
				if (!isInVoyelleSonore(nextChar))

				{
				    result = result.substring(0,pos)
				             + 	withReplace
				             +
result.substring(posNextChar,resultLength);
				             
					pos = result.indexOf(toReplace, pos
+ sizeWithReplace);
				}
				else
				{
					pos = result.indexOf(toReplace, pos
+ sizeToReplace);
				}
			}
			else // fin du texte
			{				
				result = result.substring(0,pos)
						   + withReplace
						   +
result.substring(posNextChar,resultLength);
						   
				pos = result.indexOf(toReplace, pos +
sizeWithReplace);		   
			}			
		}
		return result;
	}
	
	/**
	 * @param result
	 * @param string
	 * @param string2
	 * @return
	 */
	private String SRSaufSuiviPrecedeVoyelleSonore(
		String result,
		String string,
		String string2) {
		// TODO Auto-generated method stub
		return result;
	}
	/**
	 * @param result
	 * @return
	 */
	private String suppressionDupliques(String result) {
		// TODO Auto-generated method stub
		return result;
	}

	/**
	 * @param result
	 * @return
	 */
	private String suppressionTerminaisons(String result) {
		// TODO Auto-generated method stub
		return result;
	}

	/**
	 * @param result
	 * @return
	 */
	private String conversionCode(String result) {
		// TODO Auto-generated method stub
		return result;
	}

	/**
	 * @param result
	 * @return
	 */
	private String codesNumeriques(String result) {
		// TODO Auto-generated method stub
		return result;
	}
}

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-user-help@jakarta.apache.org