You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by to...@apache.org on 2003/10/12 21:56:13 UTC

cvs commit: jakarta-commons/codec/src/java/org/apache/commons/codec/language DoubleMetaphone.java

tobrien     2003/10/12 12:56:13

  Modified:    codec/src/java/org/apache/commons/codec/language
                        DoubleMetaphone.java
  Log:
  Removed all checkstyle violations from Base64 and the
  language encoders.  Most of the violations fixed concerned
  the placement of operators at the start of a new line
  instead of at the end of the previous line.
  
  In addition to the checkstyle fixes, DoubleMetaphone now
  handles two additional characters: C with a cedilla (U+00C7)
  and N with a tilde (U+00D1, the Spanish eñe).
  
  Revision  Changes    Path
  1.12      +37 -84    jakarta-commons/codec/src/java/org/apache/commons/codec/language/DoubleMetaphone.java
  
  Index: DoubleMetaphone.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/DoubleMetaphone.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- DoubleMetaphone.java	5 Oct 2003 21:45:48 -0000	1.11
  +++ DoubleMetaphone.java	12 Oct 2003 19:56:13 -0000	1.12
  @@ -146,16 +146,13 @@
                   break;
               case 'B':
                   result.append('P');
  -                index = charAt(value, index + 1) == 'B' 
  -                    ? index + 2 
  -                    : index + 1;
  -                break;
  -                // FIXME: Removed support for this character until 
  -                // Unicode code is found.
  -                /*            case 'Ç':
  +                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
  +                break;
  +            case '\u00C7':
  +                // A C with a Cedilla
                   result.append('S');
                   index++;
  -                break; */
  +                break; 
               case 'C':
                   index = handleC(value, result, index);
                   break;
  @@ -164,9 +161,7 @@
                   break;
               case 'F':
                   result.append('F');
  -                index = charAt(value, index + 1) == 'F' 
  -                    ? index + 2 
  -                    : index + 1;
  +                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
                   break;
               case 'G':
                   index = handleG(value, result, index, slavoGermanic);
  @@ -179,9 +174,7 @@
                   break;
               case 'K':
                   result.append('K');
  -                index = charAt(value, index + 1) == 'K' 
  -                    ? index + 2 
  -                    : index + 1;
  +                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
                   break;
               case 'L':
                   index = handleL(value, result, index);
  @@ -192,25 +185,19 @@
                   break;
               case 'N':
                   result.append('N');
  -                index = charAt(value, index + 1) == 'N' 
  -                    ? index + 2 
  -                    : index + 1;
  -                break;
  -                // FIXME: Removed support for this character until we
  -                // find the Unicode code
  -                /*
  -            case 'Ñ':
  +                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
  +                break;
  +            case '\u00D1':
  +                // N with a tilde (spanish ene)
                   result.append('N');
                   index++;
  -                break; */
  +                break;
               case 'P':
                   index = handleP(value, result, index);
                   break;
               case 'Q':
                   result.append('K');
  -                index = charAt(value, index + 1) == 'Q' 
  -                    ? index + 2 
  -                    : index + 1;
  +                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
                   break;
               case 'R':
                   index = handleR(value, result, index, slavoGermanic);
  @@ -223,9 +210,7 @@
                   break;
               case 'V':
                   result.append('F');
  -                index = charAt(value, index + 1) == 'V' 
  -                    ? index + 2 
  -                    : index + 1;
  +                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
                   break;
               case 'W':
                   index = handleW(value, result, index);
  @@ -255,9 +240,7 @@
       public Object encode(Object obj) throws EncoderException {
   
           if (!(obj instanceof String)) {
  -            throw new EncoderException("Parameter supplied to Metaphone " 
  -                                       + "encode is not of type " 
  -                                       + "java.lang.String"); 
  +            throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String"); 
           } else {
               return doubleMetaphone((String) obj);
           }
  @@ -482,17 +465,12 @@
           } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
               result.append("KL", "L");
               index += 2;
  -        } else if (index == 0 
  -                   && (charAt(value, index + 1) == 'Y' 
  -                       || contains(value, 
  -                                   index + 1, 
  -                                   2, 
  -                                   ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
  +        } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
               //-- -ges-, -gep-, -gel-, -gie- at beginning --//
               result.append('K', 'J');
               index += 2;
  -        } else if ((contains(value, index + 1, 2, "ER") 
  -                    || charAt(value, index + 1) == 'Y') &&
  +        } else if ((contains(value, index + 1, 2, "ER") || 
  +                    charAt(value, index + 1) == 'Y') &&
                      !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
                      !contains(value, index - 1, 1, "E", "I") && 
                      !contains(value, index - 1, 3, "RGY", "OGY")) {
  @@ -502,9 +480,7 @@
           } else if (contains(value, index + 1, 1, "E", "I", "Y") || 
                      contains(value, index - 1, 4, "AGGI", "OGGI")) {
               //-- Italian "biaggi" --//
  -            if ((contains(value, 0 ,4, "VAN ", "VON ") 
  -                 || contains(value, 0, 3, "SCH")) ||
  -                contains(value, index + 1, 2, "ET")) {
  +            if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
                   //-- obvious germanic --//
                   result.append('K');
               } else if (contains(value, index + 1, 4, "IER")) {
  @@ -539,10 +515,8 @@
                   result.append('K');
               }
               index += 2;
  -        } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) 
  -                   ||
  -                   (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) 
  -                   ||
  +        } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
  +                   (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
                      (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
               //-- Parker's rule (with some further refinements) - "hugh"
               index += 2;
  @@ -582,8 +556,7 @@
        */
       private int handleJ(String value, DoubleMetaphoneResult result, int index, 
                           boolean slavoGermanic) {
  -        if (contains(value, index, 4, "JOSE") 
  -            || contains(value, 0, 4, "SAN ")) {
  +        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
                   //-- obvious Spanish, "Jose", "San Jacinto" --//
                   if ((index == 0 && (charAt(value, index + 4) == ' ') || 
                        value.length() == 4) || contains(value, 0, 4, "SAN ")) {
  @@ -595,15 +568,12 @@
               } else {
                   if (index == 0 && !contains(value, index, 4, "JOSE")) {
                       result.append('J', 'A');
  -                } else if (isVowel(charAt(value, index - 1)) 
  -                           && !slavoGermanic 
  -                           &&(charAt(value, index + 1) == 'A' || 
  -                              charAt(value, index + 1) == 'O')) {
  +                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && 
  +                              (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
                       result.append('J', 'H');
                   } else if (index == value.length() - 1) {
                       result.append('J', ' ');
  -                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) 
  -                           && !contains(value, index - 1, 1, "S", "K", "L")) {
  +                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
                       result.append('J');
                   }
   
  @@ -645,9 +615,7 @@
               index += 2;
           } else {
               result.append('P');
  -            index = contains(value, index + 1, 1, "P", "B") 
  -                ? index + 2 
  -                : index + 1;
  +            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
           }
           return index;
       }
  @@ -692,8 +660,7 @@
                   result.append('X');
               }
               index += 2;
  -        } else if (contains(value, index, 3, "SIO", "SIA") 
  -                   || contains(value, index, 4, "SIAN")) {
  +        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
               //-- Italian and Armenian --//
               if (slavoGermanic) {
                   result.append('S');
  @@ -701,9 +668,7 @@
                   result.append('S', 'X');
               }
               index += 3;
  -        } else if ((index == 0 
  -                    && contains(value, index + 1, 1, "M", "N", "L", "W")) 
  -                   || contains(value, index + 1, 1, "Z")) {
  +        } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
               //-- german & anglicisations, e.g. "smith" match "schmidt" //
               // "snider" match "schneider" --//
               //-- also, -sz- in slavic language altho in hungarian it //
  @@ -720,9 +685,7 @@
               } else {
                   result.append('S');
               }
  -            index = contains(value, index + 1, 1, "S", "Z") 
  -                ? index + 2 
  -                : index + 1;
  +            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
           }
           return index;
       }
  @@ -745,8 +708,7 @@
                       result.append("SK");
                   }
               } else {
  -                if (index == 0 && !isVowel(charAt(value, 3)) 
  -                    && charAt(value, 3) != 'W') {
  +                if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
                       result.append('X', 'S');
                   } else {
                       result.append('X');
  @@ -785,9 +747,7 @@
               index += 2;
           } else {
               result.append('T');
  -            index = contains(value, index + 1, 1, "T", "D") 
  -                ? index + 2 
  -                : index + 1;
  +            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
           }
           return index;
       }
  @@ -813,8 +773,7 @@
                       result.append('A');
                   }
                   index++;
  -            } else if ((index == value.length() - 1 
  -                        && isVowel(charAt(value, index - 1))) ||
  +            } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
                          contains(value, index - 1, 
                                   5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
                          contains(value, 0, 3, "SCH")) {
  @@ -848,9 +807,7 @@
                   //-- French e.g. breaux --//
                   result.append("KS");
               }
  -            index = contains(value, index + 1, 1, "C", "X") 
  -                ? index + 2 
  -                : index + 1;
  +            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
           }
           return index;
       }
  @@ -865,9 +822,7 @@
               result.append('J');
               index += 2;
           } else {
  -            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") 
  -                || (slavoGermanic 
  -                    && (index > 0 && charAt(value, index - 1) != 'T'))) {
  +            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
                   result.append("S", "TS");
               } else {
                   result.append('S');
  @@ -926,10 +881,8 @@
                                                                      3, "SCH")) ||
                   contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
                   contains(value, index + 2, 1, "T", "S") ||
  -                ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 
  -                  0) &&
  -                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index 
  -                  + 1 == value.length() - 1)));
  +                ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
  +                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
       }
       
       /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org