You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2001/06/20 17:18:50 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/readers MIME2Java.java

neilg       01/06/20 08:18:48

  Modified:    java/src/org/apache/xerces/readers MIME2Java.java
  Log:
  support IANA aliases for encodings that Java supports
  
  Revision  Changes    Path
  1.5       +148 -31   xml-xerces/java/src/org/apache/xerces/readers/MIME2Java.java
  
  Index: MIME2Java.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/readers/MIME2Java.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- MIME2Java.java	2001/03/19 17:36:26	1.4
  +++ MIME2Java.java	2001/06/20 15:18:39	1.5
  @@ -445,9 +445,9 @@
    *      </TD>
    *  </TR>
    *  <TR>
  - *      <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
  + *      <TD WIDTH="33%">Japanese: ISO-2022-jp</TD>
    *      <TD WIDTH="15%">
  - *          <P ALIGN="CENTER">iso-2020-jp
  + *          <P ALIGN="CENTER">ISO-2022-jp
    *      </TD>
    *      <TD WIDTH="12%">
    *          <P ALIGN="CENTER">MIME
  @@ -518,7 +518,7 @@
    *  </TR>
    * </TABLE>
    * 
  - * @version $Id: MIME2Java.java,v 1.4 2001/03/19 17:36:26 neilg Exp $
  + * @version $Id: MIME2Java.java,v 1.5 2001/06/20 15:18:39 neilg Exp $
    * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
    */
   public class MIME2Java {
  @@ -531,17 +531,57 @@
           //    <preferred MIME name (uppercase)>, <Java encoding name>
           s_enchash.put("UTF-8", "UTF8");
           s_enchash.put("US-ASCII",        "ASCII");
  -        s_enchash.put("ISO-8859-1",      "8859_1");
  -        s_enchash.put("ISO-8859-2",      "8859_2");
  -        s_enchash.put("ISO-8859-3",      "8859_3");
  -        s_enchash.put("ISO-8859-4",      "8859_4");
  -        s_enchash.put("ISO-8859-5",      "8859_5");
  -        s_enchash.put("ISO-8859-6",      "8859_6");
  -        s_enchash.put("ISO-8859-7",      "8859_7");
  -        s_enchash.put("ISO-8859-8",      "8859_8");
  -        s_enchash.put("ISO-8859-9",      "8859_9");
  -        s_enchash.put("ISO-2022-JP",     "JIS");
  +        s_enchash.put("ISO-8859-1",      "ISO8859_1");
  +        s_enchash.put("ISO-IR-100",      "ISO8859_1");
  +        s_enchash.put("ISO_8859-1",      "ISO8859_1");
  +        s_enchash.put("LATIN1",      "ISO8859_1");
  +        s_enchash.put("L1",      "ISO8859_1");
  +        s_enchash.put("IBM819",      "ISO8859_1");
  +        s_enchash.put("CP819",      "ISO8859_1");
  +        s_enchash.put("ISO-8859-2",      "ISO8859_2");
  +        s_enchash.put("ISO-IR-101",      "ISO8859_2");
  +        s_enchash.put("ISO_8859-2",      "ISO8859_2");
  +        s_enchash.put("LATIN2",      "ISO8859_2");
  +        s_enchash.put("L2",      "ISO8859_2");
  +        s_enchash.put("ISO-8859-3",      "ISO8859_3");
  +        s_enchash.put("ISO-IR-109",      "ISO8859_3");
  +        s_enchash.put("ISO_8859-3",      "ISO8859_3");
  +        s_enchash.put("LATIN3",      "ISO8859_3");
  +        s_enchash.put("L3",      "ISO8859_3");
  +        s_enchash.put("ISO-8859-4",      "ISO8859_4");
  +        s_enchash.put("ISO-IR-110",      "ISO8859_4");
  +        s_enchash.put("ISO_8859-4",      "ISO8859_4");
  +        s_enchash.put("LATIN4",      "ISO8859_4");
  +        s_enchash.put("L4",      "ISO8859_4");
  +        s_enchash.put("ISO-8859-5",      "ISO8859_5");
  +        s_enchash.put("ISO-IR-144",      "ISO8859_5");
  +        s_enchash.put("ISO_8859-5",      "ISO8859_5");
  +        s_enchash.put("CYRILLIC",      "ISO8859_5");
  +        s_enchash.put("ISO-8859-6",      "ISO8859_6");
  +        s_enchash.put("ISO-IR-127",      "ISO8859_6");
  +        s_enchash.put("ISO_8859-6",      "ISO8859_6");
  +        s_enchash.put("ECMA-114",      "ISO8859_6");
  +        s_enchash.put("ASMO-708",      "ISO8859_6");
  +        s_enchash.put("ARABIC",      "ISO8859_6");
  +        s_enchash.put("ISO-8859-7",      "ISO8859_7");
  +        s_enchash.put("ISO-IR-126",      "ISO8859_7");
  +        s_enchash.put("ISO_8859-7",      "ISO8859_7");
  +        s_enchash.put("ELOT_928",      "ISO8859_7");
  +        s_enchash.put("ECMA-118",      "ISO8859_7");
  +        s_enchash.put("GREEK",      "ISO8859_7");
  +        s_enchash.put("GREEK8",      "ISO8859_7");
  +        s_enchash.put("ISO-8859-8",      "ISO8859_8");
  +        s_enchash.put("ISO-IR-138",      "ISO8859_8");
  +        s_enchash.put("ISO_8859-8",      "ISO8859_8");
  +        s_enchash.put("HEBREW",      "ISO8859_8");
  +        s_enchash.put("ISO-8859-9",      "ISO8859_9");
  +        s_enchash.put("ISO-IR-148",      "ISO8859_9");
  +        s_enchash.put("ISO_8859-9",      "ISO8859_9");
  +        s_enchash.put("LATIN5",      "ISO8859_9");
  +        s_enchash.put("L5",      "ISO8859_9");
  +        s_enchash.put("ISO-2022-JP",     "ISO2022JP");
           s_enchash.put("SHIFT_JIS",       "SJIS");
  +        s_enchash.put("MS_Kanji",       "SJIS");
           /**
            * MS932 is suitable for Windows-31J,
            * but JDK 1.1.x does not support MS932.
  @@ -552,16 +592,17 @@
           } else {
               s_enchash.put("WINDOWS-31J",      "MS932");
           }
  -        s_enchash.put("EUC-JP",          "EUCJIS");
  +        s_enchash.put("EUC-JP",          "EUC_JP");
           s_enchash.put("GB2312",          "GB2312");
           s_enchash.put("BIG5",            "Big5");
  -        s_enchash.put("EUC-KR",          "KSC5601");
  +        s_enchash.put("EUC-KR",          "EUC_KR");
           s_enchash.put("ISO-2022-KR",     "ISO2022KR");
           s_enchash.put("KOI8-R",          "KOI8_R");
   
           s_enchash.put("EBCDIC-CP-US",    "CP037");
           s_enchash.put("EBCDIC-CP-CA",    "CP037");
           s_enchash.put("EBCDIC-CP-NL",    "CP037");
  +        s_enchash.put("EBCDIC-CP-WT",    "CP037");
           s_enchash.put("EBCDIC-CP-DK",    "CP277");
           s_enchash.put("EBCDIC-CP-NO",    "CP277");
           s_enchash.put("EBCDIC-CP-FI",    "CP278");
  @@ -573,45 +614,103 @@
           s_enchash.put("EBCDIC-CP-AR1",   "CP420");
           s_enchash.put("EBCDIC-CP-HE",    "CP424");
           s_enchash.put("EBCDIC-CP-CH",    "CP500");
  +        s_enchash.put("EBCDIC-CP-BE",    "CP500");
  +        s_enchash.put("CP-AR",        "CP868");
  +        s_enchash.put("CP-GR",        "CP869");
           s_enchash.put("EBCDIC-CP-ROECE", "CP870");
           s_enchash.put("EBCDIC-CP-YU",    "CP870");
           s_enchash.put("EBCDIC-CP-IS",    "CP871");
           s_enchash.put("EBCDIC-CP-AR2",   "CP918");
   
  -	// Add support for Cp1252
  +	    // Add support for Cp1252 and its friends
           s_enchash.put("CP1252",   "Cp1252");
  -	// Add support for two Thai encodings.
  +        s_enchash.put("WINDOWS-1250",   "Cp1250");
  +        s_enchash.put("WINDOWS-1251",   "Cp1251");
  +        s_enchash.put("WINDOWS-1252",   "Cp1252");
  +        s_enchash.put("WINDOWS-1253",   "Cp1253");
  +        s_enchash.put("WINDOWS-1254",   "Cp1254");
  +        s_enchash.put("WINDOWS-1255",   "Cp1255");
  +        s_enchash.put("WINDOWS-1256",   "Cp1256");
  +        s_enchash.put("WINDOWS-1257",   "Cp1257");
  +        s_enchash.put("WINDOWS-1258",   "Cp1258");
  +	    // Add support for two Thai encodings.
           s_enchash.put("CP874",   "Cp874");
           s_enchash.put("TIS620",   "TIS620");
  +        s_enchash.put("TIS-620",   "TIS620");
                                                   // j:CNS11643 -> EUC-TW?
  -                                                // ISO-2022-CN? ISO-2022-CN-EXT?
  +        s_enchash.put("ISO-2022-CN",    "ISO2022CN");
  +        s_enchash.put("X0201",  "JIS0201");
  +        s_enchash.put("X0208",  "JIS0208");
  +        s_enchash.put("X0212",  "JIS0212");
  +        s_enchash.put("ISO-IR-159",  "JIS0212");
                                                   
           s_revhash = new Hashtable();
           //    <Java encoding name (uppercase)>, <preferred MIME name>
           s_revhash.put("UTF8", "UTF-8");
           s_revhash.put("ASCII", "US-ASCII");
  -        s_revhash.put("8859_1", "ISO-8859-1");
  -        s_revhash.put("8859_2", "ISO-8859-2");
  -        s_revhash.put("8859_3", "ISO-8859-3");
  -        s_revhash.put("8859_4", "ISO-8859-4");
  -        s_revhash.put("8859_5", "ISO-8859-5");
  -        s_revhash.put("8859_6", "ISO-8859-6");
  -        s_revhash.put("8859_7", "ISO-8859-7");
  -        s_revhash.put("8859_8", "ISO-8859-8");
  -        s_revhash.put("8859_9", "ISO-8859-9");
  -        s_revhash.put("JIS", "ISO-2022-JP");
  +        s_revhash.put("ISO8859_1", "ISO-8859-1");
  +        s_revhash.put("ISO8859_1", "ISO-IR-100");
  +        s_revhash.put("ISO8859_1", "ISO_8859-1");
  +        s_revhash.put("ISO8859_1", "LATIN1");
  +        s_revhash.put("ISO8859_1", "L1");
  +        s_revhash.put("ISO8859_1", "IBM819");
  +        s_revhash.put("ISO8859_1", "CP819");
  +        s_revhash.put("ISO8859_2", "ISO-8859-2");
  +        s_revhash.put("ISO8859_2", "ISO-IR-101");
  +        s_revhash.put("ISO8859_2", "ISO_8859-2");
  +        s_revhash.put("ISO8859_2", "LATIN2");
  +        s_revhash.put("ISO8859_2", "L2");
  +        s_revhash.put("ISO8859_3", "ISO-8859-3");
  +        s_revhash.put("ISO8859_3", "ISO-IR-109");
  +        s_revhash.put("ISO8859_3", "ISO_8859-3");
  +        s_revhash.put("ISO8859_3", "LATIN3");
  +        s_revhash.put("ISO8859_3", "L3");
  +        s_revhash.put("ISO8859_4", "ISO-8859-4");
  +        s_revhash.put("ISO8859_4", "ISO-IR-110");
  +        s_revhash.put("ISO8859_4", "ISO_8859-4");
  +        s_revhash.put("ISO8859_4", "LATIN4");
  +        s_revhash.put("ISO8859_4", "L4");
  +        s_revhash.put("ISO8859_5", "ISO-8859-5");
  +        s_revhash.put("ISO8859_5", "ISO-IR-144");
  +        s_revhash.put("ISO8859_5", "ISO_8859-5");
  +        s_revhash.put("ISO8859_5", "CYRILLIC");
  +        s_revhash.put("ISO8859_6", "ISO-8859-6");
  +        s_revhash.put("ISO8859_6", "ISO-IR-127");
  +        s_revhash.put("ISO8859_6", "ISO_8859-6");
  +        s_revhash.put("ISO8859_6", "ECMA-114");
  +        s_revhash.put("ISO8859_6", "ASMO-708");
  +        s_revhash.put("ISO8859_6", "ARABIC");
  +        s_revhash.put("ISO8859_7", "ISO-8859-7");
  +        s_revhash.put("ISO8859_7", "ISO-IR-126");
  +        s_revhash.put("ISO8859_7", "ISO_8859-7");
  +        s_revhash.put("ISO8859_7", "ELOT_928");
  +        s_revhash.put("ISO8859_7", "ECMA-118");
  +        s_revhash.put("ISO8859_7", "GREEK");
  +        s_revhash.put("ISO8859_7", "GREEK8");
  +        s_revhash.put("ISO8859_8", "ISO-8859-8");
  +        s_revhash.put("ISO8859_8", "ISO-IR-138");
  +        s_revhash.put("ISO8859_8", "ISO_8859-8");
  +        s_revhash.put("ISO8859_8", "HEBREW");
  +        s_revhash.put("ISO8859_9", "ISO-8859-9");
  +        s_revhash.put("ISO8859_9", "ISO-IR-148");
  +        s_revhash.put("ISO8859_9", "ISO_8859-9");
  +        s_revhash.put("ISO8859_9", "LATIN5");
  +        s_revhash.put("ISO8859_9", "L5");
  +        s_revhash.put("ISO2022JP", "ISO-2022-JP");
           s_revhash.put("SJIS", "Shift_JIS");
  +        s_revhash.put("SJIS", "MS_Kanji");
           s_revhash.put("MS932", "WINDOWS-31J");
  -        s_revhash.put("EUCJIS", "EUC-JP");
  +        s_revhash.put("EUC_JP", "EUC-JP");
           s_revhash.put("GB2312", "GB2312");
           s_revhash.put("BIG5", "Big5");
  -        s_revhash.put("KSC5601", "EUC-KR");
  +        s_revhash.put("EUC_KR", "EUC-KR");
           s_revhash.put("ISO2022KR", "ISO-2022-KR");
           s_revhash.put("KOI8_R", "KOI8-R");
   
           s_revhash.put("CP037", "EBCDIC-CP-US");
           s_revhash.put("CP037", "EBCDIC-CP-CA");
           s_revhash.put("CP037", "EBCDIC-CP-NL");
  +        s_revhash.put("CP037", "EBCDIC-CP-WT");
           s_revhash.put("CP277", "EBCDIC-CP-DK");
           s_revhash.put("CP277", "EBCDIC-CP-NO");
           s_revhash.put("CP278", "EBCDIC-CP-FI");
  @@ -623,18 +722,36 @@
           s_revhash.put("CP420", "EBCDIC-CP-AR1");
           s_revhash.put("CP424", "EBCDIC-CP-HE");
           s_revhash.put("CP500", "EBCDIC-CP-CH");
  +        s_revhash.put("CP500", "EBCDIC-CP-BE");
  +        s_revhash.put("CP868",      "CP-AR");
  +        s_revhash.put("CP869",      "CP-GR");
           s_revhash.put("CP870", "EBCDIC-CP-ROECE");
           s_revhash.put("CP870", "EBCDIC-CP-YU");
           s_revhash.put("CP871", "EBCDIC-CP-IS");
           s_revhash.put("CP918", "EBCDIC-CP-AR2");
   
  -	// Add support for Cp1252
  +	// Add support for Cp1252 and friends
   	// Since this code page should be written out in mixed case,
   	// there is no need to reverse the function.
           s_revhash.put("Cp1252", "Cp1252");
  +        s_revhash.put("Cp1250", "WINDOWS-1250");
  +        s_revhash.put("Cp1251", "WINDOWS-1251");
  +        s_revhash.put("Cp1252", "WINDOWS-1252");
  +        s_revhash.put("Cp1253", "WINDOWS-1253");
  +        s_revhash.put("Cp1254", "WINDOWS-1254");
  +        s_revhash.put("Cp1255", "WINDOWS-1255");
  +        s_revhash.put("Cp1256", "WINDOWS-1256");
  +        s_revhash.put("Cp1257", "WINDOWS-1257");
  +        s_revhash.put("Cp1258", "WINDOWS-1258");
   	// Add support for two Thai encodings.
           s_revhash.put("Cp874", "Cp874");
           s_revhash.put("TIS620", "TIS620");
  +        s_revhash.put("TIS620", "TIS-620");
  +        s_revhash.put("ISO2022CN",  "ISO-2022-CN");
  +        s_revhash.put("JIS0201",    "X0201");
  +        s_revhash.put("JIS0208",    "X0208");
  +        s_revhash.put("JIS0212",    "X0212");
  +        s_revhash.put("JIS0212",    "ISO-IR-159");
       }
   
       private MIME2Java() {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org