You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by el...@apache.org on 2001/05/07 14:36:04 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/utils/regex Token.java

elena       01/05/07 05:36:03

  Modified:    java/src/org/apache/xerces/utils/regex Token.java
  Log:
  Added support for \p{Pf} and \p{Pi} (Schema Datatypes: Regular Expressions)
  
  Revision  Changes    Path
  1.4       +29 -9     xml-xerces/java/src/org/apache/xerces/utils/regex/Token.java
  
  Index: Token.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/utils/regex/Token.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Token.java	2000/06/20 23:57:18	1.3
  +++ Token.java	2001/05/07 12:36:00	1.4
  @@ -639,15 +639,20 @@
           "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
           "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
           "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
  -        "L", "M", "N", "Z", "C", "P", "S",      // 29-35
  +        "Pi", "Pf",  // 29, 30
  +        "L", "M", "N", "Z", "C", "P", "S",      // 31-37
       };
  -    static final int CHAR_LETTER = 29;
  -    static final int CHAR_MARK = 30;
  -    static final int CHAR_NUMBER = 31;
  -    static final int CHAR_SEPARATOR = 32;
  -    static final int CHAR_OTHER = 33;
  -    static final int CHAR_PUNCTUATION = 34;
  -    static final int CHAR_SYMBOL = 35;
  +
  +    // Schema Rec. {Datatypes} - Punctuation 
  +    static final int CHAR_INIT_QUOTE  = 29;     // Pi - initial quote
  +    static final int CHAR_FINAL_QUOTE = 30;     // Pf - final quote
  +    static final int CHAR_LETTER = 31;
  +    static final int CHAR_MARK = 32;
  +    static final int CHAR_NUMBER = 33;
  +    static final int CHAR_SEPARATOR = 34;
  +    static final int CHAR_OTHER = 35;
  +    static final int CHAR_PUNCTUATION = 36;
  +    static final int CHAR_SYMBOL = 37;
       static final String[] blockNames = {
           "Basic Latin",                          // 0
           "Latin-1 Supplement",
  @@ -739,8 +744,21 @@
                   for (int i = 0;  i < ranges.length;  i ++) {
                       ranges[i] = Token.createRange();
                   }
  +                int type;
                   for (int i = 0;  i < 0x10000;  i ++) {
  -                    int type = Character.getType((char)i);
  +                    type = Character.getType((char)i);
  +                    if (type == Character.START_PUNCTUATION || 
  +                        type == Character.END_PUNCTUATION) {
  +                        //build table of Pi values
  +                        if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
  +                            i == 0x201F || i == 0x2039) {
  +                            type = CHAR_INIT_QUOTE;
  +                        }
  +                        //build table of Pf values
  +                        if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
  +                            type = CHAR_FINAL_QUOTE;
  +                        }
  +                    }
                       ranges[type].addRange(i, i);
                       switch (type) {
                         case Character.UPPERCASE_LETTER:
  @@ -776,6 +794,8 @@
                         case Character.DASH_PUNCTUATION:
                         case Character.START_PUNCTUATION:
                         case Character.END_PUNCTUATION:
  +                      case CHAR_INIT_QUOTE:
  +                      case CHAR_FINAL_QUOTE:
                         case Character.OTHER_PUNCTUATION:
                           type = CHAR_PUNCTUATION;
                           break;
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org