You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2009/11/10 15:59:12 UTC

svn commit: r834487 - in /xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex: CaseInsensitiveMap.java RegexParser.java

Author: knoaman
Date: Tue Nov 10 14:59:12 2009
New Revision: 834487

URL: http://svn.apache.org/viewvc?rev=834487&view=rev
Log:
A further fix for JIRA Issue #1389: http://issues.apache.org/jira/browse/XERCESJ-1389. [Case insensitive matching]

Added:
    xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java   (with props)
Modified:
    xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/RegexParser.java

Added: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java?rev=834487&view=auto
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java (added)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java Tue Nov 10 14:59:12 2009
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xerces.impl.xpath.regex;
+
+/**
+ * @version $Id$
+ */
+public class CaseInsensitiveMap {
+
+    private static int CHUNK_SHIFT = 10;           /* 2^10 = 1k */
+    private static int CHUNK_SIZE = (1<<CHUNK_SHIFT);
+    private static int CHUNK_MASK = (CHUNK_SIZE-1);
+    private static int INITIAL_CHUNK_COUNT = 64;   /* up to 0xFFFF */
+
+    private static int[][][] caseInsensitiveMap;
+    private static Boolean mapBuilt = Boolean.FALSE;
+    
+    private static int LOWER_CASE_MATCH = 1;
+    private static int UPPER_CASE_MATCH = 2; 
+
+    /**
+     *  Return a list of code point characters (not including the input value)
+     *  that can be substituted in a case insensitive match
+     */
+    static public int[] get(int codePoint) {
+        if (mapBuilt == Boolean.FALSE) {
+            synchronized (mapBuilt) {
+                if (mapBuilt == Boolean.FALSE) {
+                    buildCaseInsensitiveMap();
+                }
+            } // synchronized
+        } // if mapBuilt
+
+        return (codePoint < 0x10000) ? getMapping(codePoint) : null;
+    }
+
+    private static int[] getMapping(int codePoint) {
+        int chunk = codePoint >>> CHUNK_SHIFT;
+        int offset = codePoint & CHUNK_MASK;
+        
+        return caseInsensitiveMap[chunk][offset];
+    }
+    
+    private static void buildCaseInsensitiveMap() {
+        caseInsensitiveMap = new int[INITIAL_CHUNK_COUNT][][];
+        for (int i=0; i<INITIAL_CHUNK_COUNT; i++) {
+            caseInsensitiveMap[i] = new int[CHUNK_SIZE][];
+        }
+        
+        int lc, uc;
+        for (int i=0; i<0x10000; i++) {
+            lc = Character.toLowerCase(i);
+            uc = Character.toUpperCase(i);
+
+            // lower/upper case value is not the same as code point
+            if (lc != uc || lc != i) {
+                int[] map = new int[2];
+                int index = 0;
+
+                if (lc != i) {
+                    map[index++] = lc;
+                    map[index++] = LOWER_CASE_MATCH;
+                    int[] lcMap = getMapping(lc);
+                    if (lcMap != null) {
+                        map = updateMap(i, map, lc, lcMap, LOWER_CASE_MATCH);
+                    }
+                }
+                
+                if (uc != i) {
+                    if (index == map.length) {
+                        map = expandMap(map, 2);
+                    }
+                    map[index++] = uc;
+                    map[index++] = UPPER_CASE_MATCH;
+                    int[] ucMap = getMapping(uc);
+                    if (ucMap != null) {
+                        map = updateMap(i, map, uc, ucMap, UPPER_CASE_MATCH);
+                    }
+                }
+                
+                set(i, map);
+            }
+        }
+
+        mapBuilt = Boolean.TRUE;
+    }
+    
+    private static int[] expandMap(int[] srcMap, int expandBy) {
+        final int oldLen = srcMap.length;
+        int[] newMap = new int[oldLen + expandBy];
+        
+        System.arraycopy(srcMap, 0, newMap, 0, oldLen);
+        return newMap;
+    }
+    
+    private static void set(int codePoint, int[] map) {
+        int chunk = codePoint >>> CHUNK_SHIFT;
+        int offset = codePoint & CHUNK_MASK;
+        
+        caseInsensitiveMap[chunk][offset] = map;
+    }
+
+    private static int[] updateMap(int codePoint, int[] codePointMap,
+            int ciCodePoint, int[] ciCodePointMap, int matchType) {
+        for (int i=0; i<ciCodePointMap.length; i+=2) {
+            int c = ciCodePointMap[i];
+            int[] cMap = getMapping(c);
+            if (cMap != null) {
+                if (contains(cMap, ciCodePoint, matchType)) {
+                    if (!contains(cMap, codePoint)) {
+                        cMap = expandAndAdd(cMap, codePoint, matchType);
+                        set(c, cMap);
+                    }
+                    if (!contains(codePointMap, c)) {
+                        codePointMap = expandAndAdd(codePointMap, c,matchType);
+                    }
+                }
+            }
+        }
+        
+        if (!contains(ciCodePointMap, codePoint)) {
+            ciCodePointMap = expandAndAdd(ciCodePointMap, codePoint, matchType);
+            set(ciCodePoint, ciCodePointMap);
+        }
+
+        return codePointMap;
+    }
+    
+    private static boolean contains(int[] map, int codePoint) {
+        for (int i=0; i<map.length; i += 2) {
+            if (map[i] == codePoint) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private static boolean contains(int[] map, int codePoint, int matchType) {
+        for (int i=0; i<map.length; i += 2) {
+            if (map[i] == codePoint && map[i+1] == matchType) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    private static int[] expandAndAdd(int[] srcMap, int codePoint, int matchType) {
+        final int oldLen = srcMap.length;
+        int[] newMap = new int[oldLen + 2];
+        
+        System.arraycopy(srcMap, 0, newMap, 0, oldLen);
+        newMap[oldLen] = codePoint;
+        newMap[oldLen+1] = matchType;
+        return newMap;
+    }
+}

Propchange: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/CaseInsensitiveMap.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/RegexParser.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/RegexParser.java?rev=834487&r1=834486&r2=834487&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/RegexParser.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/xpath/regex/RegexParser.java Tue Nov 10 14:59:12 2009
@@ -1169,18 +1169,19 @@
     }
     
     static protected final void addCaseInsensitiveChar(RangeToken tok, int c) {
+        final int[] caseMap = CaseInsensitiveMap.get(c);
         tok.addRange(c, c);
-        char cic = Character.toUpperCase((char)c);
-        if (cic != c) {
-            tok.addRange(cic, cic);
-        }
-        cic = Character.toLowerCase((char)c);
-        if (cic != c) {
-            tok.addRange(cic, cic);
+        
+        if (caseMap != null) {
+            for (int i=0; i<caseMap.length; i+=2) {
+                tok.addRange(caseMap[i], caseMap[i]);
+            }
         }
+
     }
     
     static protected final void addCaseInsensitiveCharRange(RangeToken tok, int start, int end) {
+        int[] caseMap;
         int r1, r2;
         if (start <= end) {
             r1 = start;
@@ -1192,19 +1193,10 @@
 
         tok.addRange(r1, r2);
         for (int ch = r1;  ch <= r2;  ch++) {
-            if (ch <= 0xffff) {
-                char uch = Character.toUpperCase((char)ch);
-                if (uch != ch) {
-                    tok.addRange(uch, uch);
-                }
-            }
-        }
-
-        for (int ch = r1;  ch <= r2;  ch++) {
-            if (ch <= 0xffff) {
-                char lch = Character.toLowerCase((char)ch);
-                if (lch != ch) {
-                    tok.addRange(lch, lch);
+            caseMap = CaseInsensitiveMap.get(ch);
+            if (caseMap != null) {
+                for (int i=0; i<caseMap.length; i+=2) {
+                    tok.addRange(caseMap[i], caseMap[i]);
                 }
             }
         }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org