You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@thrift.apache.org by br...@apache.org on 2009/02/10 19:10:57 UTC

svn commit: r743037 - in /incubator/thrift/trunk: compiler/cpp/src/generate/t_java_generator.cc lib/java/src/org/apache/thrift/IntRangeSet.java

Author: bryanduxbury
Date: Tue Feb 10 18:10:57 2009
New Revision: 743037

URL: http://svn.apache.org/viewvc?rev=743037&view=rev
Log:
THRIFT-318. java: Performance of HashSet for enumeration VALID_VALUES seems poor

Instead of a HashSet, enums will now use the special IntRangeSet implementation. 

Added:
    incubator/thrift/trunk/lib/java/src/org/apache/thrift/IntRangeSet.java
Modified:
    incubator/thrift/trunk/compiler/cpp/src/generate/t_java_generator.cc

Modified: incubator/thrift/trunk/compiler/cpp/src/generate/t_java_generator.cc
URL: http://svn.apache.org/viewvc/incubator/thrift/trunk/compiler/cpp/src/generate/t_java_generator.cc?rev=743037&r1=743036&r2=743037&view=diff
==============================================================================
--- incubator/thrift/trunk/compiler/cpp/src/generate/t_java_generator.cc (original)
+++ incubator/thrift/trunk/compiler/cpp/src/generate/t_java_generator.cc Tue Feb 10 18:10:57 2009
@@ -314,7 +314,8 @@
   f_enum << string() +
     "import java.util.Set;\n" +
     "import java.util.HashSet;\n" +
-    "import java.util.Collections;\n" << endl;
+    "import java.util.Collections;\n" +
+    "import org.apache.thrift.IntRangeSet;\n"<< endl;
 
   f_enum <<
     "public class " << tenum->get_name() << " ";
@@ -337,15 +338,18 @@
   
   // Create a static Set with all valid values for this enum
   f_enum << endl;
-  indent(f_enum) << "public static final Set<Integer> VALID_VALUES = Collections.unmodifiableSet(new HashSet<Integer>(){{" << endl;
+  indent(f_enum) << "public static final IntRangeSet VALID_VALUES = new IntRangeSet(";
   indent_up();
+  bool first = true;
   for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
-    // populate set 
-    if ((*c_iter)->has_value())
-      indent(f_enum) << "add(" << (*c_iter)->get_value() << ");" << endl;
+    // populate set
+    if ((*c_iter)->has_value()) {
+      f_enum << (first ? "" : ", ") << (*c_iter)->get_name();
+      first = false;
+    }
   }
   indent_down();
-  indent(f_enum) << "}});" << endl;
+  f_enum << ");" << endl;
 
   scope_down(f_enum);
   f_enum.close();

Added: incubator/thrift/trunk/lib/java/src/org/apache/thrift/IntRangeSet.java
URL: http://svn.apache.org/viewvc/incubator/thrift/trunk/lib/java/src/org/apache/thrift/IntRangeSet.java?rev=743037&view=auto
==============================================================================
--- incubator/thrift/trunk/lib/java/src/org/apache/thrift/IntRangeSet.java (added)
+++ incubator/thrift/trunk/lib/java/src/org/apache/thrift/IntRangeSet.java Tue Feb 10 18:10:57 2009
@@ -0,0 +1,152 @@
+package org.apache.thrift;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * IntRangeSet is a specialized Set<Integer> implementation designed 
+ * specifically to make the generated validate() method calls faster. It groups
+ * the set values into ranges, and in the contains() call, it does 
+ * num ranges * 2 comparisons max. For the common case, which is a single, 
+ * contiguous range, this approach is about 60% faster than using a HashSet. If
+ * you had a very ragged value set, like all the odd numbers, for instance, 
+ * then you would end up with pretty poor running time.  
+ */
+public class IntRangeSet implements Set<Integer> {
+  /**
+   * This array keeps the bounds of each extent in alternating cells, always 
+   * increasing. Example: [0,5,10,15], which corresponds to 0-5, 10-15.
+   */
+  private int[] extents;
+  
+  /**
+   * We'll keep a duplicate, real HashSet around internally to satisfy some of
+   * the other set operations. 
+   */
+  private Set<Integer> realSet = new HashSet<Integer>();
+  
+  public IntRangeSet(int... values) {
+    Arrays.sort(values);
+
+    List<Integer> extent_list = new ArrayList<Integer>();
+    
+    int ext_start = values[0];
+    int ext_end_so_far = values[0];
+    for (int i = 1; i < values.length; i++) {
+      realSet.add(values[i]);
+      
+      if (values[i] == ext_end_so_far + 1) {
+        // advance the end so far
+        ext_end_so_far = values[i];
+      } else {
+        // create an extent for everything we saw so far, move on to the next one
+        extent_list.add(ext_start);
+        extent_list.add(ext_end_so_far);
+        ext_start = values[i];
+        ext_end_so_far = values[i];
+      }
+    }
+    extent_list.add(ext_start);
+    extent_list.add(ext_end_so_far);
+    
+    extents = new int[extent_list.size()];
+    for (int i = 0; i < extent_list.size(); i++) {
+      extents[i] = extent_list.get(i);
+    }
+  }
+  
+  public boolean add(Integer i) {
+    throw new UnsupportedOperationException();
+  }
+  
+  public void clear() {
+    throw new UnsupportedOperationException();
+  }
+
+  public boolean addAll(Collection<? extends Integer> arg0) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * While this method is here for Set interface compatibility, you should avoid 
+   * using it. It incurs boxing overhead! Use the int method directly, instead.
+   */
+  public boolean contains(Object arg0) {
+    return contains(((Integer)arg0).intValue());
+  }
+
+  /**
+   * This is much faster, since it doesn't stop at Integer on the way through.
+   * @param val the value you want to check set membership for
+   * @return true if val was found, false otherwise
+   */
+  public boolean contains(int val) {
+    for (int i = 0; i < extents.length / 2; i++) {
+      if (val < extents[i*2]) {
+        return false;
+      } else if (val <= extents[i*2+1]) {
+        return true;
+      }
+    }
+    
+    return false;
+  }
+  
+  public boolean containsAll(Collection<?> arg0) {
+    for (Object o : arg0) {
+      if (!contains(o)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  public boolean isEmpty() {
+    return realSet.isEmpty();
+  }
+
+  public Iterator<Integer> iterator() {
+    return realSet.iterator();
+  }
+
+  public boolean remove(Object arg0) {
+    throw new UnsupportedOperationException();
+  }
+
+  public boolean removeAll(Collection<?> arg0) {
+    throw new UnsupportedOperationException();
+  }
+
+  public boolean retainAll(Collection<?> arg0) {
+    throw new UnsupportedOperationException();
+  }
+
+  public int size() {
+    return realSet.size();
+  }
+
+  public Object[] toArray() {
+    return realSet.toArray();
+  }
+
+  public <T> T[] toArray(T[] arg0) {
+    return realSet.toArray(arg0);
+  }
+  
+  @Override
+  public String toString() {
+    String buf = "";
+    for (int i = 0; i < extents.length / 2; i++) {
+      if (i != 0) { 
+        buf += ", ";
+      }
+      buf += "[" + extents[i*2] + "," + extents[i*2+1] + "]"; 
+    }
+    return buf;
+  }
+}