You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/01/16 14:56:31 UTC

svn commit: r1433935 - in /lucene/dev/trunk/lucene: ./ facet/src/java/org/apache/lucene/facet/index/params/ facet/src/java/org/apache/lucene/util/encoding/ facet/src/test/org/apache/lucene/facet/index/params/ facet/src/test/org/apache/lucene/util/encod...

Author: shaie
Date: Wed Jan 16 13:56:30 2013
New Revision: 1433935

URL: http://svn.apache.org/viewvc?rev=1433935&view=rev
Log:
LUCENE-4686: Write a specialized DGapVIntEncoder/Decoder for facets

Added:
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java   (with props)
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1433935&r1=1433934&r2=1433935&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jan 16 13:56:30 2013
@@ -40,6 +40,11 @@ Changes in backwards compatibility polic
   support in-memory caching, CategoryListCache was removed too.
   (Shai Erera, Michael McCandless)
 
+New Features
+
+* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the 
+  default). (Shai Erera)
+
 ======================= Lucene 4.1.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java?rev=1433935&r1=1433934&r2=1433935&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java Wed Jan 16 13:56:30 2013
@@ -6,12 +6,11 @@ import java.io.Serializable;
 import org.apache.lucene.facet.search.CategoryListIterator;
 import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
 import org.apache.lucene.facet.util.PartitionsUtils;
-import org.apache.lucene.util.encoding.DGapIntEncoder;
+import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
 import org.apache.lucene.util.encoding.IntDecoder;
 import org.apache.lucene.util.encoding.IntEncoder;
 import org.apache.lucene.util.encoding.SortingIntEncoder;
 import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
-import org.apache.lucene.util.encoding.VInt8IntEncoder;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -78,7 +77,7 @@ public class CategoryListParams implemen
    * counting facets.
    */
   public IntEncoder createEncoder() {
-    return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
+    return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder()));
   }
 
   @Override

Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java?rev=1433935&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java Wed Jan 16 13:56:30 2013
@@ -0,0 +1,67 @@
+package org.apache.lucene.util.encoding;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Decodes values encoded by {@link DGapVInt8IntEncoder}.
+ * 
+ * @lucene.experimental
+ */
+public final class DGapVInt8IntDecoder extends IntDecoder {
+
+  @Override
+  public void decode(BytesRef buf, IntsRef values) {
+    values.offset = values.length = 0;
+
+    // grow the buffer up front, even if by a large number of values (buf.length)
+    // that saves the need to check inside the loop for every decoded value if
+    // the buffer needs to grow.
+    if (values.ints.length < buf.length) {
+      values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
+    }
+
+    // it is better if the decoding is inlined like so, and not e.g.
+    // in a utility method
+    int upto = buf.offset + buf.length;
+    int value = 0;
+    int offset = buf.offset;
+    int prev = 0;
+    while (offset < upto) {
+      byte b = buf.bytes[offset++];
+      if (b >= 0) {
+        values.ints[values.length] = ((value << 7) | b) + prev;
+        value = 0;
+        prev = values.ints[values.length];
+        values.length++;
+      } else {
+        value = (value << 7) | (b & 0x7F);
+      }
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "DGapVInt8";
+  }
+
+} 

Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java?rev=1433935&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java Wed Jan 16 13:56:30 2013
@@ -0,0 +1,89 @@
+package org.apache.lucene.util.encoding;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An {@link IntEncoder} which implements variable length encoding for the gap
+ * between values. It's a specialized form of the combination of
+ * {@link DGapIntEncoder} and {@link VInt8IntEncoder}.
+ * 
+ * @see VInt8IntEncoder
+ * @see DGapIntEncoder
+ * 
+ * @lucene.experimental
+ */
+public final class DGapVInt8IntEncoder extends IntEncoder {
+
+  @Override
+  public void encode(IntsRef values, BytesRef buf) {
+    buf.offset = buf.length = 0;
+    int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
+    if (buf.bytes.length < maxBytesNeeded) {
+      buf.grow(maxBytesNeeded);
+    }
+    
+    int upto = values.offset + values.length;
+    int prev = 0;
+    for (int i = values.offset; i < upto; i++) {
+      // it is better if the encoding is inlined like so, and not e.g.
+      // in a utility method
+      int value = values.ints[i] - prev;
+      if ((value & ~0x7F) == 0) {
+        buf.bytes[buf.length] = (byte) value;
+        buf.length++;
+      } else if ((value & ~0x3FFF) == 0) {
+        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
+        buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
+        buf.length += 2;
+      } else if ((value & ~0x1FFFFF) == 0) {
+        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
+        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
+        buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
+        buf.length += 3;
+      } else if ((value & ~0xFFFFFFF) == 0) {
+        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
+        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
+        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
+        buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
+        buf.length += 4;
+      } else {
+        buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
+        buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
+        buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
+        buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
+        buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
+        buf.length += 5;
+      }
+      prev = values.ints[i];
+    }
+  }
+
+  @Override
+  public IntDecoder createMatchingDecoder() {
+    return new DGapVInt8IntDecoder();
+  }
+
+  @Override
+  public String toString() {
+    return "DGapVInt8";
+  }
+
+} 

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java?rev=1433935&r1=1433934&r2=1433935&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java Wed Jan 16 13:56:30 2013
@@ -1,12 +1,11 @@
 package org.apache.lucene.facet.index.params;
 
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.encoding.DGapIntEncoder;
+import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
 import org.apache.lucene.util.encoding.IntDecoder;
 import org.apache.lucene.util.encoding.IntEncoder;
 import org.apache.lucene.util.encoding.SortingIntEncoder;
 import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
-import org.apache.lucene.util.encoding.VInt8IntEncoder;
 import org.junit.Test;
 
 /*
@@ -32,7 +31,7 @@ public class CategoryListParamsTest exte
   public void testDefaultSettings() {
     CategoryListParams clp = new CategoryListParams();
     assertEquals("wrong default field", "$facets", clp.field);
-    IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
+    IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder()));
     IntDecoder decoder = encoder.createMatchingDecoder();
     assertEquals("unexpected default encoder", encoder.toString(), clp.createEncoder().toString());
     assertEquals("unexpected default decoder", decoder.toString(), clp.createEncoder().createMatchingDecoder().toString());

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java?rev=1433935&r1=1433934&r2=1433935&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java Wed Jan 16 13:56:30 2013
@@ -77,6 +77,7 @@ public class EncodingSpeed {
     encoderTest(new VInt8IntEncoder(), facetIDs, loopFactor);
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new VInt8IntEncoder())), facetIDs, loopFactor);
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))), facetIDs, loopFactor);
+    encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())), facetIDs, loopFactor);
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new EightFlagsIntEncoder()))), facetIDs, loopFactor);
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new FourFlagsIntEncoder()))), facetIDs, loopFactor);
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(3)))), facetIDs, loopFactor);

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java?rev=1433935&r1=1433934&r2=1433935&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java Wed Jan 16 13:56:30 2013
@@ -150,5 +150,10 @@ public class EncodingTest extends Lucene
   public void testSortingUniqueDGapNOnes3() throws Exception {
     encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(3)))), data, uniqueSortedData);
   }
+  
+  @Test
+  public void testSortingUniqueDGapVInt() throws Exception {
+    encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())), data, uniqueSortedData);
+  }
 
 }