You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/10 20:05:18 UTC

svn commit: r1181104 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/index/values/TypePromoter.java test/org/apache/lucene/index/values/TestTypePromotion.java

Author: rmuir
Date: Mon Oct 10 18:05:18 2011
New Revision: 1181104

URL: http://svn.apache.org/viewvc?rev=1181104&view=rev
Log:
LUCENE-3186: svn add

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java   (with props)
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java   (with props)

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java Mon Oct 10 18:05:18 2011
@@ -0,0 +1,204 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Type promoter that promotes {@link IndexDocValues} during merge based on
+ * their {@link ValueType} and {@link #getValueSize()}
+ * 
+ * @lucene.internal
+ */
+public class TypePromoter {
+
+  private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
+  private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
+  public static final int VAR_TYPE_VALUE_SIZE = -1;
+
+  private static final int IS_INT = 1 << 0;
+  private static final int IS_BYTE = 1 << 1;
+  private static final int IS_FLOAT = 1 << 2;
+  /* VAR & FIXED == VAR */
+  private static final int IS_VAR = 1 << 3;
+  private static final int IS_FIXED = 1 << 3 | 1 << 4;
+  /* if we have FIXED & FIXED with different size we promote to VAR */
+  private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
+  /* STRAIGHT & DEREF == STRAIGHT (dense values win) */
+  private static final int IS_STRAIGHT = 1 << 5;
+  private static final int IS_DEREF = 1 << 5 | 1 << 6;
+  private static final int IS_SORTED = 1 << 7;
+  /* more bits wins (int16 & int32 == int32) */
+  private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
+  private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
+  private static final int IS_32_BIT = 1 << 10 | 1 << 11;
+  private static final int IS_64_BIT = 1 << 11;
+
+  private final ValueType type;
+  private final int flags;
+  private final int valueSize;
+
+  /**
+   * Returns a positive value size if this {@link TypePromoter} represents a
+   * fixed variant, otherwise <code>-1</code>
+   * 
+   * @return a positive value size if this {@link TypePromoter} represents a
+   *         fixed variant, otherwise <code>-1</code>
+   */
+  public int getValueSize() {
+    return valueSize;
+  }
+
+  static {
+    for (ValueType type : ValueType.values()) {
+      TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
+      FLAGS_MAP.put(create.flags, type);
+    }
+  }
+
+  /**
+   * Creates a new {@link TypePromoter}
+   * 
+   * @param type
+   *          the {@link ValueType} this promoter represents
+   * @param flags
+   *          the promoters flags
+   * @param valueSize
+   *          the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
+   */
+  protected TypePromoter(ValueType type, int flags, int valueSize) {
+    this.type = type;
+    this.flags = flags;
+    this.valueSize = valueSize;
+  }
+
+  /**
+   * Creates a new promoted {@link TypePromoter} based on this and the given
+   * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter} 
+   * aren't compatible.
+   * 
+   * @param promoter
+   *          the incoming promoter
+   * @return a new promoted {@link TypePromoter} based on this and the given
+   *         {@link TypePromoter} or <code>null</code> iff the
+   *         {@link TypePromoter} aren't compatible.
+   */
+  public TypePromoter promote(TypePromoter promoter) {
+
+    int promotedFlags = promoter.flags & this.flags;
+    TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
+    if (promoted == null) {
+      return promoted;
+    }
+    if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
+      if (this.valueSize == promoter.valueSize) {
+        return promoted;
+      }
+      return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
+          VAR_TYPE_VALUE_SIZE);
+    }
+    return promoted;
+
+  }
+
+  /**
+   * Returns the {@link ValueType} of this {@link TypePromoter}
+   * 
+   * @return the {@link ValueType} of this {@link TypePromoter}
+   */
+  public ValueType type() {
+    return type;
+  }
+
+  @Override
+  public String toString() {
+    return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
+  }
+
+  /**
+   * Creates a new {@link TypePromoter} for the given type and size per value.
+   * 
+   * @param type
+   *          the {@link ValueType} to create the promoter for
+   * @param valueSize
+   *          the size per value in bytes or <code>-1</code> iff the types have
+   *          variable length.
+   * @return a new {@link TypePromoter}
+   */
+  public static TypePromoter create(ValueType type, int valueSize) {
+    if (type == null) {
+      return null;
+    }
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
+    case BYTES_FIXED_SORTED:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
+    case BYTES_FIXED_STRAIGHT:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
+    case BYTES_VAR_DEREF:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
+    case BYTES_VAR_SORTED:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
+    case BYTES_VAR_STRAIGHT:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+    case FIXED_INTS_16:
+      return new TypePromoter(type,
+          IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
+    case FIXED_INTS_32:
+      return new TypePromoter(type,
+          IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
+    case FIXED_INTS_64:
+      return new TypePromoter(type,
+          IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
+    case FIXED_INTS_8:
+      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
+          valueSize);
+    case FLOAT_32:
+      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
+          | IS_32_BIT, valueSize);
+    case FLOAT_64:
+      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
+          | IS_64_BIT, valueSize);
+    case VAR_INTS:
+      return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+    default:
+      throw new IllegalStateException();
+    }
+  }
+
+  /**
+   * Returns a {@link TypePromoter} that always promotes to the type provided to
+   * {@link #promote(TypePromoter)}
+   */
+  public static TypePromoter getIdentityPromoter() {
+    return IDENTITY_PROMOTER;
+  }
+
+  private static class IdentityTypePromoter extends TypePromoter {
+
+    public IdentityTypePromoter() {
+      super(null, 0, -1);
+    }
+
+    @Override
+    public TypePromoter promote(TypePromoter promoter) {
+      return promoter;
+    }
+  }
+}
\ No newline at end of file

Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Mon Oct 10 18:05:18 2011
@@ -0,0 +1,313 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Random;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IndexDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Before;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+public class TestTypePromotion extends LuceneTestCase {
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
+        .getDefaultFieldCodec().equals("PreFlex"));
+  }
+
+  private static final EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
+      ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
+      ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
+
+  private static final EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
+      ValueType.FLOAT_64);
+
+  private static final EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
+      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
+
+  private static final EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
+
+  /** Returns a random element of the given set of types. */
+  public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
+    ValueType[] array = typeEnum.toArray(new ValueType[0]);
+    return array[random.nextInt(array.length)];
+  }
+
+  private static enum TestType {
+    Int, Float, Byte
+  }
+
+  /**
+   * Indexes three batches of the same field with random types taken from
+   * <code>types</code>, optimizes and verifies all values afterwards.
+   */
+  private void runTest(EnumSet<ValueType> types, TestType type)
+      throws CorruptIndexException, IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    int num_3 = atLeast(200);
+    long[] values = new long[num_1 + num_2 + num_3];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, 0, num_1);
+    writer.commit();
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, num_1, num_2);
+    writer.commit();
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      writer.optimize();
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+      writer_2.commit();
+      writer_2.close();
+      if (random.nextBoolean()) {
+        writer.addIndexes(dir_2);
+      } else {
+        // do a real merge here
+        IndexReader open = IndexReader.open(dir_2);
+        writer.addIndexes(open);
+        open.close();
+      }
+      dir_2.close();
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+    }
+
+    writer.optimize();
+    writer.close();
+    assertValues(type, dir, values);
+    dir.close();
+  }
+
+  /** Asserts that the doc values of the optimized index match values. */
+  private void assertValues(TestType type, Directory dir, long[] values)
+      throws CorruptIndexException, IOException {
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    // check the number of segments before touching the first one
+    assertEquals(1, children.length);
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    Source directSource = docValues.getDirectSource();
+    for (int i = 0; i < values.length; i++) {
+      int id = Integer.parseInt(reader.document(i).get("id"));
+      String msg = "id: " + id + " doc: " + i;
+      switch (type) {
+      case Byte:
+        BytesRef bytes = directSource.getBytes(i, new BytesRef());
+        long value = 0;
+        switch(bytes.length) {
+        case 1:
+          value = bytes.bytes[bytes.offset];
+          break;
+        case 2:
+          value = bytes.asShort();
+          break;
+        case 4:
+          value = bytes.asInt();
+          break;
+        case 8:
+          value = bytes.asLong();
+          break;
+        default:
+          fail(msg + " bytessize: " + bytes.length);
+        }
+        assertEquals(msg  + " byteSize: " + bytes.length, values[id], value);
+        break;
+      case Float:
+        assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
+        break;
+      case Int:
+        assertEquals(msg, values[id], directSource.getInt(i));
+        break;
+      default:
+        break;
+      }
+    }
+    docValues.close();
+    reader.close();
+  }
+
+  /** Indexes num docs with ids starting at offset and records their values. */
+  public void index(IndexWriter writer, IndexDocValuesField valField,
+      ValueType valueType, long[] values, int offset, int num)
+      throws CorruptIndexException, IOException {
+    BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
+    for (int i = offset; i < offset + num; i++) {
+      Document doc = new Document();
+      doc.add(new Field("id", i + "", TextField.TYPE_STORED));
+      switch (valueType) {
+      case VAR_INTS:
+        values[i] = random.nextInt();
+        valField.setInt(values[i]);
+        break;
+      case FIXED_INTS_16:
+        values[i] = random.nextInt(Short.MAX_VALUE);
+        valField.setInt((short) values[i], true);
+        break;
+      case FIXED_INTS_32:
+        values[i] = random.nextInt();
+        valField.setInt((int) values[i], true);
+        break;
+      case FIXED_INTS_64:
+        values[i] = random.nextLong();
+        valField.setInt(values[i], true);
+        break;
+      case FLOAT_64:
+        double nextDouble = random.nextDouble();
+        values[i] = Double.doubleToRawLongBits(nextDouble);
+        valField.setFloat(nextDouble);
+        break;
+      case FLOAT_32:
+        final float nextFloat = random.nextFloat();
+        values[i] = Double.doubleToRawLongBits(nextFloat);
+        valField.setFloat(nextFloat);
+        break;
+      case FIXED_INTS_8:
+        values[i] = (byte) i;
+        valField.setInt((byte)values[i], true);
+        break;
+      case BYTES_FIXED_DEREF:
+      case BYTES_FIXED_SORTED:
+      case BYTES_FIXED_STRAIGHT:
+        values[i] = random.nextLong();
+        ref.copy(values[i]);
+        valField.setBytes(ref, valueType);
+        break;
+      case BYTES_VAR_DEREF:
+      case BYTES_VAR_SORTED:
+      case BYTES_VAR_STRAIGHT:
+        if (random.nextBoolean()) {
+          ref.copy(random.nextInt());
+          values[i] = ref.asInt();
+        } else {
+          ref.copy(random.nextLong());
+          values[i] = ref.asLong();
+        }
+        valField.setBytes(ref, valueType);
+        break;
+      default:
+        fail("unexpected value " + valueType);
+      }
+      doc.add(valField);
+      writer.addDocument(doc);
+      if (random.nextInt(10) == 0) {
+        writer.commit();
+      }
+    }
+  }
+
+  public void testPromoteBytes() throws IOException {
+    runTest(UNSORTED_BYTES, TestType.Byte);
+  }
+
+  public void testSortedPromoteBytes() throws IOException {
+    runTest(SORTED_BYTES, TestType.Byte);
+  }
+
+  public void testPromotInteger() throws IOException {
+    runTest(INTEGERS, TestType.Int);
+  }
+
+  public void testPromotFloatingPoint() throws IOException {
+    runTest(FLOATS, TestType.Float);
+  }
+
+  /** Merging ints with bytes must fall back to var straight bytes. */
+  public void testMergeIncompatibleTypes() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
+    IndexWriter writer = new IndexWriter(dir, writerConfig);
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    long[] values = new long[num_1 + num_2];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(INTEGERS, random), values, 0, num_1);
+    writer.commit();
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
+      writer_2.commit();
+      writer_2.close();
+      if (random.nextBoolean()) {
+        writer.addIndexes(dir_2);
+      } else {
+        // do a real merge here
+        IndexReader open = IndexReader.open(dir_2);
+        writer.addIndexes(open);
+        open.close();
+      }
+      dir_2.close();
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
+      writer.commit();
+    }
+    writer.close();
+    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
+      writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
+    }
+    writer = new IndexWriter(dir, writerConfig);
+    // now optimize
+    writer.optimize();
+    writer.close();
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    assertNotNull(docValues);
+    assertValues(TestType.Byte, dir, values);
+    assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
+    reader.close();
+    dir.close();
+  }
+
+}
\ No newline at end of file



Re: svn commit: r1181104 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/index/values/TypePromoter.java test/org/apache/lucene/index/values/TestTypePromotion.java

Posted by Simon Willnauer <si...@googlemail.com>.
ah crap! thanks robert!


On Mon, Oct 10, 2011 at 8:05 PM,  <rm...@apache.org> wrote:
> Author: rmuir
> Date: Mon Oct 10 18:05:18 2011
> New Revision: 1181104
>
> URL: http://svn.apache.org/viewvc?rev=1181104&view=rev
> Log:
> LUCENE-3186: svn add
>
> Added:
>    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java   (with props)
>    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java   (with props)
>
> Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (added)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,204 @@
> +package org.apache.lucene.index.values;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +/**
> + * Type promoter that promotes {@link IndexDocValues} during merge based on
> + * their {@link ValueType} and {@link #getValueSize()}
> + *
> + * @lucene.internal
> + */
> +public class TypePromoter {
> +
> +  private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
> +  private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
> +  public static final int VAR_TYPE_VALUE_SIZE = -1;
> +
> +  private static final int IS_INT = 1 << 0;
> +  private static final int IS_BYTE = 1 << 1;
> +  private static final int IS_FLOAT = 1 << 2;
> +  /* VAR & FIXED == VAR */
> +  private static final int IS_VAR = 1 << 3;
> +  private static final int IS_FIXED = 1 << 3 | 1 << 4;
> +  /* if we have FIXED & FIXED with different size we promote to VAR */
> +  private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
> +  /* STRAIGHT & DEREF == STRAIGHT (dense values win) */
> +  private static final int IS_STRAIGHT = 1 << 5;
> +  private static final int IS_DEREF = 1 << 5 | 1 << 6;
> +  private static final int IS_SORTED = 1 << 7;
> +  /* more bits wins (int16 & int32 == int32) */
> +  private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
> +  private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
> +  private static final int IS_32_BIT = 1 << 10 | 1 << 11;
> +  private static final int IS_64_BIT = 1 << 11;
> +
> +  private final ValueType type;
> +  private final int flags;
> +  private final int valueSize;
> +
> +  /**
> +   * Returns a positive value size if this {@link TypePromoter} represents a
> +   * fixed variant, otherwise <code>-1</code>
> +   *
> +   * @return a positive value size if this {@link TypePromoter} represents a
> +   *         fixed variant, otherwise <code>-1</code>
> +   */
> +  public int getValueSize() {
> +    return valueSize;
> +  }
> +
> +  static {
> +    for (ValueType type : ValueType.values()) {
> +      TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
> +      FLAGS_MAP.put(create.flags, type);
> +    }
> +  }
> +
> +  /**
> +   * Creates a new {@link TypePromoter}
> +   *
> +   * @param type
> +   *          the {@link ValueType} this promoter represents
> +   * @param flags
> +   *          the promoters flags
> +   * @param valueSize
> +   *          the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
> +   */
> +  protected TypePromoter(ValueType type, int flags, int valueSize) {
> +    this.type = type;
> +    this.flags = flags;
> +    this.valueSize = valueSize;
> +  }
> +
> +  /**
> +   * Creates a new promoted {@link TypePromoter} based on this and the given
> +   * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
> +   * aren't compatible.
> +   *
> +   * @param promoter
> +   *          the incoming promoter
> +   * @return a new promoted {@link TypePromoter} based on this and the given
> +   *         {@link TypePromoter} or <code>null</code> iff the
> +   *         {@link TypePromoter} aren't compatible.
> +   */
> +  public TypePromoter promote(TypePromoter promoter) {
> +
> +    int promotedFlags = promoter.flags & this.flags;
> +    TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
> +    if (promoted == null) {
> +      return promoted;
> +    }
> +    if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
> +      if (this.valueSize == promoter.valueSize) {
> +        return promoted;
> +      }
> +      return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
> +          VAR_TYPE_VALUE_SIZE);
> +    }
> +    return promoted;
> +
> +  }
> +
> +  /**
> +   * Returns the {@link ValueType} of this {@link TypePromoter}
> +   *
> +   * @return the {@link ValueType} of this {@link TypePromoter}
> +   */
> +  public ValueType type() {
> +    return type;
> +  }
> +
> +  @Override
> +  public String toString() {
> +    return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
> +  }
> +
> +  /**
> +   * Creates a new {@link TypePromoter} for the given type and size per value.
> +   *
> +   * @param type
> +   *          the {@link ValueType} to create the promoter for
> +   * @param valueSize
> +   *          the size per value in bytes or <code>-1</code> iff the types have
> +   *          variable length.
> +   * @return a new {@link TypePromoter}
> +   */
> +  public static TypePromoter create(ValueType type, int valueSize) {
> +    if (type == null) {
> +      return null;
> +    }
> +    switch (type) {
> +    case BYTES_FIXED_DEREF:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
> +    case BYTES_FIXED_SORTED:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
> +    case BYTES_FIXED_STRAIGHT:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
> +    case BYTES_VAR_DEREF:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
> +    case BYTES_VAR_SORTED:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
> +    case BYTES_VAR_STRAIGHT:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> +    case FIXED_INTS_16:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
> +    case FIXED_INTS_32:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
> +    case FIXED_INTS_64:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
> +    case FIXED_INTS_8:
> +      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
> +          valueSize);
> +    case FLOAT_32:
> +      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> +          | IS_32_BIT, valueSize);
> +    case FLOAT_64:
> +      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> +          | IS_64_BIT, valueSize);
> +    case VAR_INTS:
> +      return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> +    default:
> +      throw new IllegalStateException();
> +    }
> +  }
> +
> +  /**
> +   * Returns a {@link TypePromoter} that always promotes to the type provided to
> +   * {@link #promote(TypePromoter)}
> +   */
> +  public static TypePromoter getIdentityPromoter() {
> +    return IDENTITY_PROMOTER;
> +  }
> +
> +  private static class IdentityTypePromoter extends TypePromoter {
> +
> +    public IdentityTypePromoter() {
> +      super(null, 0, -1);
> +    }
> +
> +    @Override
> +    public TypePromoter promote(TypePromoter promoter) {
> +      return promoter;
> +    }
> +  }
> +}
> \ No newline at end of file
>
> Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (added)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,313 @@
> +package org.apache.lucene.index.values;
> +
> +import java.io.IOException;
> +import java.util.EnumSet;
> +import java.util.Random;
> +
> +import org.apache.lucene.analysis.MockAnalyzer;
> +import org.apache.lucene.document.Document;
> +import org.apache.lucene.document.Field;
> +import org.apache.lucene.document.IndexDocValuesField;
> +import org.apache.lucene.document.TextField;
> +import org.apache.lucene.index.CorruptIndexException;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.IndexReader.ReaderContext;
> +import org.apache.lucene.index.IndexWriter;
> +import org.apache.lucene.index.IndexWriterConfig;
> +import org.apache.lucene.index.NoMergePolicy;
> +import org.apache.lucene.index.codecs.CodecProvider;
> +import org.apache.lucene.index.values.IndexDocValues.Source;
> +import org.apache.lucene.store.Directory;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.LuceneTestCase;
> +import org.junit.Before;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with this
> + * work for additional information regarding copyright ownership. The ASF
> + * licenses this file to You under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
> + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
> + * License for the specific language governing permissions and limitations under
> + * the License.
> + */
> +public class TestTypePromotion extends LuceneTestCase {
> +  @Before
> +  public void setUp() throws Exception {
> +    super.setUp();
> +    assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
> +        .getDefaultFieldCodec().equals("PreFlex"));
> +  }
> +
> +  private static EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
> +      ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
> +      ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
> +
> +  private static EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
> +      ValueType.FLOAT_64);
> +
> +  private static EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
> +      ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
> +      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
> +
> +  private static EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
> +      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
> +
> +  public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
> +    ValueType[] array = typeEnum.toArray(new ValueType[0]);
> +    return array[random.nextInt(array.length)];
> +  }
> +
> +  /** Selects which read path {@code assertValues} uses to check stored values. */
> +  private enum TestType {
> +    Int,
> +    Float,
> +    Byte
> +  }
> +
> +  private void runTest(EnumSet<ValueType> types, TestType type)
> +      throws CorruptIndexException, IOException {
> +    Directory dir = newDirectory();
> +    IndexWriter writer = new IndexWriter(dir,
> +        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +    int num_1 = atLeast(200);
> +    int num_2 = atLeast(200);
> +    int num_3 = atLeast(200);
> +    long[] values = new long[num_1 + num_2 + num_3];
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(types, random), values, 0, num_1);
> +    writer.commit();
> +
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(types, random), values, num_1, num_2);
> +    writer.commit();
> +
> +    if (random.nextInt(4) == 0) {
> +      // once in a while use addIndexes
> +      writer.optimize();
> +
> +      Directory dir_2 = newDirectory() ;
> +      IndexWriter writer_2 = new IndexWriter(dir_2,
> +          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +      index(writer_2, new IndexDocValuesField("promote"),
> +          randomValueType(types, random), values, num_1 + num_2, num_3);
> +      writer_2.commit();
> +      writer_2.close();
> +      if (random.nextBoolean()) {
> +        writer.addIndexes(dir_2);
> +      } else {
> +        // do a real merge here
> +        IndexReader open = IndexReader.open(dir_2);
> +        writer.addIndexes(open);
> +        open.close();
> +      }
> +      dir_2.close();
> +    } else {
> +      index(writer, new IndexDocValuesField("promote"),
> +          randomValueType(types, random), values, num_1 + num_2, num_3);
> +    }
> +
> +    writer.optimize();
> +    writer.close();
> +    assertValues(type, dir, values);
> +    dir.close();
> +  }
> +
> +  /**
> +   * Opens the index in {@code dir} and checks, document by document, that the
> +   * "promote" doc value equals the expected value stored under the document's
> +   * "id" field.
> +   *
> +   * @param type how to read each value back: as bytes, as a float or as an int
> +   * @param dir the directory holding the index; it must be optimized
> +   * @param values the expected values, indexed by the stored "id" of a document
> +   */
> +  private void assertValues(TestType type, Directory dir, long[] values)
> +      throws CorruptIndexException, IOException {
> +    IndexReader reader = IndexReader.open(dir);
> +    assertTrue(reader.isOptimized());
> +    ReaderContext topReaderContext = reader.getTopReaderContext();
> +    ReaderContext[] children = topReaderContext.children();
> +    // check the child count before indexing into the array, so that a wrong
> +    // count is reported as an assertion failure rather than an index error
> +    assertEquals(1, children.length);
> +    IndexDocValues docValues = children[0].reader.docValues("promote");
> +    assertNotNull("no doc values for field: promote", docValues);
> +    Source directSource = docValues.getDirectSource();
> +    for (int i = 0; i < values.length; i++) {
> +      int id = Integer.parseInt(reader.document(i).get("id"));
> +      String msg = "id: " + id + " doc: " + i;
> +      switch (type) {
> +      case Byte: {
> +        BytesRef bytes = directSource.getBytes(i, new BytesRef());
> +        long value = 0;
> +        // decode according to the number of bytes that were stored
> +        switch (bytes.length) {
> +        case 1:
> +          value = bytes.bytes[bytes.offset];
> +          break;
> +        case 2:
> +          value = bytes.asShort();
> +          break;
> +        case 4:
> +          value = bytes.asInt();
> +          break;
> +        case 8:
> +          value = bytes.asLong();
> +          break;
> +        default:
> +          fail(msg + " bytessize: " + bytes.length);
> +        }
> +        assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
> +        break;
> +      }
> +      case Float:
> +        // expected values were recorded as the raw long bits of a double
> +        assertEquals(msg, values[id],
> +            Double.doubleToRawLongBits(directSource.getFloat(i)));
> +        break;
> +      case Int:
> +        assertEquals(msg, values[id], directSource.getInt(i));
> +        break;
> +      default:
> +        fail("unexpected test type " + type);
> +      }
> +    }
> +    docValues.close();
> +    reader.close();
> +  }
> +
> +  public void index(IndexWriter writer, IndexDocValuesField valField,
> +      ValueType valueType, long[] values, int offset, int num)
> +      throws CorruptIndexException, IOException {
> +    BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
> +    for (int i = offset; i < offset + num; i++) {
> +      Document doc = new Document();
> +      doc.add(new Field("id", i + "", TextField.TYPE_STORED));
> +      switch (valueType) {
> +      case VAR_INTS:
> +        values[i] = random.nextInt();
> +        valField.setInt(values[i]);
> +        break;
> +      case FIXED_INTS_16:
> +        values[i] = random.nextInt(Short.MAX_VALUE);
> +        valField.setInt((short) values[i], true);
> +        break;
> +      case FIXED_INTS_32:
> +        values[i] = random.nextInt();
> +        valField.setInt((int) values[i], true);
> +        break;
> +      case FIXED_INTS_64:
> +        values[i] = random.nextLong();
> +        valField.setInt(values[i], true);
> +        break;
> +      case FLOAT_64:
> +        double nextDouble = random.nextDouble();
> +        values[i] = Double.doubleToRawLongBits(nextDouble);
> +        valField.setFloat(nextDouble);
> +        break;
> +      case FLOAT_32:
> +        final float nextFloat = random.nextFloat();
> +        values[i] = Double.doubleToRawLongBits(nextFloat);
> +        valField.setFloat(nextFloat);
> +        break;
> +      case FIXED_INTS_8:
> +         values[i] = (byte) i;
> +        valField.setInt((byte)values[i], true);
> +        break;
> +      case BYTES_FIXED_DEREF:
> +      case BYTES_FIXED_SORTED:
> +      case BYTES_FIXED_STRAIGHT:
> +        values[i] = random.nextLong();
> +        ref.copy(values[i]);
> +        valField.setBytes(ref, valueType);
> +        break;
> +      case BYTES_VAR_DEREF:
> +      case BYTES_VAR_SORTED:
> +      case BYTES_VAR_STRAIGHT:
> +        if (random.nextBoolean()) {
> +          ref.copy(random.nextInt());
> +          values[i] = ref.asInt();
> +        } else {
> +          ref.copy(random.nextLong());
> +          values[i] = ref.asLong();
> +        }
> +        valField.setBytes(ref, valueType);
> +        break;
> +
> +      default:
> +        fail("unexpected value " + valueType);
> +
> +      }
> +      doc.add(valField);
> +      writer.addDocument(doc);
> +      if (random.nextInt(10) == 0) {
> +        writer.commit();
> +      }
> +    }
> +  }
> +
> +  public void testPromoteBytes() throws IOException {
> +    runTest(UNSORTED_BYTES, TestType.Byte);
> +  }
> +
> +  public void testSortedPromoteBytes() throws IOException {
> +    runTest(SORTED_BYTES, TestType.Byte);
> +  }
> +
> +  public void testPromotInteger() throws IOException {
> +    runTest(INTEGERS, TestType.Int);
> +  }
> +
> +  public void testPromotFloatingPoint() throws CorruptIndexException,
> +      IOException {
> +    runTest(FLOATS, TestType.Float);
> +  }
> +
> +  public void testMergeIncompatibleTypes() throws IOException {
> +    Directory dir = newDirectory();
> +    IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> +    writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
> +    IndexWriter writer = new IndexWriter(dir, writerConfig);
> +    int num_1 = atLeast(200);
> +    int num_2 = atLeast(200);
> +    long[] values = new long[num_1 + num_2];
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(INTEGERS, random), values, 0, num_1);
> +    writer.commit();
> +
> +    if (random.nextInt(4) == 0) {
> +      // once in a while use addIndexes
> +      Directory dir_2 = newDirectory() ;
> +      IndexWriter writer_2 = new IndexWriter(dir_2,
> +          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +      index(writer_2, new IndexDocValuesField("promote"),
> +          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> +      writer_2.commit();
> +      writer_2.close();
> +      if (random.nextBoolean()) {
> +        writer.addIndexes(dir_2);
> +      } else {
> +        // do a real merge here
> +        IndexReader open = IndexReader.open(dir_2);
> +        writer.addIndexes(open);
> +        open.close();
> +      }
> +      dir_2.close();
> +    } else {
> +      index(writer, new IndexDocValuesField("promote"),
> +          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> +      writer.commit();
> +    }
> +    writer.close();
> +    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> +    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
> +      writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
> +    }
> +    writer = new IndexWriter(dir, writerConfig);
> +    // now optimize
> +    writer.optimize();
> +    writer.close();
> +    IndexReader reader = IndexReader.open(dir);
> +    assertTrue(reader.isOptimized());
> +    ReaderContext topReaderContext = reader.getTopReaderContext();
> +    ReaderContext[] children = topReaderContext.children();
> +    IndexDocValues docValues = children[0].reader.docValues("promote");
> +    assertNotNull(docValues);
> +    assertValues(TestType.Byte, dir, values);
> +    assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
> +    reader.close();
> +    dir.close();
> +  }
> +
> +}
> \ No newline at end of file
>
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org