You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/10 20:05:18 UTC
svn commit: r1181104 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/values/TypePromoter.java
test/org/apache/lucene/index/values/TestTypePromotion.java
Author: rmuir
Date: Mon Oct 10 18:05:18 2011
New Revision: 1181104
URL: http://svn.apache.org/viewvc?rev=1181104&view=rev
Log:
LUCENE-3186: svn add
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (with props)
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java Mon Oct 10 18:05:18 2011
@@ -0,0 +1,204 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Type promoter that promotes {@link IndexDocValues} during merge based on
+ * their {@link ValueType} and {@link #getValueSize()}
+ *
+ * @lucene.internal
+ */
+public class TypePromoter {
+
+ private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
+ private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
+ public static final int VAR_TYPE_VALUE_SIZE = -1;
+
+ private static final int IS_INT = 1 << 0;
+ private static final int IS_BYTE = 1 << 1;
+ private static final int IS_FLOAT = 1 << 2;
+ /* VAR & FIXED == VAR */
+ private static final int IS_VAR = 1 << 3;
+ private static final int IS_FIXED = 1 << 3 | 1 << 4;
+ /* if we have FIXED & FIXED with different size we promote to VAR */
+ private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
+ /* STRAIGHT & DEREF == STRAIGHT (dense values win) */
+ private static final int IS_STRAIGHT = 1 << 5;
+ private static final int IS_DEREF = 1 << 5 | 1 << 6;
+ private static final int IS_SORTED = 1 << 7;
+ /* more bits wins (int16 & int32 == int32) */
+ private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
+ private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
+ private static final int IS_32_BIT = 1 << 10 | 1 << 11;
+ private static final int IS_64_BIT = 1 << 11;
+
+ private final ValueType type;
+ private final int flags;
+ private final int valueSize;
+
+ /**
+ * Returns a positive value size if this {@link TypePromoter} represents a
+ * fixed variant, otherwise <code>-1</code>
+ *
+ * @return a positive value size if this {@link TypePromoter} represents a
+ * fixed variant, otherwise <code>-1</code>
+ */
+ public int getValueSize() {
+ return valueSize;
+ }
+
+ static {
+ for (ValueType type : ValueType.values()) {
+ TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
+ FLAGS_MAP.put(create.flags, type);
+ }
+ }
+
+ /**
+ * Creates a new {@link TypePromoter}
+ *
+ * @param type
+ * the {@link ValueType} this promoter represents
+ * @param flags
+ * the promoters flags
+ * @param valueSize
+ * the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
+ */
+ protected TypePromoter(ValueType type, int flags, int valueSize) {
+ this.type = type;
+ this.flags = flags;
+ this.valueSize = valueSize;
+ }
+
+ /**
+ * Creates a new promoted {@link TypePromoter} based on this and the given
+ * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
+ * aren't compatible.
+ *
+ * @param promoter
+ * the incoming promoter
+ * @return a new promoted {@link TypePromoter} based on this and the given
+ * {@link TypePromoter} or <code>null</code> iff the
+ * {@link TypePromoter} aren't compatible.
+ */
+ public TypePromoter promote(TypePromoter promoter) {
+
+ int promotedFlags = promoter.flags & this.flags;
+ TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
+ if (promoted == null) {
+ return promoted;
+ }
+ if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
+ if (this.valueSize == promoter.valueSize) {
+ return promoted;
+ }
+ return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
+ VAR_TYPE_VALUE_SIZE);
+ }
+ return promoted;
+
+ }
+
+ /**
+ * Returns the {@link ValueType} of this {@link TypePromoter}
+ *
+ * @return the {@link ValueType} of this {@link TypePromoter}
+ */
+ public ValueType type() {
+ return type;
+ }
+
+ @Override
+ public String toString() {
+ return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
+ }
+
+ /**
+ * Creates a new {@link TypePromoter} for the given type and size per value.
+ *
+ * @param type
+ * the {@link ValueType} to create the promoter for
+ * @param valueSize
+ * the size per value in bytes or <code>-1</code> iff the types have
+ * variable length.
+ * @return a new {@link TypePromoter}
+ */
+ public static TypePromoter create(ValueType type, int valueSize) {
+ if (type == null) {
+ return null;
+ }
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
+ case BYTES_FIXED_SORTED:
+ return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
+ case BYTES_FIXED_STRAIGHT:
+ return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
+ case BYTES_VAR_DEREF:
+ return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
+ case BYTES_VAR_SORTED:
+ return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
+ case BYTES_VAR_STRAIGHT:
+ return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+ case FIXED_INTS_16:
+ return new TypePromoter(type,
+ IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
+ case FIXED_INTS_32:
+ return new TypePromoter(type,
+ IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
+ case FIXED_INTS_64:
+ return new TypePromoter(type,
+ IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
+ case FIXED_INTS_8:
+ return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
+ valueSize);
+ case FLOAT_32:
+ return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
+ | IS_32_BIT, valueSize);
+ case FLOAT_64:
+ return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
+ | IS_64_BIT, valueSize);
+ case VAR_INTS:
+ return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
+ default:
+ throw new IllegalStateException();
+ }
+ }
+
+ /**
+ * Returns a {@link TypePromoter} that always promotes to the type provided to
+ * {@link #promote(TypePromoter)}
+ */
+ public static TypePromoter getIdentityPromoter() {
+ return IDENTITY_PROMOTER;
+ }
+
+ private static class IdentityTypePromoter extends TypePromoter {
+
+ public IdentityTypePromoter() {
+ super(null, 0, -1);
+ }
+
+ @Override
+ public TypePromoter promote(TypePromoter promoter) {
+ return promoter;
+ }
+ }
+}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Mon Oct 10 18:05:18 2011
@@ -0,0 +1,313 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Random;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IndexDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Before;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+/**
+ * Indexes the same doc values field with different {@link ValueType}s in
+ * different segments, merges the segments and checks the merged values.
+ */
+public class TestTypePromotion extends LuceneTestCase {
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
+        .getDefaultFieldCodec().equals("PreFlex"));
+  }
+
+  private static final EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
+      ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
+      ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
+
+  private static final EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
+      ValueType.FLOAT_64);
+
+  private static final EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
+      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
+
+  private static final EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
+
+  /** Picks one of the given types using the given random. */
+  public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
+    ValueType[] array = typeEnum.toArray(new ValueType[0]);
+    return array[random.nextInt(array.length)];
+  }
+
+  /* selects how assertValues reads a value back from the merged source */
+  private static enum TestType {
+    Int, Float, Byte
+  }
+
+  /**
+   * Indexes three batches of documents, each with a type randomly chosen from
+   * the given set, optimizes the index and verifies all values.
+   *
+   * @param types
+   *          the types to choose from for each batch
+   * @param type
+   *          how the merged values are read back for verification
+   */
+  private void runTest(EnumSet<ValueType> types, TestType type)
+      throws CorruptIndexException, IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    int num_3 = atLeast(200);
+    // expected value per document, addressed by the stored "id" field
+    long[] values = new long[num_1 + num_2 + num_3];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, 0, num_1);
+    writer.commit();
+
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, num_1, num_2);
+    writer.commit();
+
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      writer.optimize();
+
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+      writer_2.commit();
+      writer_2.close();
+      addIndexesAndClose(writer, dir_2);
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+    }
+
+    writer.optimize();
+    writer.close();
+    assertValues(type, dir, values);
+    dir.close();
+  }
+
+  /**
+   * Adds the index in the given directory to the writer, randomly either via
+   * the directory itself or via a reader opened on it, then closes the
+   * directory.
+   */
+  private void addIndexesAndClose(IndexWriter writer, Directory source)
+      throws CorruptIndexException, IOException {
+    if (random.nextBoolean()) {
+      writer.addIndexes(source);
+    } else {
+      // do a real merge here
+      IndexReader open = IndexReader.open(source);
+      writer.addIndexes(open);
+      open.close();
+    }
+    source.close();
+  }
+
+  /**
+   * Opens the index in the given directory, expects it to be a single
+   * optimized segment and compares the "promote" value of every document with
+   * the expected value recorded under the document's stored "id".
+   */
+  private void assertValues(TestType type, Directory dir, long[] values)
+      throws CorruptIndexException, IOException {
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    // check the segment count before touching children[0]
+    assertEquals(1, children.length);
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    Source directSource = docValues.getDirectSource();
+    for (int i = 0; i < values.length; i++) {
+      int id = Integer.parseInt(reader.document(i).get("id"));
+      String msg = "id: " + id + " doc: " + i;
+      switch (type) {
+      case Byte:
+        BytesRef bytes = directSource.getBytes(i, new BytesRef());
+        long value = 0;
+        // decode according to the number of bytes that were stored
+        switch (bytes.length) {
+        case 1:
+          value = bytes.bytes[bytes.offset];
+          break;
+        case 2:
+          value = bytes.asShort();
+          break;
+        case 4:
+          value = bytes.asInt();
+          break;
+        case 8:
+          value = bytes.asLong();
+          break;
+
+        default:
+          fail(msg + " bytessize: " + bytes.length);
+        }
+
+        assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
+        break;
+      case Float:
+        assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
+        break;
+      case Int:
+        assertEquals(msg, values[id], directSource.getInt(i));
+        break;
+      default:
+        break;
+      }
+
+    }
+    docValues.close();
+    reader.close();
+  }
+
+  /**
+   * Adds <code>num</code> documents starting at id <code>offset</code>. Each
+   * document gets a stored "id" field and the given doc values field set to a
+   * value of the given type; the value is recorded in <code>values[id]</code>.
+   * Commits randomly in between.
+   */
+  public void index(IndexWriter writer, IndexDocValuesField valField,
+      ValueType valueType, long[] values, int offset, int num)
+      throws CorruptIndexException, IOException {
+    BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
+    for (int i = offset; i < offset + num; i++) {
+      Document doc = new Document();
+      doc.add(new Field("id", i + "", TextField.TYPE_STORED));
+      switch (valueType) {
+      case VAR_INTS:
+        values[i] = random.nextInt();
+        valField.setInt(values[i]);
+        break;
+      case FIXED_INTS_16:
+        values[i] = random.nextInt(Short.MAX_VALUE);
+        valField.setInt((short) values[i], true);
+        break;
+      case FIXED_INTS_32:
+        values[i] = random.nextInt();
+        valField.setInt((int) values[i], true);
+        break;
+      case FIXED_INTS_64:
+        values[i] = random.nextLong();
+        valField.setInt(values[i], true);
+        break;
+      case FLOAT_64:
+        double nextDouble = random.nextDouble();
+        values[i] = Double.doubleToRawLongBits(nextDouble);
+        valField.setFloat(nextDouble);
+        break;
+      case FLOAT_32:
+        final float nextFloat = random.nextFloat();
+        // widened to double so it compares against Source#getFloat in assertValues
+        values[i] = Double.doubleToRawLongBits(nextFloat);
+        valField.setFloat(nextFloat);
+        break;
+      case FIXED_INTS_8:
+        values[i] = (byte) i;
+        valField.setInt((byte) values[i], true);
+        break;
+      case BYTES_FIXED_DEREF:
+      case BYTES_FIXED_SORTED:
+      case BYTES_FIXED_STRAIGHT:
+        values[i] = random.nextLong();
+        ref.copy(values[i]);
+        valField.setBytes(ref, valueType);
+        break;
+      case BYTES_VAR_DEREF:
+      case BYTES_VAR_SORTED:
+      case BYTES_VAR_STRAIGHT:
+        // variable length: randomly store an int or a long
+        if (random.nextBoolean()) {
+          ref.copy(random.nextInt());
+          values[i] = ref.asInt();
+        } else {
+          ref.copy(random.nextLong());
+          values[i] = ref.asLong();
+        }
+        valField.setBytes(ref, valueType);
+        break;
+
+      default:
+        fail("unexpected value " + valueType);
+
+      }
+      doc.add(valField);
+      writer.addDocument(doc);
+      if (random.nextInt(10) == 0) {
+        writer.commit();
+      }
+    }
+  }
+
+  public void testPromoteBytes() throws IOException {
+    runTest(UNSORTED_BYTES, TestType.Byte);
+  }
+
+  public void testSortedPromoteBytes() throws IOException {
+    runTest(SORTED_BYTES, TestType.Byte);
+  }
+
+  public void testPromotInteger() throws IOException {
+    runTest(INTEGERS, TestType.Int);
+  }
+
+  public void testPromotFloatingPoint() throws CorruptIndexException,
+      IOException {
+    runTest(FLOATS, TestType.Float);
+  }
+
+  /**
+   * Indexes integer values first and byte values afterwards, merges everything
+   * into one segment and expects the field to end up as
+   * {@link ValueType#BYTES_VAR_STRAIGHT}.
+   */
+  public void testMergeIncompatibleTypes() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
+    IndexWriter writer = new IndexWriter(dir, writerConfig);
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    long[] values = new long[num_1 + num_2];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(INTEGERS, random), values, 0, num_1);
+    writer.commit();
+
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
+      writer_2.commit();
+      writer_2.close();
+      addIndexesAndClose(writer, dir_2);
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
+      writer.commit();
+    }
+    writer.close();
+    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
+      writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
+    }
+    writer = new IndexWriter(dir, writerConfig);
+    // now optimize
+    writer.optimize();
+    writer.close();
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    assertNotNull(docValues);
+    assertValues(TestType.Byte, dir, values);
+    assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
+    reader.close();
+    dir.close();
+  }
+
+}
\ No newline at end of file
Re: svn commit: r1181104 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/values/TypePromoter.java test/org/apache/lucene/index/values/TestTypePromotion.java
Posted by Simon Willnauer <si...@googlemail.com>.
ah crap! thanks robert!
On Mon, Oct 10, 2011 at 8:05 PM, <rm...@apache.org> wrote:
> Author: rmuir
> Date: Mon Oct 10 18:05:18 2011
> New Revision: 1181104
>
> URL: http://svn.apache.org/viewvc?rev=1181104&view=rev
> Log:
> LUCENE-3186: svn add
>
> Added:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (with props)
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (with props)
>
> Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (added)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,204 @@
> +package org.apache.lucene.index.values;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +/**
> + * Type promoter that promotes {@link IndexDocValues} during merge based on
> + * their {@link ValueType} and {@link #getValueSize()}
> + *
> + * @lucene.internal
> + */
> +public class TypePromoter {
> +
> + private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
> + private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
> + public static final int VAR_TYPE_VALUE_SIZE = -1;
> +
> + private static final int IS_INT = 1 << 0;
> + private static final int IS_BYTE = 1 << 1;
> + private static final int IS_FLOAT = 1 << 2;
> + /* VAR & FIXED == VAR */
> + private static final int IS_VAR = 1 << 3;
> + private static final int IS_FIXED = 1 << 3 | 1 << 4;
> + /* if we have FIXED & FIXED with different size we promote to VAR */
> + private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
> + /* STRAIGHT & DEREF == STRAIGHT (dense values win) */
> + private static final int IS_STRAIGHT = 1 << 5;
> + private static final int IS_DEREF = 1 << 5 | 1 << 6;
> + private static final int IS_SORTED = 1 << 7;
> + /* more bits wins (int16 & int32 == int32) */
> + private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
> + private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
> + private static final int IS_32_BIT = 1 << 10 | 1 << 11;
> + private static final int IS_64_BIT = 1 << 11;
> +
> + private final ValueType type;
> + private final int flags;
> + private final int valueSize;
> +
> + /**
> + * Returns a positive value size if this {@link TypePromoter} represents a
> + * fixed variant, otherwise <code>-1</code>
> + *
> + * @return a positive value size if this {@link TypePromoter} represents a
> + * fixed variant, otherwise <code>-1</code>
> + */
> + public int getValueSize() {
> + return valueSize;
> + }
> +
> + static {
> + for (ValueType type : ValueType.values()) {
> + TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
> + FLAGS_MAP.put(create.flags, type);
> + }
> + }
> +
> + /**
> + * Creates a new {@link TypePromoter}
> + *
> + * @param type
> + * the {@link ValueType} this promoter represents
> + * @param flags
> + * the promoters flags
> + * @param valueSize
> + * the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
> + */
> + protected TypePromoter(ValueType type, int flags, int valueSize) {
> + this.type = type;
> + this.flags = flags;
> + this.valueSize = valueSize;
> + }
> +
> + /**
> + * Creates a new promoted {@link TypePromoter} based on this and the given
> + * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
> + * aren't compatible.
> + *
> + * @param promoter
> + * the incoming promoter
> + * @return a new promoted {@link TypePromoter} based on this and the given
> + * {@link TypePromoter} or <code>null</code> iff the
> + * {@link TypePromoter} aren't compatible.
> + */
> + public TypePromoter promote(TypePromoter promoter) {
> +
> + int promotedFlags = promoter.flags & this.flags;
> + TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
> + if (promoted == null) {
> + return promoted;
> + }
> + if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
> + if (this.valueSize == promoter.valueSize) {
> + return promoted;
> + }
> + return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
> + VAR_TYPE_VALUE_SIZE);
> + }
> + return promoted;
> +
> + }
> +
> + /**
> + * Returns the {@link ValueType} of this {@link TypePromoter}
> + *
> + * @return the {@link ValueType} of this {@link TypePromoter}
> + */
> + public ValueType type() {
> + return type;
> + }
> +
> + @Override
> + public String toString() {
> + return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
> + }
> +
> + /**
> + * Creates a new {@link TypePromoter} for the given type and size per value.
> + *
> + * @param type
> + * the {@link ValueType} to create the promoter for
> + * @param valueSize
> + * the size per value in bytes or <code>-1</code> iff the types have
> + * variable length.
> + * @return a new {@link TypePromoter}
> + */
> + public static TypePromoter create(ValueType type, int valueSize) {
> + if (type == null) {
> + return null;
> + }
> + switch (type) {
> + case BYTES_FIXED_DEREF:
> + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
> + case BYTES_FIXED_SORTED:
> + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
> + case BYTES_FIXED_STRAIGHT:
> + return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
> + case BYTES_VAR_DEREF:
> + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
> + case BYTES_VAR_SORTED:
> + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
> + case BYTES_VAR_STRAIGHT:
> + return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> + case FIXED_INTS_16:
> + return new TypePromoter(type,
> + IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
> + case FIXED_INTS_32:
> + return new TypePromoter(type,
> + IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
> + case FIXED_INTS_64:
> + return new TypePromoter(type,
> + IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
> + case FIXED_INTS_8:
> + return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
> + valueSize);
> + case FLOAT_32:
> + return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> + | IS_32_BIT, valueSize);
> + case FLOAT_64:
> + return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> + | IS_64_BIT, valueSize);
> + case VAR_INTS:
> + return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> + default:
> + throw new IllegalStateException();
> + }
> + }
> +
> + /**
> + * Returns a {@link TypePromoter} that always promotes to the type provided to
> + * {@link #promote(TypePromoter)}
> + */
> + public static TypePromoter getIdentityPromoter() {
> + return IDENTITY_PROMOTER;
> + }
> +
> + private static class IdentityTypePromoter extends TypePromoter {
> +
> + public IdentityTypePromoter() {
> + super(null, 0, -1);
> + }
> +
> + @Override
> + public TypePromoter promote(TypePromoter promoter) {
> + return promoter;
> + }
> + }
> +}
> \ No newline at end of file
>
> Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (added)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,313 @@
> +package org.apache.lucene.index.values;
> +
> +import java.io.IOException;
> +import java.util.EnumSet;
> +import java.util.Random;
> +
> +import org.apache.lucene.analysis.MockAnalyzer;
> +import org.apache.lucene.document.Document;
> +import org.apache.lucene.document.Field;
> +import org.apache.lucene.document.IndexDocValuesField;
> +import org.apache.lucene.document.TextField;
> +import org.apache.lucene.index.CorruptIndexException;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.IndexReader.ReaderContext;
> +import org.apache.lucene.index.IndexWriter;
> +import org.apache.lucene.index.IndexWriterConfig;
> +import org.apache.lucene.index.NoMergePolicy;
> +import org.apache.lucene.index.codecs.CodecProvider;
> +import org.apache.lucene.index.values.IndexDocValues.Source;
> +import org.apache.lucene.store.Directory;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.LuceneTestCase;
> +import org.junit.Before;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with this
> + * work for additional information regarding copyright ownership. The ASF
> + * licenses this file to You under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
> + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
> + * License for the specific language governing permissions and limitations under
> + * the License.
> + */
> +public class TestTypePromotion extends LuceneTestCase {
> + @Before
> + public void setUp() throws Exception {
> + super.setUp();
> + assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
> + .getDefaultFieldCodec().equals("PreFlex"));
> + }
> +
> + private static EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
> + ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
> + ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
> +
> + private static EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
> + ValueType.FLOAT_64);
> +
> + private static EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
> + ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
> + ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
> +
> + private static EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
> + ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
> +
> + public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
> + ValueType[] array = typeEnum.toArray(new ValueType[0]);
> + return array[random.nextInt(array.length)];
> + }
> +
> + private static enum TestType {
> + Int, Float, Byte
> + }
> +
> + private void runTest(EnumSet<ValueType> types, TestType type)
> + throws CorruptIndexException, IOException {
> + Directory dir = newDirectory();
> + IndexWriter writer = new IndexWriter(dir,
> + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> + int num_1 = atLeast(200);
> + int num_2 = atLeast(200);
> + int num_3 = atLeast(200);
> + long[] values = new long[num_1 + num_2 + num_3];
> + index(writer, new IndexDocValuesField("promote"),
> + randomValueType(types, random), values, 0, num_1);
> + writer.commit();
> +
> + index(writer, new IndexDocValuesField("promote"),
> + randomValueType(types, random), values, num_1, num_2);
> + writer.commit();
> +
> + if (random.nextInt(4) == 0) {
> + // once in a while use addIndexes
> + writer.optimize();
> +
> + Directory dir_2 = newDirectory() ;
> + IndexWriter writer_2 = new IndexWriter(dir_2,
> + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> + index(writer_2, new IndexDocValuesField("promote"),
> + randomValueType(types, random), values, num_1 + num_2, num_3);
> + writer_2.commit();
> + writer_2.close();
> + if (random.nextBoolean()) {
> + writer.addIndexes(dir_2);
> + } else {
> + // do a real merge here
> + IndexReader open = IndexReader.open(dir_2);
> + writer.addIndexes(open);
> + open.close();
> + }
> + dir_2.close();
> + } else {
> + index(writer, new IndexDocValuesField("promote"),
> + randomValueType(types, random), values, num_1 + num_2, num_3);
> + }
> +
> + writer.optimize();
> + writer.close();
> + assertValues(type, dir, values);
> + dir.close();
> + }
> +
> + /**
> +  * Opens {@code dir} and verifies the "promote" doc value of every document
> +  * against {@code values}, looked up by the document's stored "id" field.
> +  *
> +  * @param type selects the accessor used to read each value: {@code getBytes},
> +  *        {@code getFloat} or {@code getInt}
> +  * @param dir directory holding the index; the reader opened on it must
> +  *        report as optimized and expose exactly one child context
> +  * @param values expected values, indexed by the stored "id" of each document
> +  */
> + private void assertValues(TestType type, Directory dir, long[] values)
> +     throws CorruptIndexException, IOException {
> +   IndexReader reader = IndexReader.open(dir);
> +   assertTrue(reader.isOptimized());
> +   ReaderContext topReaderContext = reader.getTopReaderContext();
> +   ReaderContext[] children = topReaderContext.children();
> +   // Check the child count before indexing into the array, so that an
> +   // unexpected count is reported as an assertion failure and not as an
> +   // ArrayIndexOutOfBoundsException.
> +   assertEquals(1, children.length);
> +   IndexDocValues docValues = children[0].reader.docValues("promote");
> +   Source directSource = docValues.getDirectSource();
> +   for (int i = 0; i < values.length; i++) {
> +     int id = Integer.parseInt(reader.document(i).get("id"));
> +     String msg = "id: " + id + " doc: " + i;
> +     switch (type) {
> +       case Byte:
> +         BytesRef bytes = directSource.getBytes(i, new BytesRef());
> +         long value = 0;
> +         // the length of the byte[] decides how wide the decoded number is
> +         switch (bytes.length) {
> +           case 1:
> +             value = bytes.bytes[bytes.offset];
> +             break;
> +           case 2:
> +             value = bytes.asShort();
> +             break;
> +           case 4:
> +             value = bytes.asInt();
> +             break;
> +           case 8:
> +             value = bytes.asLong();
> +             break;
> +
> +           default:
> +             fail(msg + " bytessize: " + bytes.length);
> +         }
> +
> +         assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
> +         break;
> +       case Float:
> +         // expected values were recorded as raw double bits
> +         assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
> +         break;
> +       case Int:
> +         assertEquals(msg, values[id], directSource.getInt(i));
> +         break;
> +       default:
> +         break;
> +     }
> +
> +   }
> +   docValues.close();
> +   reader.close();
> + }
> +
> + /**
> +  * Adds {@code num} documents with ids {@code offset} to
> +  * {@code offset + num - 1}. Each document stores its id in the "id" field and
> +  * carries {@code valField}, set to a value that fits {@code valueType}. The
> +  * value is recorded as a long in {@code values} at the document's id. After
> +  * each document the writer is committed with a one in ten chance.
> +  *
> +  * @param writer the writer receiving the documents
> +  * @param valField the doc values field, reused for every document
> +  * @param valueType decides which setter and which kind of value is used;
> +  *        a type without a case here fails the test
> +  * @param values receives the expected value of every added document
> +  * @param offset id of the first document
> +  * @param num number of documents to add
> +  */
> + public void index(IndexWriter writer, IndexDocValuesField valField,
> +     ValueType valueType, long[] values, int offset, int num)
> +     throws CorruptIndexException, IOException {
> +   final BytesRef scratch = new BytesRef(new byte[] { 1, 2, 3, 4 });
> +   final int end = offset + num;
> +   for (int id = offset; id < end; id++) {
> +     final Document doc = new Document();
> +     doc.add(new Field("id", Integer.toString(id), TextField.TYPE_STORED));
> +     switch (valueType) {
> +       // integer types
> +       case VAR_INTS:
> +         values[id] = random.nextInt();
> +         valField.setInt(values[id]);
> +         break;
> +       case FIXED_INTS_8:
> +         // derived from the id instead of the random source
> +         values[id] = (byte) id;
> +         valField.setInt((byte) values[id], true);
> +         break;
> +       case FIXED_INTS_16:
> +         values[id] = random.nextInt(Short.MAX_VALUE);
> +         valField.setInt((short) values[id], true);
> +         break;
> +       case FIXED_INTS_32:
> +         values[id] = random.nextInt();
> +         valField.setInt((int) values[id], true);
> +         break;
> +       case FIXED_INTS_64:
> +         values[id] = random.nextLong();
> +         valField.setInt(values[id], true);
> +         break;
> +
> +       // floating point types, recorded as raw double bits
> +       case FLOAT_32: {
> +         final float singleValue = random.nextFloat();
> +         values[id] = Double.doubleToRawLongBits(singleValue);
> +         valField.setFloat(singleValue);
> +         break;
> +       }
> +       case FLOAT_64: {
> +         final double doubleValue = random.nextDouble();
> +         values[id] = Double.doubleToRawLongBits(doubleValue);
> +         valField.setFloat(doubleValue);
> +         break;
> +       }
> +
> +       // fixed length bytes: always a long
> +       case BYTES_FIXED_DEREF:
> +       case BYTES_FIXED_SORTED:
> +       case BYTES_FIXED_STRAIGHT:
> +         values[id] = random.nextLong();
> +         scratch.copy(values[id]);
> +         valField.setBytes(scratch, valueType);
> +         break;
> +
> +       // variable length bytes: either an int or a long
> +       case BYTES_VAR_DEREF:
> +       case BYTES_VAR_SORTED:
> +       case BYTES_VAR_STRAIGHT:
> +         if (random.nextBoolean()) {
> +           scratch.copy(random.nextInt());
> +           values[id] = scratch.asInt();
> +         } else {
> +           scratch.copy(random.nextLong());
> +           values[id] = scratch.asLong();
> +         }
> +         valField.setBytes(scratch, valueType);
> +         break;
> +
> +       default:
> +         fail("unexpected value " + valueType);
> +     }
> +     doc.add(valField);
> +     writer.addDocument(doc);
> +     if (random.nextInt(10) == 0) {
> +       writer.commit();
> +     }
> +   }
> + }
> +
> + public void testPromoteBytes() throws IOException { // batches mix the unsorted byte[] value types
> + runTest(UNSORTED_BYTES, TestType.Byte); // values are read back through getBytes
> + }
> +
> + public void testSortedPromoteBytes() throws IOException { // batches mix the two sorted byte[] value types
> + runTest(SORTED_BYTES, TestType.Byte); // values are read back through getBytes
> + }
> +
> + public void testPromotInteger() throws IOException { // batches mix the integer value types
> + runTest(INTEGERS, TestType.Int); // values are read back through getInt
> + }
> +
> + public void testPromotFloatingPoint() throws CorruptIndexException, // batches mix FLOAT_32 and FLOAT_64
> + IOException {
> + runTest(FLOATS, TestType.Float); // values are compared as raw double bits of getFloat
> + }
> +
> + public void testMergeIncompatibleTypes() throws IOException {
> + Directory dir = newDirectory();
> + IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> + writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
> + IndexWriter writer = new IndexWriter(dir, writerConfig);
> + int num_1 = atLeast(200);
> + int num_2 = atLeast(200);
> + long[] values = new long[num_1 + num_2];
> + index(writer, new IndexDocValuesField("promote"),
> + randomValueType(INTEGERS, random), values, 0, num_1);
> + writer.commit();
> +
> + if (random.nextInt(4) == 0) {
> + // once in a while use addIndexes
> + Directory dir_2 = newDirectory() ;
> + IndexWriter writer_2 = new IndexWriter(dir_2,
> + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> + index(writer_2, new IndexDocValuesField("promote"),
> + randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> + writer_2.commit();
> + writer_2.close();
> + if (random.nextBoolean()) {
> + writer.addIndexes(dir_2);
> + } else {
> + // do a real merge here
> + IndexReader open = IndexReader.open(dir_2);
> + writer.addIndexes(open);
> + open.close();
> + }
> + dir_2.close();
> + } else {
> + index(writer, new IndexDocValuesField("promote"),
> + randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> + writer.commit();
> + }
> + writer.close();
> + writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> + if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
> + writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
> + }
> + writer = new IndexWriter(dir, writerConfig);
> + // now optimize
> + writer.optimize();
> + writer.close();
> + IndexReader reader = IndexReader.open(dir);
> + assertTrue(reader.isOptimized());
> + ReaderContext topReaderContext = reader.getTopReaderContext();
> + ReaderContext[] children = topReaderContext.children();
> + IndexDocValues docValues = children[0].reader.docValues("promote");
> + assertNotNull(docValues);
> + assertValues(TestType.Byte, dir, values);
> + assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
> + reader.close();
> + dir.close();
> + }
> +
> +}
> \ No newline at end of file
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org