You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/06/10 13:35:49 UTC
svn commit: r1601606 - in /lucene/dev/trunk/lucene: ./
codecs/src/test/org/apache/lucene/codecs/diskdv/
codecs/src/test/org/apache/lucene/codecs/simpletext/
core/src/java/org/apache/lucene/codecs/lucene49/
core/src/test/org/apache/lucene/codecs/lucene4...
Author: rmuir
Date: Tue Jun 10 11:35:48 2014
New Revision: 1601606
URL: http://svn.apache.org/r1601606
Log:
LUCENE-5743: Add Lucene49NormsFormat
Added:
lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskNormsFormat.java (with props)
lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextNormsFormat.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsConsumer.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNormsFormat.java (with props)
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1601606&r1=1601605&r2=1601606&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jun 10 11:35:48 2014
@@ -129,6 +129,9 @@ New Features
from Directory. Add Lucene49Codec and Lucene49DocValuesFormat that make
use of these. (Robert Muir)
+* LUCENE-5743: Add Lucene49NormsFormat, which can compress in some cases
+ such as very short fields. (Ryan Ernst, Adrien Grand, Robert Muir)
+
Changes in Backwards Compatibility Policy
* LUCENE-5634: Add reuse argument to IndexableField.tokenStream. This
Added: lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskNormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskNormsFormat.java (added)
+++ lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskNormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,32 @@
+package org.apache.lucene.codecs.diskdv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
+import org.apache.lucene.index.BaseNormsFormatTestCase;
+
+/** Tests DiskNormsFormat */
+public class TestDiskNormsFormat extends BaseNormsFormatTestCase {
+ private final Codec codec = new CheapBastardCodec();
+
+ @Override
+ protected Codec getCodec() {
+ return codec;
+ }
+}
Added: lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextNormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextNormsFormat.java (added)
+++ lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextNormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,31 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseNormsFormatTestCase;
+
+/** Tests SimpleTextNormsFormat */
+public class TestSimpleTextNormsFormat extends BaseNormsFormatTestCase {
+ private final Codec codec = new SimpleTextCodec();
+
+ @Override
+ protected Codec getCodec() {
+ return codec;
+ }
+}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java?rev=1601606&r1=1601605&r2=1601606&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49Codec.java Tue Jun 10 11:35:48 2014
@@ -131,7 +131,7 @@ public class Lucene49Codec extends Codec
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene49");
- private final NormsFormat normsFormat = new Lucene42NormsFormat();
+ private final NormsFormat normsFormat = new Lucene49NormsFormat();
@Override
public final NormsFormat normsFormat() {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java?rev=1601606&r1=1601605&r2=1601606&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesConsumer.java Tue Jun 10 11:35:48 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene4
import java.io.Closeable; // javadocs
import java.io.IOException;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -199,6 +200,7 @@ public class Lucene49DocValuesConsumer e
break;
case TABLE_COMPRESSED:
final Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
+ Arrays.sort(decode);
final HashMap<Long,Integer> encode = new HashMap<>();
meta.writeVInt(decode.length);
for (int i = 0; i < decode.length; i++) {
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsConsumer.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsConsumer.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsConsumer.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,208 @@
+package org.apache.lucene.codecs.lucene49;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT;
+
+/**
+ * Writer for {@link Lucene49NormsFormat}
+ */
+class Lucene49NormsConsumer extends DocValuesConsumer {
+ static final byte DELTA_COMPRESSED = 0;
+ static final byte TABLE_COMPRESSED = 1;
+ static final byte CONST_COMPRESSED = 2;
+ static final byte UNCOMPRESSED = 3;
+ static final int BLOCK_SIZE = 16384;
+
+ IndexOutput data, meta;
+ final int maxDoc;
+
+ Lucene49NormsConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
+ maxDoc = state.segmentInfo.getDocCount();
+ boolean success = false;
+ try {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.createOutput(dataName, state.context);
+ CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
+ String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+ meta = state.directory.createOutput(metaName, state.context);
+ CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this);
+ }
+ }
+ }
+
+ // we explicitly use only certain bits per value and a specified format, so we statically check this will work
+ static {
+ assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(1);
+ assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(2);
+ assert PackedInts.Format.PACKED_SINGLE_BLOCK.isSupported(4);
+ }
+
+ @Override
+ public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+ meta.writeVInt(field.number);
+ long minValue = Long.MAX_VALUE;
+ long maxValue = Long.MIN_VALUE;
+ // TODO: more efficient?
+ HashSet<Long> uniqueValues = null;
+ uniqueValues = new HashSet<>();
+
+ long count = 0;
+ for (Number nv : values) {
+ if (nv == null) {
+ throw new IllegalStateException("illegal norms data for field " + field.name + ", got null for value: " + count);
+ }
+ final long v = nv.longValue();
+
+ minValue = Math.min(minValue, v);
+ maxValue = Math.max(maxValue, v);
+
+ if (uniqueValues != null) {
+ if (uniqueValues.add(v)) {
+ if (uniqueValues.size() > 256) {
+ uniqueValues = null;
+ }
+ }
+ }
+ ++count;
+ }
+
+ if (count != maxDoc) {
+ throw new IllegalStateException("illegal norms data for field " + field.name + ", expected " + maxDoc + " values, got " + count);
+ }
+
+ if (uniqueValues != null && uniqueValues.size() == 1) {
+ // 0 bpv
+ meta.writeByte(CONST_COMPRESSED);
+ meta.writeLong(minValue);
+ } else if (uniqueValues != null) {
+ // small number of unique values: this is the typical case:
+ // we only use bpv=1,2,4,8
+ PackedInts.Format format = PackedInts.Format.PACKED_SINGLE_BLOCK;
+ int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
+ if (bitsPerValue == 3) {
+ bitsPerValue = 4;
+ } else if (bitsPerValue > 4) {
+ bitsPerValue = 8;
+ }
+
+ if (bitsPerValue == 8 && minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
+ meta.writeByte(UNCOMPRESSED); // uncompressed byte[]
+ meta.writeLong(data.getFilePointer());
+ for (Number nv : values) {
+ data.writeByte(nv == null ? 0 : (byte) nv.longValue());
+ }
+ } else {
+ meta.writeByte(TABLE_COMPRESSED); // table-compressed
+ meta.writeLong(data.getFilePointer());
+ data.writeVInt(PackedInts.VERSION_CURRENT);
+
+ Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
+ Arrays.sort(decode);
+ final HashMap<Long,Integer> encode = new HashMap<>();
+ // upgrade to power of two sized array
+ int size = 1 << bitsPerValue;
+ data.writeVInt(size);
+ for (int i = 0; i < decode.length; i++) {
+ data.writeLong(decode[i]);
+ encode.put(decode[i], i);
+ }
+ for (int i = decode.length; i < size; i++) {
+ data.writeLong(0);
+ }
+
+ data.writeVInt(format.getId());
+ data.writeVInt(bitsPerValue);
+
+ final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, format, maxDoc, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
+ for(Number nv : values) {
+ writer.add(encode.get(nv.longValue()));
+ }
+ writer.finish();
+ }
+ } else {
+ meta.writeByte(DELTA_COMPRESSED); // delta-compressed
+ meta.writeLong(data.getFilePointer());
+ data.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeVInt(BLOCK_SIZE);
+
+ final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
+ for (Number nv : values) {
+ writer.add(nv.longValue());
+ }
+ writer.finish();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ boolean success = false;
+ try {
+ if (meta != null) {
+ meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
+ }
+ if (data != null) {
+ CodecUtil.writeFooter(data); // write checksum
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(data, meta);
+ } else {
+ IOUtils.closeWhileHandlingException(data, meta);
+ }
+ meta = data = null;
+ }
+ }
+
+ @Override
+ public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+}
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,121 @@
+package org.apache.lucene.codecs.lucene49;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * Lucene 4.9 Score normalization format.
+ * <p>
+ * Encodes normalization values with these strategies:
+ * <p>
+ * <ul>
+ * <li>Uncompressed: when values fit into a single byte and would require more than 4 bits
+ * per value, they are just encoded as an uncompressed byte array.
+ * <li>Constant: when there is only one value present for the entire field, no actual data
+ * is written: this constant is encoded in the metadata
+ * <li>Table-compressed: when the number of unique values is very small (&lt; 256), and
+ * when there are unused "gaps" in the range of values used (such as {@link SmallFloat}),
+ * a lookup table is written instead. Each per-document entry is instead the ordinal
+ * to this table, and those ordinals are compressed with bitpacking ({@link PackedInts}).
+ * <li>Delta-compressed: per-document integers written as deltas from the minimum value,
+ * compressed with bitpacking. For more information, see {@link BlockPackedWriter}.
+ * This is only used when norms of larger than one byte are present.
+ * </ul>
+ * <p>
+ * Files:
+ * <ol>
+ * <li><tt>.nvd</tt>: Norms data</li>
+ * <li><tt>.nvm</tt>: Norms metadata</li>
+ * </ol>
+ * <ol>
+ * <li><a name="nvm" id="nvm"></a>
+ * <p>The Norms metadata or .nvm file.</p>
+ * <p>For each norms field, this stores metadata, such as the offset into the
+ * Norms data (.nvd)</p>
+ * <p>Norms metadata (.nvm) --> Header,<Entry><sup>NumFields</sup>,Footer</p>
+ * <ul>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Entry --> FieldNumber,Type,Offset</li>
+ * <li>FieldNumber --> {@link DataOutput#writeVInt vInt}</li>
+ * <li>Type --> {@link DataOutput#writeByte Byte}</li>
+ * <li>Offset --> {@link DataOutput#writeLong Int64}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * <p>FieldNumber of -1 indicates the end of metadata.</p>
+ * <p>Offset is the pointer to the start of the data in the norms data (.nvd), or the singleton value for Constant</p>
+ * <p>Type indicates how Numeric values will be compressed:
+ * <ul>
+ * <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
+ * from the minimum value within the block.
+ * <li>1 --> table-compressed. When the number of unique numeric values is small and it would save space,
+ * a lookup table of unique values is written, followed by the ordinal for each document.
+ * <li>2 --> constant. When there is a single value for the entire field.
+ * <li>3 --> uncompressed: Values written as a simple byte[].
+ * </ul>
+ * <li><a name="nvd" id="nvd"></a>
+ * <p>The Norms data or .nvd file.</p>
+ * <p>For each Norms field, this stores the actual per-document data (the heavy-lifting)</p>
+ * <p>Norms data (.nvd) --> Header,<Uncompressed | TableCompressed | DeltaCompressed><sup>NumFields</sup>,Footer</p>
+ * <ul>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Uncompressed --> {@link DataOutput#writeByte Byte}<sup>maxDoc</sup></li>
+ * <li>TableCompressed --> PackedIntsVersion,Table,BitPackedData</li>
+ * <li>Table --> TableSize, {@link DataOutput#writeLong int64}<sup>TableSize</sup></li>
+ * <li>BitPackedData --> {@link PackedInts}</li>
+ * <li>DeltaCompressed --> PackedIntsVersion,BlockSize,DeltaCompressedData</li>
+ * <li>DeltaCompressedData --> {@link BlockPackedWriter BlockPackedWriter(blockSize=16k)}</li>
+ * <li>PackedIntsVersion,BlockSize,TableSize --> {@link DataOutput#writeVInt vInt}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
+ * </ul>
+ * </ol>
+ * @lucene.experimental
+ */
+public class Lucene49NormsFormat extends NormsFormat {
+
+ /** Sole Constructor */
+ public Lucene49NormsFormat() {}
+
+ @Override
+ public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
+ return new Lucene49NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+ }
+
+ @Override
+ public DocValuesProducer normsProducer(SegmentReadState state) throws IOException {
+ return new Lucene49NormsProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+ }
+
+ private static final String DATA_CODEC = "Lucene49NormsData";
+ private static final String DATA_EXTENSION = "nvd";
+ private static final String METADATA_CODEC = "Lucene49NormsMetadata";
+ private static final String METADATA_EXTENSION = "nvm";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+}
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,233 @@
+package org.apache.lucene.codecs.lucene49;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.PackedInts;
+
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_START;
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsFormat.VERSION_CURRENT;
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.CONST_COMPRESSED;
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.TABLE_COMPRESSED;
+import static org.apache.lucene.codecs.lucene49.Lucene49NormsConsumer.UNCOMPRESSED;
+
+/**
+ * Reader for {@link Lucene49NormsFormat}
+ */
+class Lucene49NormsProducer extends DocValuesProducer {
+ // metadata maps (just file pointers and minimal stuff)
+ private final Map<Integer,NormsEntry> norms = new HashMap<>();
+ private final IndexInput data;
+ private final int version;
+
+ // ram instances we have already loaded
+ final Map<Integer,NumericDocValues> instances = new HashMap<>();
+
+ private final int maxDoc;
+ private final AtomicLong ramBytesUsed;
+
+ Lucene49NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
+ maxDoc = state.segmentInfo.getDocCount();
+ String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+ // read in the entries from the metadata file.
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
+ boolean success = false;
+ ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
+ try {
+ version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
+ readFields(in, state.fieldInfos);
+ CodecUtil.checkFooter(in);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(in);
+ } else {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
+
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ this.data = state.directory.openInput(dataName, state.context);
+ success = false;
+ try {
+ final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
+ if (version != version2) {
+ throw new CorruptIndexException("Format versions mismatch");
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this.data);
+ }
+ }
+ }
+
+ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
+ int fieldNumber = meta.readVInt();
+ while (fieldNumber != -1) {
+ FieldInfo info = infos.fieldInfo(fieldNumber);
+ if (info == null) {
+ throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
+ } else if (!info.hasNorms()) {
+ throw new CorruptIndexException("Invalid field: " + info.name + " (resource=" + meta + ")");
+ }
+ NormsEntry entry = new NormsEntry();
+ entry.format = meta.readByte();
+ entry.offset = meta.readLong();
+ switch(entry.format) {
+ case CONST_COMPRESSED:
+ case UNCOMPRESSED:
+ case TABLE_COMPRESSED:
+ case DELTA_COMPRESSED:
+ break;
+ default:
+ throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+ }
+ norms.put(fieldNumber, entry);
+ fieldNumber = meta.readVInt();
+ }
+ }
+
+ @Override
+ public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
+ NumericDocValues instance = instances.get(field.number);
+ if (instance == null) {
+ instance = loadNorms(field);
+ instances.put(field.number, instance);
+ }
+ return instance;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed.get();
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ CodecUtil.checksumEntireFile(data);
+ }
+
+ private NumericDocValues loadNorms(FieldInfo field) throws IOException {
+ NormsEntry entry = norms.get(field.number);
+ switch(entry.format) {
+ case CONST_COMPRESSED:
+ final long v = entry.offset;
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return v;
+ }
+ };
+ case UNCOMPRESSED:
+ data.seek(entry.offset);
+ final byte bytes[] = new byte[maxDoc];
+ data.readBytes(bytes, 0, bytes.length);
+ ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(bytes));
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return bytes[docID];
+ }
+ };
+ case DELTA_COMPRESSED:
+ data.seek(entry.offset);
+ int packedIntsVersion = data.readVInt();
+ int blockSize = data.readVInt();
+ final BlockPackedReader reader = new BlockPackedReader(data, packedIntsVersion, blockSize, maxDoc, false);
+ ramBytesUsed.addAndGet(reader.ramBytesUsed());
+ return reader;
+ case TABLE_COMPRESSED:
+ data.seek(entry.offset);
+ int packedVersion = data.readVInt();
+ int size = data.readVInt();
+ if (size > 256) {
+ throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
+ }
+ final long decode[] = new long[size];
+ for (int i = 0; i < decode.length; i++) {
+ decode[i] = data.readLong();
+ }
+ final int formatID = data.readVInt();
+ final int bitsPerValue = data.readVInt();
+ final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), packedVersion, maxDoc, bitsPerValue);
+ ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return decode[(int)ordsReader.get(docID)];
+ }
+ };
+ default:
+ throw new AssertionError();
+ }
+ }
+
+ @Override
+ public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public SortedDocValues getSorted(FieldInfo field) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void close() throws IOException {
+ data.close();
+ }
+
+ static class NormsEntry {
+ byte format;
+ long offset;
+ }
+}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html?rev=1601606&r1=1601605&r2=1601606&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/package.html Tue Jun 10 11:35:48 2014
@@ -173,7 +173,7 @@ term occurs in each document. Note that
all documents omit position data.
</li>
<li>
-{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}.
+{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Normalization factors}.
For each field in each document, a value is stored
that is multiplied into the score for hits on that field.
</li>
@@ -289,7 +289,7 @@ systems that frequently run out of file
<td>Stores additional per-position metadata information such as character offsets and user payloads</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms}</td>
+<td>{@link org.apache.lucene.codecs.lucene49.Lucene49NormsFormat Norms}</td>
<td>.nvd, .nvm</td>
<td>Encodes length and boost factors for docs and fields</td>
</tr>
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40NormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,38 @@
+package org.apache.lucene.codecs.lucene40;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseNormsFormatTestCase;
+import org.junit.BeforeClass;
+
+
+/** Tests Lucene40's norms format */
+/** Tests Lucene40's norms format */
+public class TestLucene40NormsFormat extends BaseNormsFormatTestCase {
+ // Read-write impersonator of the (read-only in trunk) 4.0 codec, so the
+ // base class can index with it.
+ final Codec codec = new Lucene40RWCodec();
+
+ /** Returns the codec under test; consumed by BaseNormsFormatTestCase. */
+ @Override
+ protected Codec getCodec() {
+ return codec;
+ }
+
+ // Must run before any codec instantiation: writing with an obsolete format
+ // is only allowed when impersonation is explicitly enabled.
+ @BeforeClass
+ public static void beforeClass() {
+ OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+ }
+}
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42NormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,38 @@
+package org.apache.lucene.codecs.lucene42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseNormsFormatTestCase;
+import org.junit.BeforeClass;
+
+
+/** Tests Lucene42's norms format */
+/** Tests Lucene42's norms format */
+public class TestLucene42NormsFormat extends BaseNormsFormatTestCase {
+ // Read-write impersonator of the (read-only in trunk) 4.2 codec, so the
+ // base class can index with it.
+ final Codec codec = new Lucene42RWCodec();
+
+ /** Returns the codec under test; consumed by BaseNormsFormatTestCase. */
+ @Override
+ protected Codec getCodec() {
+ return codec;
+ }
+
+ // Must run before any codec instantiation: writing with an obsolete format
+ // is only allowed when impersonation is explicitly enabled.
+ @BeforeClass
+ public static void beforeClass() {
+ OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+ }
+}
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene49/TestLucene49NormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,33 @@
+package org.apache.lucene.codecs.lucene49;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseNormsFormatTestCase;
+
+/**
+ * Tests Lucene49NormsFormat
+ */
+/**
+ * Tests Lucene49NormsFormat
+ */
+public class TestLucene49NormsFormat extends BaseNormsFormatTestCase {
+ // The current default codec; no impersonation flag needed since it is writable.
+ final Codec codec = new Lucene49Codec();
+
+ /** Returns the codec under test; consumed by BaseNormsFormatTestCase. */
+ @Override
+ protected Codec getCodec() {
+ return codec;
+ }
+}
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNormsFormat.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNormsFormat.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestNormsFormat.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,30 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+
+/** Tests the codec configuration defined by LuceneTestCase randomly
+ */
+/** Tests the codec configuration defined by LuceneTestCase randomly
+ */
+public class TestNormsFormat extends BaseNormsFormatTestCase {
+
+ /**
+ * Returns whatever codec the test framework randomly selected for this run,
+ * so the norms checks cover random codec configurations over time.
+ */
+ @Override
+ protected Codec getCodec() {
+ return Codec.getDefault();
+ }
+}
Added: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java?rev=1601606&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java (added)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java Tue Jun 10 11:35:48 2014
@@ -0,0 +1,183 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Abstract class to do basic tests for a norms format.
+ * NOTE: This test focuses on the norms impl, nothing else.
+ * The [stretch] goal is for this test to be
+ * so thorough in testing a new NormsFormat that if this
+ * test passes, then all Lucene/Solr tests should also pass. Ie,
+ * if there is some bug in a given NormsFormat that this
+ * test fails to catch then this test needs to be improved! */
+public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCase {
+
+ /** Norm values confined to the signed-byte range (the classic 4.0/4.2 case). */
+ public void testByteRange() throws Exception {
+ int iterations = atLeast(1);
+ final Random r = random();
+ for (int i = 0; i < iterations; i++) {
+ doTestNormsVersusStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return TestUtil.nextLong(r, Byte.MIN_VALUE, Byte.MAX_VALUE);
+ }
+ });
+ }
+ }
+
+ /** Norm values drawn from the full 64-bit long range. */
+ public void testLongRange() throws Exception {
+ int iterations = atLeast(1);
+ final Random r = random();
+ for (int i = 0; i < iterations; i++) {
+ doTestNormsVersusStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return TestUtil.nextLong(r, Long.MIN_VALUE, Long.MAX_VALUE);
+ }
+ });
+ }
+ }
+
+ /** Only two distinct norm values (20 or 3): exercises small-table style encodings. */
+ public void testFewValues() throws Exception {
+ int iterations = atLeast(1);
+ final Random r = random();
+ for (int i = 0; i < iterations; i++) {
+ doTestNormsVersusStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return r.nextBoolean() ? 20 : 3;
+ }
+ });
+ }
+ }
+
+ /** Every norm value is zero: exercises constant/degenerate encodings. */
+ public void testAllZeros() throws Exception {
+ int iterations = atLeast(1);
+ final Random r = random();
+ for (int i = 0; i < iterations; i++) {
+ doTestNormsVersusStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return 0;
+ }
+ });
+ }
+ }
+
+ /**
+ * Core check: index documents whose norm for the "stored" field is dictated
+ * by {@link CannedNormSimilarity}, while the same value is also written as a
+ * stored field; after indexing, read the index back and verify that the norm
+ * returned by the codec equals the stored value for every document.
+ */
+ private void doTestNormsVersusStoredFields(LongProducer longs) throws Exception {
+ int numDocs = atLeast(500);
+ // Pre-generate all norms so the similarity can hand them out in doc order.
+ long norms[] = new long[numDocs];
+ for (int i = 0; i < numDocs; i++) {
+ norms[i] = longs.next();
+ }
+
+ Directory dir = newDirectory();
+ // Keyword tokenizer keeps the analysis trivial; the norm value itself is
+ // fully determined by the canned similarity, not by field length.
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ conf.setSimilarity(new CannedNormSimilarity(norms));
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+ Document doc = new Document();
+ Field idField = new StringField("id", "", Field.Store.NO);
+ Field storedField = newTextField("stored", "", Field.Store.YES);
+ doc.add(idField);
+ doc.add(storedField);
+
+ for (int i = 0; i < numDocs; i++) {
+ idField.setStringValue(Integer.toString(i));
+ long value = norms[i];
+ storedField.setStringValue(Long.toString(value));
+ writer.addDocument(doc);
+ // Occasional commits force multiple segments, so merging of norms is covered.
+ if (random().nextInt(31) == 0) {
+ writer.commit();
+ }
+ }
+
+ // delete some docs
+ int numDeletions = random().nextInt(numDocs/10);
+ for (int i = 0; i < numDeletions; i++) {
+ int id = random().nextInt(numDocs);
+ writer.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+
+ writer.shutdown();
+
+ // compare
+ DirectoryReader ir = DirectoryReader.open(dir);
+ for (AtomicReaderContext context : ir.leaves()) {
+ AtomicReader r = context.reader();
+ NumericDocValues docValues = r.getNormValues("stored");
+ // Deleted documents are intentionally included: their stored fields and
+ // norms are still readable and must still agree.
+ for (int i = 0; i < r.maxDoc(); i++) {
+ long storedValue = Long.parseLong(r.document(i).get("stored"));
+ assertEquals(storedValue, docValues.get(i));
+ }
+ }
+ ir.close();
+ dir.close();
+ }
+
+
+ /** Supplier of the next norm value to index; each test provides a distribution. */
+ static abstract class LongProducer {
+ abstract long next();
+ }
+
+ /**
+ * Similarity that returns pre-generated norms in document-insertion order.
+ * Only {@link #computeNorm} is usable; the query-side methods are never
+ * invoked because this test indexes and reads norms but never scores.
+ */
+ static class CannedNormSimilarity extends Similarity {
+ final long norms[];
+ int index = 0; // next document's norm; relies on docs being normed in add order
+
+ CannedNormSimilarity(long norms[]) {
+ this.norms = norms;
+ }
+
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ return norms[index++];
+ }
+
+ @Override
+ public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /** Hook from BaseIndexFileFormatTestCase: adds a minimal indexed field per doc. */
+ @Override
+ protected void addRandomFields(Document doc) {
+ // TODO: improve
+ doc.add(new TextField("foobar", "boo", Field.Store.NO));
+ }
+}