You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/11/19 04:18:58 UTC
svn commit: r1640472 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/test/org/apache/lucene/index/
Author: rmuir
Date: Wed Nov 19 03:18:57 2014
New Revision: 1640472
URL: http://svn.apache.org/r1640472
Log:
LUCENE-6062: pass correct fieldinfos to dv producer when the segment has updates
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java Wed Nov 19 03:18:57 2014
@@ -37,8 +37,7 @@ final class SegmentDocValues {
private final Map<Long,RefCount<DocValuesProducer>> genDVProducers = new HashMap<>();
- private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, IOContext context, Directory dir,
- DocValuesFormat dvFormat, final Long gen, FieldInfos infos) throws IOException {
+ private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, Directory dir, final Long gen, FieldInfos infos) throws IOException {
Directory dvDir = dir;
String segmentSuffix = "";
if (gen.longValue() != -1) {
@@ -47,7 +46,8 @@ final class SegmentDocValues {
}
// set SegmentReadState to list only the fields that are relevant to that gen
- SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, context, segmentSuffix);
+ SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, IOContext.READ, segmentSuffix);
+ DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
return new RefCount<DocValuesProducer>(dvFormat.fieldsProducer(srs)) {
@SuppressWarnings("synthetic-access")
@Override
@@ -61,11 +61,10 @@ final class SegmentDocValues {
}
/** Returns the {@link DocValuesProducer} for the given generation. */
- synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, IOContext context, Directory dir,
- DocValuesFormat dvFormat, FieldInfos infos) throws IOException {
+ synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, Directory dir, FieldInfos infos) throws IOException {
RefCount<DocValuesProducer> dvp = genDVProducers.get(gen);
if (dvp == null) {
- dvp = newDocValuesProducer(si, context, dir, dvFormat, gen, infos);
+ dvp = newDocValuesProducer(si, dir, gen, infos);
assert dvp != null;
genDVProducers.put(gen, dvp);
} else {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java Wed Nov 19 03:18:57 2014
@@ -26,10 +26,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
@@ -49,28 +47,37 @@ class SegmentDocValuesProducer extends D
final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
final List<Long> dvGens = new ArrayList<>();
- SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos fieldInfos, SegmentDocValues segDocValues, DocValuesFormat dvFormat) throws IOException {
+ /**
+ * Creates a new producer that handles updated docvalues fields
+ * @param si commit point
+ * @param dir directory
+ * @param coreInfos fieldinfos for the segment
+ * @param allInfos all fieldinfos including updated ones
+ * @param segDocValues producer map
+ */
+ SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos coreInfos, FieldInfos allInfos, SegmentDocValues segDocValues) throws IOException {
boolean success = false;
try {
Version ver = si.info.getVersion();
if (ver != null && ver.onOrAfter(Version.LUCENE_4_9_0)) {
DocValuesProducer baseProducer = null;
- for (FieldInfo fi : fieldInfos) {
+ for (FieldInfo fi : allInfos) {
if (fi.getDocValuesType() == DocValuesType.NONE) {
continue;
}
long docValuesGen = fi.getDocValuesGen();
if (docValuesGen == -1) {
if (baseProducer == null) {
- // the base producer gets all the fields, so the Codec can validate properly
- baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
+ // the base producer gets the original fieldinfos it wrote
+ baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, dir, coreInfos);
dvGens.add(docValuesGen);
dvProducers.add(baseProducer);
}
dvProducersByField.put(fi.name, baseProducer);
} else {
assert !dvGens.contains(docValuesGen);
- final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(new FieldInfo[] { fi }));
+ // otherwise, producer sees only the one fieldinfo it wrote
+ final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, new FieldInfos(new FieldInfo[] { fi }));
dvGens.add(docValuesGen);
dvProducers.add(dvp);
dvProducersByField.put(fi.name, dvp);
@@ -81,7 +88,7 @@ class SegmentDocValuesProducer extends D
// FieldInfos could belong to the same dvGen. Therefore need to make sure
// we initialize each DocValuesProducer once per gen.
Map<Long,List<FieldInfo>> genInfos = new HashMap<>();
- for (FieldInfo fi : fieldInfos) {
+ for (FieldInfo fi : allInfos) {
if (fi.getDocValuesType() == DocValuesType.NONE) {
continue;
}
@@ -100,9 +107,9 @@ class SegmentDocValuesProducer extends D
if (docValuesGen == -1) {
// we need to send all FieldInfos to gen=-1, but later we need to
// record the DVP only for the "true" gen=-1 fields (not updated)
- dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, fieldInfos);
+ dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, coreInfos);
} else {
- dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir, dvFormat, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])));
+ dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])));
}
dvGens.add(docValuesGen);
dvProducers.add(dvp);
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java Wed Nov 19 03:18:57 2014
@@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsProducer;
@@ -159,15 +158,14 @@ public final class SegmentReader extends
*/
private DocValuesProducer initDocValuesProducer() throws IOException {
final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
- final DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
if (!fieldInfos.hasDocValues()) {
return null;
} else if (si.hasFieldUpdates()) {
- return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
+ return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
} else {
// simple case, no DocValues updates
- return segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
+ return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
}
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java Wed Nov 19 03:18:57 2014
@@ -19,6 +19,13 @@ import org.apache.lucene.document.Numeri
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.NRTCachingDirectory;
@@ -782,6 +789,92 @@ public class TestNumericDocValuesUpdates
}
@Test
+ public void testUpdateSegmentWithNoDocValues2() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+ // prevent merges, otherwise by the time updates are applied
+ // (writer.close()), the segments might have merged and that update becomes
+ // legit.
+ conf.setMergePolicy(NoMergePolicy.INSTANCE);
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ // first segment with NDV
+ Document doc = new Document();
+ doc.add(new StringField("id", "doc0", Store.NO));
+ doc.add(new NumericDocValuesField("ndv", 3));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field
+ writer.addDocument(doc);
+ writer.commit();
+
+ // second segment with no NDV, but another dv field "foo"
+ doc = new Document();
+ doc.add(new StringField("id", "doc1", Store.NO));
+ doc.add(new NumericDocValuesField("foo", 3));
+ writer.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated
+ writer.addDocument(doc);
+ writer.commit();
+
+ // update document in the first segment - should not affect docsWithField of
+ // the document without NDV field
+ writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
+
+ // update document in the second segment - field should be added and we should
+ // be able to handle the other document correctly (e.g. no NPE)
+ writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
+ writer.close();
+
+ DirectoryReader reader = DirectoryReader.open(dir);
+ for (LeafReaderContext context : reader.leaves()) {
+ LeafReader r = context.reader();
+ NumericDocValues ndv = r.getNumericDocValues("ndv");
+ Bits docsWithField = r.getDocsWithField("ndv");
+ assertNotNull(docsWithField);
+ assertTrue(docsWithField.get(0));
+ assertEquals(5L, ndv.get(0));
+ assertFalse(docsWithField.get(1));
+ assertEquals(0L, ndv.get(1));
+ }
+ reader.close();
+
+ TestUtil.checkIndex(dir);
+
+ conf = newIndexWriterConfig(new MockAnalyzer(random()));
+ writer = new IndexWriter(dir, conf);
+ writer.forceMerge(1);
+ writer.close();
+
+ reader = DirectoryReader.open(dir);
+ LeafReader ar = getOnlySegmentReader(reader);
+ assertEquals(DocValuesType.NUMERIC, ar.getFieldInfos().fieldInfo("foo").getDocValuesType());
+ IndexSearcher searcher = new IndexSearcher(reader);
+ TopFieldDocs td;
+ // doc0
+ td = searcher.search(new TermQuery(new Term("id", "doc0")), 1,
+ new Sort(new SortField("ndv", SortField.Type.LONG)));
+ assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+ // doc1
+ td = searcher.search(new TermQuery(new Term("id", "doc1")), 1,
+ new Sort(new SortField("ndv", SortField.Type.LONG), new SortField("foo", SortField.Type.LONG)));
+ assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+ assertEquals(3L, ((FieldDoc)td.scoreDocs[0]).fields[1]);
+ // doc2
+ td = searcher.search(new TermQuery(new Term("id", "doc2")), 1,
+ new Sort(new SortField("ndv", SortField.Type.LONG)));
+ assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+ // doc4
+ td = searcher.search(new TermQuery(new Term("id", "doc4")), 1,
+ new Sort(new SortField("ndv", SortField.Type.LONG)));
+ assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+ reader.close();
+
+ dir.close();
+ }
+
+ @Test
public void testUpdateSegmentWithPostingButNoDocValues() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));