You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/11/27 12:51:51 UTC

svn commit: r1642120 - /lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java

Author: romseygeek
Date: Thu Nov 27 11:51:50 2014
New Revision: 1642120

URL: http://svn.apache.org/r1642120
Log:
Fix SimpleTextReader nextPosition() impls

Modified:
    lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java

Modified: lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1642120&r1=1642119&r2=1642120&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Thu Nov 27 11:51:50 2014
@@ -69,8 +69,8 @@ import static org.apache.lucene.codecs.s
 class SimpleTextFieldsReader extends FieldsProducer {
 
   private static final long BASE_RAM_BYTES_USED =
-        RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
-      + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
+      RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
+          + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
 
   private final TreeMap<String,Long> fields;
   private final IndexInput in;
@@ -91,12 +91,12 @@ class SimpleTextFieldsReader extends Fie
       }
     }
   }
-  
+
   private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
     ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
     BytesRefBuilder scratch = new BytesRefBuilder();
     TreeMap<String,Long> fields = new TreeMap<>();
-    
+
     while (true) {
       SimpleTextUtil.readLine(input, scratch);
       if (scratch.get().equals(END)) {
@@ -204,16 +204,20 @@ class SimpleTextFieldsReader extends Fie
     public long totalTermFreq() {
       return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
     }
- 
+
     @Override
     public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+
+      if ((flags & DocsEnum.FLAG_POSITIONS) >= DocsEnum.FLAG_POSITIONS) // NOTE(review): presumably "caller asked for positions"; confirm FLAG_POSITIONS bit layout makes >= equivalent to a full-mask match
+        return docsAndPositions(liveDocs, reuse, flags); // delegate, passing liveDocs/reuse/flags through unchanged
+
       SimpleTextDocsEnum docsEnum;
       if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
         docsEnum = (SimpleTextDocsEnum) reuse;
       } else {
         docsEnum = new SimpleTextDocsEnum();
       }
-      return docsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
+      return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS, docFreq); // the boolean is reset()'s omitTF argument: true only when the field is DOCS-only
     }
 
     @Override
@@ -224,12 +228,12 @@ class SimpleTextFieldsReader extends Fie
         return null;
       }
 
-      SimpleTextDocsEnum docsAndPositionsEnum;
-      if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
-        docsAndPositionsEnum = (SimpleTextDocsEnum) reuse;
+      SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
+      if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+        docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
       } else {
-        docsAndPositionsEnum = new SimpleTextDocsEnum();
-      } 
+        docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
+      }
       return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
     }
   }
@@ -237,6 +241,135 @@ class SimpleTextFieldsReader extends Fie
   private class SimpleTextDocsEnum extends DocsEnum {
     private final IndexInput inStart;
     private final IndexInput in;
+    private boolean omitTF; // when true, nextDoc() never overwrites tf, so freq() keeps the 1 set by reset()
+    private int docID = -1; // -1 until nextDoc() parses a DOC line; NO_MORE_DOCS once exhausted
+    private int tf; // value returned by freq()
+    private Bits liveDocs; // null means no document is filtered out
+    private final BytesRefBuilder scratch = new BytesRefBuilder(); // holds the line most recently read from in
+    private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder(); // UTF-16 copy of the numeric part of a line, fed to parseInt
+    private int cost; // set from docFreq in reset(), returned by cost()
+
+    public SimpleTextDocsEnum() {
+      this.inStart = SimpleTextFieldsReader.this.in; // the enclosing reader's input; compared by identity in canReuse()
+      this.in = this.inStart.clone(); // all seeks and reads in this enum go through this clone
+    }
+
+    public boolean canReuse(IndexInput in) {
+      return in == inStart; // identity comparison, not equals()
+    }
+
+    public SimpleTextDocsEnum reset(long fp, Bits liveDocs, boolean omitTF, int docFreq) throws IOException {
+      this.liveDocs = liveDocs;
+      in.seek(fp); // nextDoc() starts reading lines from this file pointer
+      this.omitTF = omitTF;
+      docID = -1;
+      tf = 1; // remains 1 when omitTF is true; otherwise replaced by the parsed FREQ value in nextDoc()
+      cost = docFreq;
+      return this;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return tf;
+    }
+
+    @Override
+    public int nextPosition() throws IOException {
+      throw new UnsupportedOperationException(); // this enum tracks only docID and tf; all position/offset/payload accessors throw
+    }
+
+    @Override
+    public int startPosition() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endPosition() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BytesRef getPayload() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (docID == NO_MORE_DOCS) { // already exhausted: keep returning NO_MORE_DOCS without reading
+        return docID;
+      }
+      boolean first = true; // true until a DOC line has been parsed during this call
+      int termFreq = 0; // FREQ value seen for the doc currently being parsed
+      while(true) {
+        final long lineStart = in.getFilePointer(); // remembered so the line that ends a doc can be re-read by the next call
+        SimpleTextUtil.readLine(in, scratch);
+        if (StringHelper.startsWith(scratch.get(), DOC)) {
+          if (!first && (liveDocs == null || liveDocs.get(docID))) { // a second DOC line ends the doc parsed so far; return it if it is live
+            in.seek(lineStart); // rewind so the next call begins at this DOC line
+            if (!omitTF) {
+              tf = termFreq;
+            }
+            return docID;
+          }
+          scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length); // take the text after the DOC prefix
+          docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
+          termFreq = 0;
+          first = false;
+        } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
+          scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length); // take the text after the FREQ prefix
+          termFreq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
+        } else if (StringHelper.startsWith(scratch.get(), POS)) {
+          // skip termFreq++;
+        } else if (StringHelper.startsWith(scratch.get(), START_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch.get(), END_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch.get(), PAYLOAD)) {
+          // skip
+        } else {
+          assert StringHelper.startsWith(scratch.get(), TERM) || StringHelper.startsWith(scratch.get(), FIELD) || StringHelper.startsWith(scratch.get(), END): "scratch=" + scratch.get().utf8ToString();
+          if (!first && (liveDocs == null || liveDocs.get(docID))) { // any other line ends the doc list; return the pending doc if it is live
+            in.seek(lineStart); // rewind so the next call re-reads this line and reports exhaustion
+            if (!omitTF) {
+              tf = termFreq;
+            }
+            return docID;
+          }
+          return docID = NO_MORE_DOCS; // no pending live doc remains
+        }
+      }
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      // Naive -- better to index skip data
+      return slowAdvance(target);
+    }
+
+    @Override
+    public long cost() {
+      return cost;
+    }
+  }
+
+  private class SimpleTextDocsAndPositionsEnum extends DocsEnum {
+    private final IndexInput inStart;
+    private final IndexInput in;
     private int docID = -1;
     private int tf;
     private Bits liveDocs;
@@ -244,18 +377,16 @@ class SimpleTextFieldsReader extends Fie
     private final BytesRefBuilder scratch2 = new BytesRefBuilder();
     private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
     private final CharsRefBuilder scratchUTF16_2 = new CharsRefBuilder();
+    private int pos;
     private BytesRef payload;
     private long nextDocStart;
-    private boolean omitTF;
     private boolean readOffsets;
     private boolean readPositions;
-    private int pos;
     private int startOffset;
     private int endOffset;
-    private int posPending;
     private int cost;
 
-    public SimpleTextDocsEnum() {
+    public SimpleTextDocsAndPositionsEnum() {
       this.inStart = SimpleTextFieldsReader.this.in;
       this.in = inStart.clone();
     }
@@ -264,18 +395,16 @@ class SimpleTextFieldsReader extends Fie
       return in == inStart;
     }
 
-    public SimpleTextDocsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
+    public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
       this.liveDocs = liveDocs;
       nextDocStart = fp;
       docID = -1;
-      omitTF = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) < 0;
       readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       if (!readOffsets) {
         startOffset = -1;
         endOffset = -1;
       }
-      tf = 1;
       cost = docFreq;
       return this;
     }
@@ -295,8 +424,6 @@ class SimpleTextFieldsReader extends Fie
       boolean first = true;
       in.seek(nextDocStart);
       long posStart = 0;
-      int termFreq = 0;
-      pos = -1;
       while(true) {
         final long lineStart = in.getFilePointer();
         SimpleTextUtil.readLine(in, scratch);
@@ -305,19 +432,15 @@ class SimpleTextFieldsReader extends Fie
           if (!first && (liveDocs == null || liveDocs.get(docID))) {
             nextDocStart = lineStart;
             in.seek(posStart);
-            if (!omitTF) {
-              tf = termFreq;
-            }
             return docID;
           }
           scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
           docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
-          termFreq = 0;
-          posPending = 0;
+          tf = 0;
           first = false;
         } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
           scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
-          termFreq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
+          tf = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
           posStart = in.getFilePointer();
         } else if (StringHelper.startsWith(scratch.get(), POS)) {
           // skip
@@ -332,8 +455,6 @@ class SimpleTextFieldsReader extends Fie
           if (!first && (liveDocs == null || liveDocs.get(docID))) {
             nextDocStart = lineStart;
             in.seek(posStart);
-            if (!omitTF)
-              tf = termFreq;
             return docID;
           }
           return docID = NO_MORE_DOCS;
@@ -349,9 +470,12 @@ class SimpleTextFieldsReader extends Fie
 
     @Override
     public int nextPosition() throws IOException {
-      final int pos;
       if (readPositions) {
         SimpleTextUtil.readLine(in, scratch);
+        if (StringHelper.startsWith(scratch.get(), DOC)) {
+          pos = NO_MORE_POSITIONS;
+          return pos;
+        }
         assert StringHelper.startsWith(scratch.get(), POS): "got line=" + scratch.get().utf8ToString();
         scratchUTF16_2.copyUTF8Bytes(scratch.bytes(), POS.length, scratch.length()-POS.length);
         pos = ArrayUtil.parseInt(scratchUTF16_2.chars(), 0, scratchUTF16_2.length());
@@ -410,7 +534,7 @@ class SimpleTextFieldsReader extends Fie
     public BytesRef getPayload() {
       return payload;
     }
-    
+
     @Override
     public long cost() {
       return cost;
@@ -428,9 +552,9 @@ class SimpleTextFieldsReader extends Fie
   }
 
   private static final long TERMS_BASE_RAM_BYTES_USED =
-        RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
-      + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
-      + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
+      RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
+          + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
+          + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
   private class SimpleTextTerms extends Terms implements Accountable {
     private final long termsStart;
     private final FieldInfo fieldInfo;
@@ -455,7 +579,7 @@ class SimpleTextFieldsReader extends Fie
       final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
       final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
       final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
-                                                                                                                      outputsInner);
+          outputsInner);
       b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
       IndexInput in = SimpleTextFieldsReader.this.in.clone();
       in.seek(termsStart);
@@ -470,8 +594,8 @@ class SimpleTextFieldsReader extends Fie
         if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
           if (lastDocsStart != -1) {
             b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
-                  outputs.newPair(lastDocsStart,
-                                  outputsInner.newPair((long) docFreq, totalTermFreq)));
+                outputs.newPair(lastDocsStart,
+                    outputsInner.newPair((long) docFreq, totalTermFreq)));
             sumTotalTermFreq += totalTermFreq;
           }
           break;
@@ -487,7 +611,7 @@ class SimpleTextFieldsReader extends Fie
         } else if (StringHelper.startsWith(scratch.get(), TERM)) {
           if (lastDocsStart != -1) {
             b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
-                                                                            outputsInner.newPair((long) docFreq, totalTermFreq)));
+                outputsInner.newPair((long) docFreq, totalTermFreq)));
           }
           lastDocsStart = in.getFilePointer();
           final int len = scratch.length() - TERM.length;
@@ -574,7 +698,7 @@ class SimpleTextFieldsReader extends Fie
     public boolean hasPositions() {
       return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
     }
-    
+
     @Override
     public boolean hasPayloads() {
       return fieldInfo.hasPayloads();