You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/16 20:09:42 UTC

svn commit: r1202842 [6/6] - in /lucene/dev/trunk: ./ dev-tools/eclipse/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contrib/instantiated/ dev-tools/maven/lucene/contrib/ dev-tools/maven/lucene/contrib/instantiated/ lucene/ lucene/contrib/ luc...

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1202842&r1=1202841&r2=1202842&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Wed Nov 16 19:09:35 2011
@@ -8,16 +8,17 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.StoredFieldVisitor.Status;
-import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
@@ -225,12 +226,11 @@ public class TermVectorComponent extends
       }
     };
 
-    TVMapper mapper = new TVMapper(reader);
-    mapper.fieldOptions = allFields; //this will only stay set if fieldOptions.isEmpty() (in other words, only if the user didn't set any fields)
+    TermsEnum termsEnum = null;
+
     while (iter.hasNext()) {
       Integer docId = iter.next();
       NamedList<Object> docNL = new NamedList<Object>();
-      mapper.docNL = docNL;
       termVectors.add("doc-" + docId, docNL);
 
       if (keyField != null) {
@@ -245,12 +245,91 @@ public class TermVectorComponent extends
       }
       if (!fieldOptions.isEmpty()) {
         for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
-          mapper.fieldOptions = entry.getValue();
-          reader.getTermFreqVector(docId, entry.getKey(), mapper);
+          final String field = entry.getKey();
+          final Terms vector = reader.getTermVector(docId, field);
+          if (vector != null) {
+            termsEnum = vector.iterator(termsEnum);
+            mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(termsEnum), field);
+          }
         }
       } else {
-        //deal with all fields by using the allFieldMapper
-        reader.getTermFreqVector(docId, mapper);
+        // extract all fields
+        final Fields vectors = reader.getTermVectors(docId);
+        final FieldsEnum fieldsEnum = vectors.iterator();
+        String field;
+        while((field = fieldsEnum.next()) != null) {
+          Terms terms = fieldsEnum.terms();
+          if (terms != null) {
+            termsEnum = terms.iterator(termsEnum);
+            mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
+          }
+        }
+      }
+    }
+  }
+
+  private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
+    NamedList<Object> fieldNL = new NamedList<Object>();
+    docNL.add(field, fieldNL);
+
+    BytesRef text;
+    DocsAndPositionsEnum dpEnum = null;
+    while((text = termsEnum.next()) != null) {
+      String term = text.utf8ToString();
+      NamedList<Object> termInfo = new NamedList<Object>();
+      fieldNL.add(term, termInfo);
+      final int freq = (int) termsEnum.totalTermFreq();
+      if (fieldOptions.termFreq == true) {
+        termInfo.add("tf", freq);
+      }
+
+      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
+
+      boolean usePositions = false;
+      boolean useOffsets = false;
+      OffsetAttribute offsetAtt = null;
+      if (dpEnum != null) {
+        dpEnum.nextDoc();
+        usePositions = fieldOptions.positions;
+        if (fieldOptions.offsets && dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
+          useOffsets = true;
+          offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
+        }
+      }
+
+      NamedList<Number> theOffsets = null;
+      if (useOffsets) {
+        theOffsets = new NamedList<Number>();
+        termInfo.add("offsets", theOffsets);
+      }
+
+      NamedList<Integer> positionsNL = null;
+
+      if (usePositions || theOffsets != null) {
+        for (int i = 0; i < freq; i++) {
+          final int pos = dpEnum.nextPosition();
+          if (usePositions && pos >= 0) {
+            if (positionsNL == null) {
+              positionsNL = new NamedList<Integer>();
+              termInfo.add("positions", positionsNL);
+            }
+            positionsNL.add("position", pos);
+          }
+
+          if (theOffsets != null) {
+            theOffsets.add("start", offsetAtt.startOffset());
+            theOffsets.add("end", offsetAtt.endOffset());
+          }
+        }
+      }
+
+      if (fieldOptions.docFreq) {
+        termInfo.add("df", getDocFreq(reader, field, text));
+      }
+
+      if (fieldOptions.tfIdf) {
+        double tfIdfVal = ((double) freq) / getDocFreq(reader, field, text);
+        termInfo.add("tf-idf", tfIdfVal);
       }
     }
   }
@@ -310,90 +389,20 @@ public class TermVectorComponent extends
     return result;
   }
 
-  private static class TVMapper extends TermVectorMapper {
-    private IndexReader reader;
-    private NamedList<Object> docNL;
-
-    //needs to be set for each new field
-    FieldOptions fieldOptions;
-
-    //internal vars not passed in by construction
-    private boolean useOffsets, usePositions;
-    //private Map<String, Integer> idfCache;
-    private NamedList<Object> fieldNL;
-    private String field;
-
-
-    public TVMapper(IndexReader reader) {
-      this.reader = reader;
-    }
-
-    @Override
-    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
-      NamedList<Object> termInfo = new NamedList<Object>();
-      fieldNL.add(term.utf8ToString(), termInfo);
-      if (fieldOptions.termFreq == true) {
-        termInfo.add("tf", frequency);
-      }
-      if (useOffsets) {
-        NamedList<Number> theOffsets = new NamedList<Number>();
-        termInfo.add("offsets", theOffsets);
-        for (int i = 0; i < offsets.length; i++) {
-          TermVectorOffsetInfo offset = offsets[i];
-          theOffsets.add("start", offset.getStartOffset());
-          theOffsets.add("end", offset.getEndOffset());
-        }
-      }
-      if (usePositions) {
-        NamedList<Integer> positionsNL = new NamedList<Integer>();
-        for (int i = 0; i < positions.length; i++) {
-          positionsNL.add("position", positions[i]);
-        }
-        termInfo.add("positions", positionsNL);
-      }
-      if (fieldOptions.docFreq) {
-        termInfo.add("df", getDocFreq(term));
-      }
-      if (fieldOptions.tfIdf) {
-        double tfIdfVal = ((double) frequency) / getDocFreq(term);
-        termInfo.add("tf-idf", tfIdfVal);
-      }
-    }
-
-    private int getDocFreq(BytesRef term) {
-      int result = 1;
-      try {
-        Terms terms = MultiFields.getTerms(reader, field);
-        if (terms != null) {
-          TermsEnum termsEnum = terms.iterator();
-          if (termsEnum.seekExact(term, true)) {
-            result = termsEnum.docFreq();
-          }
+  private static int getDocFreq(IndexReader reader, String field, BytesRef term) {
+    int result = 1;
+    try {
+      Terms terms = MultiFields.getTerms(reader, field);
+      if (terms != null) {
+        TermsEnum termsEnum = terms.iterator(null);
+        if (termsEnum.seekExact(term, true)) {
+          result = termsEnum.docFreq();
         }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
       }
-      return result;
-    }
-
-    @Override
-    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
-      this.field = field;
-      useOffsets = storeOffsets && fieldOptions.offsets;
-      usePositions = storePositions && fieldOptions.positions;
-      fieldNL = new NamedList<Object>();
-      docNL.add(field, fieldNL);
-    }
-
-    @Override
-    public boolean isIgnoringPositions() {
-      return !fieldOptions.positions;  // if we are not interested in positions, then return true telling Lucene to skip loading them
-    }
-
-    @Override
-    public boolean isIgnoringOffsets() {
-      return !fieldOptions.offsets;  //  if we are not interested in offsets, then return true telling Lucene to skip loading them
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
+    return result;
   }
 
   @Override

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=1202842&r1=1202841&r2=1202842&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java Wed Nov 16 19:09:35 2011
@@ -158,7 +158,7 @@ public class TermsComponent extends Sear
       }
 
 
-     TermsEnum termsEnum = terms.iterator();
+     TermsEnum termsEnum = terms.iterator(null);
      BytesRef term = null;
 
       if (lowerBytes != null) {

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java?rev=1202842&r1=1202841&r2=1202842&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java Wed Nov 16 19:09:35 2011
@@ -633,7 +633,7 @@ public class SimpleFacets {
     SolrIndexSearcher.DocsEnumState deState = null;
     BytesRef term = null;
     if (terms != null) {
-      termsEnum = terms.iterator();
+      termsEnum = terms.iterator(null);
 
       // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
       // facet.offset when sorting by index order.

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java?rev=1202842&r1=1202841&r2=1202842&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java Wed Nov 16 19:09:35 2011
@@ -270,8 +270,8 @@ class JoinQuery extends Query {
       BytesRef prefix = prefixStr == null ? null : new BytesRef(prefixStr);
 
       BytesRef term = null;
-      TermsEnum  termsEnum = terms.iterator();
-      TermsEnum  toTermsEnum = toTerms.iterator();
+      TermsEnum  termsEnum = terms.iterator(null);
+      TermsEnum  toTermsEnum = toTerms.iterator(null);
       SolrIndexSearcher.DocsEnumState fromDeState = null;
       SolrIndexSearcher.DocsEnumState toDeState = null;
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java?rev=1202842&r1=1202841&r2=1202842&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java Wed Nov 16 19:09:35 2011
@@ -241,7 +241,7 @@ public class FileFloatSource extends Val
     BytesRef internalKey = new BytesRef();
 
     try {
-      TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator();
+      TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator(null);
       DocsEnum docsEnum = null;
 
       // removing deleted docs shouldn't matter