You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/07/22 18:59:08 UTC

svn commit: r1505731 - in /lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight: FieldPhraseList.java FieldQuery.java

Author: jpountz
Date: Mon Jul 22 16:59:08 2013
New Revision: 1505731

URL: http://svn.apache.org/r1505731
Log:
LUCENE-4734: Better memory efficiency.

Modified:
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1505731&r1=1505730&r2=1505731&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Mon Jul 22 16:59:08 2013
@@ -16,9 +16,7 @@ package org.apache.lucene.search.vectorh
  * limitations under the License.
  */
 
-import java.util.ArrayDeque;
 import java.util.ArrayList;
-import java.util.Deque;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -62,73 +60,46 @@ public class FieldPhraseList {
   public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
     final String field = fieldTermStack.getFieldName();
 
-    @SuppressWarnings("unchecked")
-    Deque<TermInfo>[] termStacks = new Deque[] {new ArrayDeque<TermInfo>()};
-    for (TermInfo ti = fieldTermStack.pop(); ti != null; ti = fieldTermStack.pop()) {
-      // If there are tokens at the same position, compute all combinations
-      if (!fieldTermStack.isEmpty() && fieldTermStack.peek().getPosition() == ti.getPosition()) {
-        List<TermInfo> samePositionTermInfos = new ArrayList<>(2);
-        samePositionTermInfos.add(ti);
-        samePositionTermInfos.add(fieldTermStack.pop());
-        while (!fieldTermStack.isEmpty() && fieldTermStack.peek().getPosition() == ti.getPosition()) {
-          samePositionTermInfos.add(fieldTermStack.pop());
-        }
-        final int numTokensAtSamePosition = samePositionTermInfos.size();
-        @SuppressWarnings("unchecked")
-        Deque<TermInfo>[] newTermStacks = new Deque[termStacks.length * numTokensAtSamePosition];
-        for (int i = 0, k = 0; i < termStacks.length; ++i) {
-          for (int j = 0; j < numTokensAtSamePosition; ++j) {
-            if (j == numTokensAtSamePosition - 1) {
-              newTermStacks[k] = termStacks[i];
-            } else {
-              newTermStacks[k] = new ArrayDeque<>(termStacks[i]);
-            }
-            newTermStacks[k++].offer(samePositionTermInfos.get(j));
-          }
-        }
-        termStacks = newTermStacks;
-      } else {
-        for (Deque<TermInfo> d : termStacks) {
-          d.offer(ti);
-        }
-      }
+    QueryPhraseMap qpm = fieldQuery.getRootMap(field);
+    if (qpm != null) {
+      LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
+      extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0);
+      assert phraseCandidate.size() == 0;
     }
+  }
 
-    for (Deque<TermInfo> d : termStacks) {
-      extractPhrases(field, d, fieldQuery, phraseLimit);
+  void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
+    if (terms.isEmpty()) {
+      if (longest > 0) {
+        addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+      }
+      return;
     }
-  }
+    ArrayList<TermInfo> samePositionTerms = new ArrayList<TermInfo>();
+    do {
+      samePositionTerms.add(terms.pop());
+    } while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition());
 
-  void extractPhrases(String field, Deque<TermInfo> fieldTermStack, FieldQuery fieldQuery, int phraseLimit) {
-    LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
-    while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) ) {
-
-      int longest = 0;
-      phraseCandidate.clear();
-      QueryPhraseMap currMap = null;
-      for (TermInfo ti : fieldTermStack) {
-        QueryPhraseMap nextMap = null;
-        if (currMap == null) {
-          nextMap = fieldQuery.getFieldTermMap(field, ti.getText());
-          if (nextMap == null) {
-            break;
-          }
-        } else {
-          nextMap = currMap.getTermMap(ti.getText());
-        }
-        if (nextMap != null) {
-          currMap = nextMap;
-          phraseCandidate.add(ti);
-          if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
-            longest = phraseCandidate.size();
-          }
+    // try all next terms at the same position
+    for (TermInfo nextTerm : samePositionTerms) {
+      QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText());
+      if (nextMap != null) {
+        phraseCandidate.add(nextTerm);
+        int l = longest;
+        if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){
+          l = phraseCandidate.size();
         }
+        extractPhrases(terms, nextMap, phraseCandidate, l);
+        phraseCandidate.removeLast();
       }
+    }
 
-      if (longest > 0) {
-        addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
-      }
-      fieldTermStack.pop();
+    // ignore the next term
+    extractPhrases(terms, currMap, phraseCandidate, longest);
+
+    // add terms back
+    for (TermInfo nextTerm : samePositionTerms) {
+      terms.push(nextTerm);
     }
   }
 

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1505731&r1=1505730&r2=1505731&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Mon Jul 22 16:59:08 2013
@@ -333,7 +333,8 @@ public class FieldQuery {
     return root.searchPhrase( phraseCandidate );
   }
   
-  private QueryPhraseMap getRootMap( String fieldName ){
+  /** Get the root map for the given field name. */
+  public QueryPhraseMap getRootMap( String fieldName ){
     return rootMaps.get( fieldMatch ? fieldName : null );
   }