You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/08 17:41:29 UTC
svn commit: r1672107 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java

Author: rmuir
Date: Wed Apr  8 15:41:29 2015
New Revision: 1672107

URL: http://svn.apache.org/r1672107
Log:
LUCENE-6401: refactor hairy logic of QueryBuilder

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java?rev=1672107&r1=1672106&r2=1672107&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java Wed Apr  8 15:41:29 2015
@@ -192,182 +192,204 @@ public class QueryBuilder {
    */
   protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int phraseSlop) {
     assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
-    // Use the analyzer to get all the tokens, and then build a TermQuery,
-    // PhraseQuery, or nothing based on the term count
-    CachingTokenFilter buffer = null;
-    TermToBytesRefAttribute termAtt = null;
-    PositionIncrementAttribute posIncrAtt = null;
-    int numTokens = 0;
-    int positionCount = 0;
-    boolean severalTokensAtSamePosition = false;
-    boolean hasMoreTokens = false;
-
-    try (TokenStream source = analyzer.tokenStream(field, queryText)) {
-      buffer = new CachingTokenFilter(source);
-      buffer.reset();
-
-      termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
-      posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
-
-      if (termAtt != null) {
-        try {
-          hasMoreTokens = buffer.incrementToken();
-          while (hasMoreTokens) {
-            numTokens++;
-            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
-            if (positionIncrement != 0) {
-              positionCount += positionIncrement;
-            } else {
-              severalTokensAtSamePosition = true;
-            }
-            hasMoreTokens = buffer.incrementToken();
-          }
-        } catch (IOException e) {
-          // ignore
+    
+    // Use the analyzer to get all the tokens, and then build an appropriate
+    // query based on the analysis chain.
+    
+    try (TokenStream source = analyzer.tokenStream(field, queryText);
+         CachingTokenFilter stream = new CachingTokenFilter(source)) {
+      
+      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+      PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
+      
+      if (termAtt == null) {
+        return null; 
+      }
+      
+      // phase 1: read through the stream and assess the situation: count the tokens
+      // and positions, and note any synonyms (tokens with a position increment of 0).
+      
+      int numTokens = 0;
+      int positionCount = 0;
+      boolean hasSynonyms = false;
+
+      stream.reset();
+      while (stream.incrementToken()) {
+        numTokens++;
+        int positionIncrement = posIncAtt.getPositionIncrement();
+        if (positionIncrement != 0) {
+          positionCount += positionIncrement;
+        } else {
+          hasSynonyms = true;
+        }
+      }
+      
+      // phase 2: based on the token count, the presence of synonyms, and the options,
+      // formulate a single-term, boolean, or phrase query.
+      
+      if (numTokens == 0) {
+        return null;
+      } else if (numTokens == 1) {
+        // single term
+        return analyzeTerm(field, stream);
+      } else if (quoted && positionCount > 1) {
+        // phrase
+        if (hasSynonyms) {
+          // complex phrase with synonyms
+          return analyzeMultiPhrase(field, stream, phraseSlop);
+        } else {
+          // simple phrase
+          return analyzePhrase(field, stream, phraseSlop);
+        }
+      } else {
+        // boolean
+        if (positionCount == 1) {
+          // only one position, with synonyms
+          return analyzeBoolean(field, stream);
+        } else {
+          // complex case: multiple positions
+          return analyzeMultiBoolean(field, stream, operator);
         }
       }
     } catch (IOException e) {
       throw new RuntimeException("Error analyzing query text", e);
     }
+  }
+  
+  /** 
+   * Creates a simple term query from the first token of the cached token stream.
+   */
+  private Query analyzeTerm(String field, TokenStream stream) throws IOException {
+    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
+    
+    stream.reset();
+    if (!stream.incrementToken()) {
+      throw new AssertionError();
+    }
+    
+    termAtt.fillBytesRef();
+    return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+  }
+  
+  /** 
+   * Creates a simple boolean query with one optional (SHOULD) term clause per token in the cached token stream.
+   */
+  private Query analyzeBoolean(String field, TokenStream stream) throws IOException {
+    BooleanQuery q = newBooleanQuery(true);
 
-    // rewind the buffer stream
-    try {
-      if (numTokens > 0) {
-        buffer.reset();//will never throw; the buffer is cached
+    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
+    
+    stream.reset();
+    while (stream.incrementToken()) {
+      termAtt.fillBytesRef();
+      Query currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+      q.add(currentQuery, BooleanClause.Occur.SHOULD);
+    }
+    
+    return q;
+  }
+  
+  /** 
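+   * Creates a complex boolean query from the cached token stream: one {@code operator} clause per position, with tokens at the same position (increment 0) grouped into a nested SHOULD query.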
+   */
+  private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
+    BooleanQuery q = newBooleanQuery(false);
+    Query currentQuery = null;
+    
+    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
+
+    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+    
+    stream.reset();
+    while (stream.incrementToken()) {
+      termAtt.fillBytesRef();
+      if (posIncrAtt.getPositionIncrement() == 0) {
+        if (!(currentQuery instanceof BooleanQuery)) {
+          Query t = currentQuery;
+          currentQuery = newBooleanQuery(true);
+          ((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
+        }
+        ((BooleanQuery)currentQuery).add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
+      } else {
+        if (currentQuery != null) {
+          q.add(currentQuery, operator);
+        }
+        currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
       }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
     }
-
-    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
-
-    if (numTokens == 0) {
-      return null;
-    } else if (numTokens == 1) {
-      try {
-        boolean hasNext = buffer.incrementToken();
-        assert hasNext == true;
-        termAtt.fillBytesRef();
-      } catch (IOException e) {
-        // safe to ignore, because we know the number of tokens
+    q.add(currentQuery, operator);
+    
+    return q;
+  }
+  
+  /** 
+   * Creates a simple phrase query, with the given slop, from the cached token stream contents.
+   */
+  private Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
+    PhraseQuery pq = newPhraseQuery();
+    pq.setSlop(slop);
+    
+    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
+
+    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+    int position = -1;    
+    
+    stream.reset();
+    while (stream.incrementToken()) {
+      termAtt.fillBytesRef();
+      
+      if (enablePositionIncrements) {
+        position += posIncrAtt.getPositionIncrement();
+        pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position);
+      } else {
+        pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
       }
-      return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
-    } else {
-      if (severalTokensAtSamePosition || (!quoted)) {
-        if (positionCount == 1 || (!quoted)) {
-          // no phrase query:
-          
-          if (positionCount == 1) {
-            // simple case: only one position, with synonyms
-            BooleanQuery q = newBooleanQuery(true);
-            for (int i = 0; i < numTokens; i++) {
-              try {
-                boolean hasNext = buffer.incrementToken();
-                assert hasNext == true;
-                termAtt.fillBytesRef();
-              } catch (IOException e) {
-                // safe to ignore, because we know the number of tokens
-              }
-              Query currentQuery = newTermQuery(
-                  new Term(field, BytesRef.deepCopyOf(bytes)));
-              q.add(currentQuery, BooleanClause.Occur.SHOULD);
-            }
-            return q;
-          } else {
-            // multiple positions
-            BooleanQuery q = newBooleanQuery(false);
-            Query currentQuery = null;
-            for (int i = 0; i < numTokens; i++) {
-              try {
-                boolean hasNext = buffer.incrementToken();
-                assert hasNext == true;
-                termAtt.fillBytesRef();
-              } catch (IOException e) {
-                // safe to ignore, because we know the number of tokens
-              }
-              if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
-                if (!(currentQuery instanceof BooleanQuery)) {
-                  Query t = currentQuery;
-                  currentQuery = newBooleanQuery(true);
-                  ((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
-                }
-                ((BooleanQuery)currentQuery).add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
-              } else {
-                if (currentQuery != null) {
-                  q.add(currentQuery, operator);
-                }
-                currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
-              }
-            }
-            q.add(currentQuery, operator);
-            return q;
-          }
+    }
+    
+    return pq;
+  }
+  
+  /** 
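+   * Creates a complex (multi-)phrase query, with the given slop, from the cached token stream; tokens with a position increment of 0 share the preceding token's position.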
+   */
+  private Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
+    MultiPhraseQuery mpq = newMultiPhraseQuery();
+    mpq.setSlop(slop);
+    
+    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+    BytesRef bytes = termAtt.getBytesRef();
+
+    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+    int position = -1;  
+    
+    List<Term> multiTerms = new ArrayList<>();
+    stream.reset();
+    while (stream.incrementToken()) {
+      termAtt.fillBytesRef();
+      int positionIncrement = posIncrAtt.getPositionIncrement();
+      
+      if (positionIncrement > 0 && multiTerms.size() > 0) {
+        if (enablePositionIncrements) {
+          mpq.add(multiTerms.toArray(new Term[0]), position);
         } else {
-          // phrase query:
-          MultiPhraseQuery mpq = newMultiPhraseQuery();
-          mpq.setSlop(phraseSlop);
-          List<Term> multiTerms = new ArrayList<>();
-          int position = -1;
-          for (int i = 0; i < numTokens; i++) {
-            int positionIncrement = 1;
-            try {
-              boolean hasNext = buffer.incrementToken();
-              assert hasNext == true;
-              termAtt.fillBytesRef();
-              if (posIncrAtt != null) {
-                positionIncrement = posIncrAtt.getPositionIncrement();
-              }
-            } catch (IOException e) {
-              // safe to ignore, because we know the number of tokens
-            }
-
-            if (positionIncrement > 0 && multiTerms.size() > 0) {
-              if (enablePositionIncrements) {
-                mpq.add(multiTerms.toArray(new Term[0]),position);
-              } else {
-                mpq.add(multiTerms.toArray(new Term[0]));
-              }
-              multiTerms.clear();
-            }
-            position += positionIncrement;
-            multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
-          }
-          if (enablePositionIncrements) {
-            mpq.add(multiTerms.toArray(new Term[0]),position);
-          } else {
-            mpq.add(multiTerms.toArray(new Term[0]));
-          }
-          return mpq;
+          mpq.add(multiTerms.toArray(new Term[0]));
         }
-      } else {
-        PhraseQuery pq = newPhraseQuery();
-        pq.setSlop(phraseSlop);
-        int position = -1;
-
-        for (int i = 0; i < numTokens; i++) {
-          int positionIncrement = 1;
-
-          try {
-            boolean hasNext = buffer.incrementToken();
-            assert hasNext == true;
-            termAtt.fillBytesRef();
-            if (posIncrAtt != null) {
-              positionIncrement = posIncrAtt.getPositionIncrement();
-            }
-          } catch (IOException e) {
-            // safe to ignore, because we know the number of tokens
-          }
-
-          if (enablePositionIncrements) {
-            position += positionIncrement;
-            pq.add(new Term(field, BytesRef.deepCopyOf(bytes)),position);
-          } else {
-            pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
-          }
-        }
-        return pq;
+        multiTerms.clear();
       }
+      position += positionIncrement;
+      multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
+    }
+    
+    if (enablePositionIncrements) {
+      mpq.add(multiTerms.toArray(new Term[0]), position);
+    } else {
+      mpq.add(multiTerms.toArray(new Term[0]));
     }
+    return mpq;
   }
   
   /**