You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ho...@apache.org on 2010/01/20 20:24:30 UTC

svn commit: r901342 - /lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java

Author: hossman
Date: Wed Jan 20 19:24:30 2010
New Revision: 901342

URL: http://svn.apache.org/viewvc?rev=901342&view=rev
Log:
SOLR-1553: edismax improvements -- pf param is now backcompat with dismax; pf and pf3 logic was refactored to reduce code; added pf2 which does what pf did in the first iteration of edismax

Modified:
    lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=901342&r1=901341&r2=901342&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Wed Jan 20 19:24:30 2010
@@ -104,8 +104,15 @@
     SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params);
 
     queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF));
-    Map<String,Float> phraseFields = U.parseFieldBoosts(solrParams.getParams(DMP.PF));
-    Map<String,Float> phraseFields3 = U.parseFieldBoosts(solrParams.getParams("pf3"));
+    // Boosted phrase of the full query string
+    Map<String,Float> phraseFields = 
+      U.parseFieldBoosts(solrParams.getParams(DMP.PF));
+    // Boosted Bi-Term Shingles from the query string
+    Map<String,Float> phraseFields2 = 
+      U.parseFieldBoosts(solrParams.getParams("pf2"));
+    // Boosted Tri-Term Shingles from the query string
+    Map<String,Float> phraseFields3 = 
+      U.parseFieldBoosts(solrParams.getParams("pf3"));
 
     float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f);
 
@@ -284,7 +291,10 @@
       query.add(parsedUserQuery, BooleanClause.Occur.MUST);
 
       // sloppy phrase queries for proximity
-      if (phraseFields.size() > 0 || phraseFields3.size() > 0) {
+      if (phraseFields.size() > 0 || 
+          phraseFields2.size() > 0 ||
+          phraseFields3.size() > 0) {
+        
         // find non-field clauses
         List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
         for (Clause clause : clauses) {
@@ -298,70 +308,15 @@
           normalClauses.add(clause);
         }
 
-        Map<String,Float> pf = phraseFields;
-        if (normalClauses.size() >= 2 && pf.size() > 0) {
-          StringBuilder sb = new StringBuilder();
-          for (int i=0; i<normalClauses.size()-1; i++) {
-            sb.append('"');
-            sb.append(normalClauses.get(i).val);
-            sb.append(' ');
-            sb.append(normalClauses.get(i+1).val);
-            sb.append('"');
-            sb.append(' ');
-          }
-
-          String userPhraseQuery = sb.toString();
-
-          /* for parsing sloppy phrases using DisjunctionMaxQueries */
-          ExtendedSolrQueryParser pp =
-                  new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
-          pp.addAlias(IMPOSSIBLE_FIELD_NAME,
-                  tiebreaker, pf);
-          pp.setPhraseSlop(pslop);
-          pp.makeDismax = false;  // make boolean queries instead
-          pp.setRemoveStopFilter(true);  // remove stop filter and keep stopwords
-          pp.minClauseSize = 2;  // if a stopword is removed, don't add the phrase
-
-          // TODO: perhaps we shouldn't use synonyms either...
-
-          Query phrase = pp.parse(userPhraseQuery);
-          if (phrase != null) {
-            query.add(phrase, BooleanClause.Occur.SHOULD);
-          }
-        }
-
-        pf = phraseFields3;
-        if (normalClauses.size() >= 3 && pf.size() > 0) {
-          StringBuilder sb = new StringBuilder();
-          for (int i=0; i<normalClauses.size()-2; i++) {
-            sb.append('"');
-            sb.append(normalClauses.get(i).val);
-            sb.append(' ');
-            sb.append(normalClauses.get(i+1).val);
-            sb.append(' ');
-            sb.append(normalClauses.get(i+2).val);
-            sb.append('"');
-            sb.append(' ');
-          }
-
-          String userPhraseQuery = sb.toString();
-
-          /* for parsing sloppy phrases using DisjunctionMaxQueries */
-          ExtendedSolrQueryParser pp =
-                  new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
-          pp.addAlias(IMPOSSIBLE_FIELD_NAME,
-                  tiebreaker, pf);
-          pp.setPhraseSlop(pslop);
-          pp.makeDismax = false;  // make boolean queries instead
-          pp.setRemoveStopFilter(true);  // remove stop filter and keep stopwords
-          pp.minClauseSize = 2;  // keep min phrase size at 2 since stopword could have been removed in middle
-
-          Query phrase = pp.parse(userPhraseQuery);
-          if (phrase != null) {
-            query.add(phrase, BooleanClause.Occur.SHOULD);
-          }
-        }
-
+        // full phrase...
+        addShingledPhraseQueries(query, normalClauses, phraseFields, 0, 
+                                 tiebreaker, pslop);
+        // shingles...
+        addShingledPhraseQueries(query, normalClauses, phraseFields2, 2,  
+                                 tiebreaker, pslop);
+        addShingledPhraseQueries(query, normalClauses, phraseFields3, 3,
+                                 tiebreaker, pslop);
+        
       }
     }
 
@@ -380,23 +335,8 @@
       }
     }
     if (null != boostQueries) {
-      if(1 == boostQueries.size() && 1 == boostParams.length) {
-        /* legacy logic */
-        Query f = boostQueries.get(0);
-        if (1.0f == f.getBoost() && f instanceof BooleanQuery) {
-          /* if the default boost was used, and we've got a BooleanQuery
-           * extract the subqueries out and use them directly
-           */
-          for (Object c : ((BooleanQuery)f).clauses()) {
-            query.add((BooleanClause)c);
-          }
-        } else {
-          query.add(f, BooleanClause.Occur.SHOULD);
-        }
-      } else {
-        for(Query f : boostQueries) {
-          query.add(f, BooleanClause.Occur.SHOULD);
-        }
+      for(Query f : boostQueries) {
+        query.add(f, BooleanClause.Occur.SHOULD);
       }
     }
 
@@ -450,6 +390,85 @@
     return topQuery;
   }
 
+  /**
+   * Modifies the main query by adding a new optional Query consisting
+   * of shingled phrase queries across the specified clauses using the
+   * specified field =&gt; boost mappings.  Nothing is added when there are
+   * no fields, no clauses, or fewer clauses than the shingle size.
+   *
+   * @param mainQuery Where the phrase boosting queries will be added
+   * @param clauses Clauses that will be used to construct the phrases
+   * @param fields Field =&gt; boost mappings for the phrase queries
+   * @param shingleSize how big the phrases should be, 0 means a single phrase
+   * @param tiebreaker tie breaker value for the DisjunctionMaxQueries
+   * @param slop slop value for the constructed phrases
+   * @throws ParseException if the generated phrase query cannot be parsed
+   */
+  private void addShingledPhraseQueries(final BooleanQuery mainQuery,
+                                        final List<Clause> clauses,
+                                        final Map<String,Float> fields,
+                                        int shingleSize,
+                                        final float tiebreaker,
+                                        final int slop)
+    throws ParseException {
+    // exactly shingleSize clauses still form one shingle, so only bail
+    // out when there are too few clauses (or nothing to work with at all)
+    if (null == fields || fields.isEmpty() || null == clauses ||
+        clauses.isEmpty() || clauses.size() < shingleSize) {
+      return;
+    }
+
+    if (0 == shingleSize) shingleSize = clauses.size();
+    final int lastOffset = shingleSize - 1; // offset of a shingle's last clause
+
+    StringBuilder userPhraseQuery = new StringBuilder();
+    for (int i=0; i < clauses.size() - lastOffset; i++) {
+      userPhraseQuery.append('"');
+      for (int j=0; j <= lastOffset; j++) {
+        userPhraseQuery.append(clauses.get(i + j).val);
+        userPhraseQuery.append(' ');
+      }
+      userPhraseQuery.append('"');
+      userPhraseQuery.append(' ');
+    }
+
+    /* for parsing sloppy phrases using DisjunctionMaxQueries */
+    ExtendedSolrQueryParser pp =
+      new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+    pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, fields);
+    pp.setPhraseSlop(slop);
+    pp.setRemoveStopFilter(true);  // remove stop filter and keep stopwords
+
+    /* :TODO: reevaluate using makeDismax=true vs false...
+     *
+     * The DismaxQueryParser always used DisjunctionMaxQueries for the
+     * pf boost, for the same reasons it used them for the qf fields.
+     * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added
+     * the "makeDismax=false" property to use BooleanQueries instead, but
+     * when asked why his response was "I honestly don't recall" ...
+     *
+     * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813
+     *
+     * so for now, we continue to use dismax style queries because it
+     * seems the most logical and is back compatible, but we should
+     * try to figure out what Yonik was thinking at the time (because he
+     * rarely does things for no reason)
+     */
+    pp.makeDismax = true;
+
+    // minClauseSize is independent of the shingleSize because of stop words
+    // (if they are removed from the middle, so be it, but we need at least
+    // two or there shouldn't be a boost)
+    pp.minClauseSize = 2;
+    // TODO: perhaps we shouldn't use synonyms either...
+
+    Query phrase = pp.parse(userPhraseQuery.toString());
+    if (phrase != null) {
+      mainQuery.add(phrase, BooleanClause.Occur.SHOULD);
+    }
+  }
+
+
   @Override
   public String[] getDefaultHighlightFields() {
     String[] highFields = queryFields.keySet().toArray(new String[0]);