You are viewing a plain text version of this content. The canonical link was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/01/08 21:35:50 UTC

svn commit: r1556627 - in /lucene/dev/branches/lucene5376/lucene/server/src: java/org/apache/lucene/server/ java/org/apache/lucene/server/handlers/ test/org/apache/lucene/server/

Author: mikemccand
Date: Wed Jan  8 20:35:49 2014
New Revision: 1556627

URL: http://svn.apache.org/r1556627
Log:
LUCENE-5376: remove recency blending hack: just use expressions instead

Removed:
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/BlendedComparator.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/RecencyBlendedFieldComparatorSource.java
Modified:
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/FieldDef.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/SearchHandler.java
    lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestSearch.java

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/FieldDef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/FieldDef.java?rev=1556627&r1=1556626&r2=1556627&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/FieldDef.java (original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/FieldDef.java Wed Jan  8 20:35:49 2014
@@ -66,22 +66,13 @@ public class FieldDef {
   /** Field name to use as the ID field for live-values. */
   public final String liveValuesIDField;
 
-  /** Time-stamp field name for recency-blended sorting. */
-  public final String blendFieldName;
-
-  /** Maximum boost from recency. */
-  public final float blendMaxBoost;
-
-  /** Maximum age for recency boosting to have an effect (seconds). */
-  public final long blendRange;
-
   /** Only set for a virtual field (expression). */
   public final ValueSource valueSource;
 
   /** Sole constructor. */
   public FieldDef(String name, FieldType fieldType, String valueType, String faceted, String postingsFormat, String docValuesFormat, boolean multiValued,
                   Similarity sim, Analyzer indexAnalyzer, Analyzer searchAnalyzer, boolean highlighted, String liveValuesIDField,
-                  String blendFieldName, float blendMaxBoost, long blendRange, ValueSource valueSource) {
+                  ValueSource valueSource) {
     this.name = name;
     this.fieldType = fieldType;
     if (fieldType != null) {
@@ -105,10 +96,6 @@ public class FieldDef {
     } else {
       fieldTypeNoDV = null;
     }
-    // nocommit make this a subclass somehow
-    this.blendFieldName = blendFieldName;
-    this.blendMaxBoost = blendMaxBoost;
-    this.blendRange = blendRange;
     this.valueSource = valueSource;
   }
 }

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java?rev=1556627&r1=1556626&r2=1556627&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java (original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java Wed Jan  8 20:35:49 2014
@@ -306,10 +306,8 @@ public class RegisterFieldHandler extend
                                "double", "Double value.",
                                "int", "Int value.",
                                "long", "Long value.",
-                               // TODO: this is hacked up now ... only supports fixed "recency" blending ... ideally we would accept
-                               // a custom equation and parse & execute that:
                                // nocommit name this "dynamic" instead of "virtual"?
-                               "virtual", "Virtual (computed at search time) field, e.g. for blended sorting.")),
+                               "virtual", "Virtual field defined with a JavaScript expression.")),
         // nocommit rename to "search"?  ie, "I will search on/by this field's values"
         new Param("index", "True if the value should be indexed.", new BooleanType(), false),
         new Param("tokenize", "True if the value should be tokenized.", new BooleanType(), true),
@@ -331,7 +329,7 @@ public class RegisterFieldHandler extend
                   "no"),
         new Param("storeDocValues", "Whether to index the value into doc values.", new BooleanType(), false),
         new Param("liveValues", "Enable live values for this field: whenever this field is retrieved during a search, the live (most recetly added) value will always be returned; set this to the field name of your id (primary key) field.  Uses @lucene:core:org.apache.lucene.index.LiveFieldValues under the hood.", new StringType()),
-        new Param("numericPrecisionStep", "If the value is numeric, what precision step to use during indexing.", new IntType(), NumericUtils.PRECISION_STEP_DEFAULT),
+        new Param("numericPrecisionStep", "If the value is numeric, what precision step to use during indexing.", new IntType(), NumericUtils.PRECISION_STEP_DEFAULT), // nocommit is 16 better?
         new Param("omitNorms", "True if norms are omitted.", new BooleanType(), false),
         new Param("analyzer", "Analyzer to use for this field during indexing and searching.", ANALYZER_TYPE),
         new Param("indexAnalyzer", "Analyzer to use for this field during indexing.", ANALYZER_TYPE),
@@ -343,11 +341,6 @@ public class RegisterFieldHandler extend
                                "docsFreqsPositionsOffsets", "Index doc ids, term frequencies, positions and offsets."),
                   "docsFreqsPositions"),
         new Param("expression", "The JavaScript expression defining a virtual field's value (only used with type=virtual).", new StringType()),
-        new Param("recencyScoreBlend", "Only used with type=virtual, to describe how the virtual field blends with score.",
-                  new StructType(
-                                 new Param("timeStampField", "Field holding timestamp value (must be type long, with sort=true)", new StringType()),
-                                 new Param("maxBoost", "Maximum boost to apply to the relevance score (for the most recent matches)", new FloatType()),
-                                 new Param("range", "Age beyond which no boosting occurs", new LongType()))),
         new Param("termVectors", "Whether/how term vectors should be indexed.",
                   new EnumType("terms", "Index terms and freqs only.",
                                "termsPositions", "Index terms, freqs and positions.",
@@ -387,63 +380,41 @@ public class RegisterFieldHandler extend
   }
 
   private FieldDef parseOneVirtualFieldType(Request r, IndexState state, Map<String,FieldDef> pendingFieldDefs, String name, JSONObject o) {
-    if (r.hasParam("expression")) {
-      String exprString = r.getString("expression");
-      Expression expr;
+    String exprString = r.getString("expression");
+    Expression expr;
 
-      try {
-        expr = JavascriptCompiler.compile(exprString);
-      } catch (ParseException pe) {
-        // Static error (e.g. bad JavaScript syntax):
-        r.fail("expression", "could not parse expression: " + pe, pe);
+    try {
+      expr = JavascriptCompiler.compile(exprString);
+    } catch (ParseException pe) {
+      // Static error (e.g. bad JavaScript syntax):
+      r.fail("expression", "could not parse expression: " + pe, pe);
 
-        // Dead code but compiler disagrees:
-        expr = null;
-      } catch (IllegalArgumentException iae) {
-        // Static error (e.g. bad JavaScript syntax):
-        r.fail("expression", "could not parse expression: " + iae, iae);
+      // Dead code but compiler disagrees:
+      expr = null;
+    } catch (IllegalArgumentException iae) {
+      // Static error (e.g. bad JavaScript syntax):
+      r.fail("expression", "could not parse expression: " + iae, iae);
 
-        // Dead code but compiler disagrees:
-        expr = null;
-      }
+      // Dead code but compiler disagrees:
+      expr = null;
+    }
 
-      Map<String,FieldDef> allFields = new HashMap<String,FieldDef>(state.getAllFields());
-      allFields.putAll(pendingFieldDefs);
+    Map<String,FieldDef> allFields = new HashMap<String,FieldDef>(state.getAllFields());
+    allFields.putAll(pendingFieldDefs);
 
-      ValueSource values;
-      try {
-        values = expr.getValueSource(new FieldDefBindings(allFields));
-      } catch (RuntimeException re) {
-        // Dynamic error (e.g. referred to a field that
-        // doesn't exist):
-        r.fail("expression", "could not evaluate expression: " + re, re);
+    ValueSource values;
+    try {
+      values = expr.getValueSource(new FieldDefBindings(allFields));
+    } catch (RuntimeException re) {
+      // Dynamic error (e.g. referred to a field that
+      // doesn't exist):
+      r.fail("expression", "could not evaluate expression: " + re, re);
 
-        // Dead code but compiler disagrees:
-        values = null;
-      }
-
-      return new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, null, 0.0f, 0L, values);
-
-    } else {
-      // nocommit cutover all tests to expression fields and remove this hack:
-      Request r2 = r.getStruct("recencyScoreBlend");
-      String timeStampField = r2.getString("timeStampField");
-      FieldDef fd;
-      try {
-        fd = state.getField(timeStampField);
-      } catch (IllegalArgumentException iae) {
-        fd = pendingFieldDefs.get(timeStampField);
-        if (fd == null) {
-          r2.fail("timeStampField", "field \"" + timeStampField + "\" was not yet registered");
-        }
-      }
-      if (fd.fieldType.docValueType() != DocValuesType.NUMERIC) {
-        r2.fail("timeStampField", "field \"" + fd.name + "\" must be registered with type=long and sort=true");
-      }
-      float maxBoost = r2.getFloat("maxBoost");
-      long range = r2.getLong("range");
-      return new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, fd.name, maxBoost, range, null);
+      // Dead code but compiler disagrees:
+      values = null;
     }
+
+    return new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, values);
   }
 
   private FieldDef parseOneFieldType(Request r, IndexState state, Map<String,FieldDef> pendingFieldDefs, String name, JSONObject o) {
@@ -735,7 +706,7 @@ public class RegisterFieldHandler extend
     // nocommit facetsConfig.setIndexFieldName
     // nocommit facetsConfig.setRequireDimCount
 
-    return new FieldDef(name, ft, type, facet, pf, dvf, multiValued, sim, indexAnalyzer, searchAnalyzer, highlighted, liveValuesIDField, null, 0.0f, 0l, null);
+    return new FieldDef(name, ft, type, facet, pf, dvf, multiValued, sim, indexAnalyzer, searchAnalyzer, highlighted, liveValuesIDField, null);
   }
 
   /** Messy: we need this for indexed-but-not-tokenized

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/SearchHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/SearchHandler.java?rev=1556627&r1=1556626&r2=1556627&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/SearchHandler.java (original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/SearchHandler.java Wed Jan  8 20:35:49 2014
@@ -115,7 +115,6 @@ import org.apache.lucene.server.FinishRe
 import org.apache.lucene.server.GlobalState;
 import org.apache.lucene.server.IndexState;
 import org.apache.lucene.server.MyIndexSearcher;
-import org.apache.lucene.server.RecencyBlendedFieldComparatorSource;
 import org.apache.lucene.server.SVJSONPassageFormatter;
 import org.apache.lucene.server.WholeMVJSONPassageFormatter;
 import org.apache.lucene.server.params.*;
@@ -668,8 +667,6 @@ public class SearchHandler extends Handl
 
         if (fd.valueSource != null) {
           sf = fd.valueSource.getSortField(sub.getBoolean("reverse"));
-        } else if (fd.blendFieldName != null) {
-          sf = new SortField(fd.name, new RecencyBlendedFieldComparatorSource(fd.blendFieldName, fd.blendMaxBoost, timeStamp, fd.blendRange), sub.getBoolean("reverse"));
         } else {
           if ((fd.fieldType != null && fd.fieldType.docValueType() == null) ||
               (fd.fieldType == null && fd.valueSource == null)) {
@@ -1738,7 +1735,7 @@ public class SearchHandler extends Handl
           values = null;
         }
 
-        FieldDef fd = new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, null, 0.0f, 0L, values);
+        FieldDef fd = new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, values);
 
         if (dynamicFields.put(name, fd) != null) {
           oneField.fail("name", "registered field or dynamic field \"" + name + "\" already exists");

Modified: lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestSearch.java?rev=1556627&r1=1556626&r2=1556627&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestSearch.java (original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestSearch.java Wed Jan  8 20:35:49 2014
@@ -279,8 +279,8 @@ public class TestSearch extends ServerBa
     curIndexName = "recency";
     File dir = new File(_TestUtil.getTempDir("recency"), "root");
     send("createIndex", "{rootDir: " + dir.getAbsolutePath() + "}");
-    send("startIndex", "{}");
-    send("registerFields", "{fields: {timestamp: {type: long, index: false, sort: true}, body: {type: text, analyzer: StandardAnalyzer}, blend: {type: virtual, recencyScoreBlend: {timeStampField: timestamp, maxBoost: 2.0, range: 30}}}}");
+    send("startIndex");
+    send("registerFields", "{fields: {timestamp: {type: long, index: false, sort: true}, body: {type: text, analyzer: StandardAnalyzer}}}");
 
     long t = System.currentTimeMillis()/1000;
     send("addDocument", "{fields: {body: 'this is some text', timestamp: " + (t-100) + "}}");
@@ -293,17 +293,23 @@ public class TestSearch extends ServerBa
       assertEquals(0, getInt(result, "hits[0].doc"));
       assertEquals(1, getInt(result, "hits[1].doc"));
 
-      // Relevance + recency changes the order:
-      result = send("search", "{queryText: text, sort: {fields: [{field: blend}]}, searcher: {indexGen: " + gen + "}}");
+      // Blended relevance + recency changes the order:
+      t = System.currentTimeMillis()/1000;
+      result = send("search",
+                    "{queryText: text, virtualFields: [" + 
+                     "{name: age,   expression: '" + t + " - timestamp'}, " + 
+                     "{name: boost, expression: '(age >= 30) ? 1.0 : (2.0 * (30. - age) / 30)'}, " +
+                     "{name: blend, expression: 'boost * _score'}], " + 
+                    " sort: {fields: [{field: blend, reverse: true}]}, retrieveFields: [age, boost], searcher: {indexGen: " + gen + "}}");
       assertEquals(2, getInt(result, "totalHits"));
       assertEquals(1, getInt(result, "hits[0].doc"));
       assertEquals(0, getInt(result, "hits[1].doc"));
+      assertTrue(getFloat(result, "hits[0].fields.boost") > 1.0f);
+      assertEquals(1.0, getFloat(result, "hits[1].fields.boost"), 0.0001f);
 
       // Make sure this survives restart:
-      send("stopIndex", "{}");
-      send("startIndex", "{}");
+      send("stopIndex");
+      send("startIndex");
     }
   }
-
-  // nocommit test grouping
 }