You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2016/10/26 13:58:11 UTC
lucene-solr git commit: fixed target docid check in LTRRescorer and unit test for extracting… (#176)
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-8542-v2 be3b8434c -> 4f4454cde
fixed target docid check in LTRRescorer and unit test for extracting… (#176)
* fixed target docid check in LTRRescorer and unit test for extracting features when there are multiple segments
(cherry picked from commit dfa0e2cc3baa72cec1b6329891d14b451effbd74)
* renamed unit test file and added comments
(cherry picked from commit fcfb661574c973b8963401d58145ddaf1942b511)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4f4454cd
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4f4454cd
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4f4454cd
Branch: refs/heads/jira/solr-8542-v2
Commit: 4f4454cde71bfcefc9c7893d452bb5d22f8a0b63
Parents: be3b843
Author: nsanthapuri <ns...@bloomberg.net>
Authored: Mon Oct 24 14:36:41 2016 -0400
Committer: Christine Poerschke <cp...@apache.org>
Committed: Wed Oct 26 08:52:52 2016 -0500
----------------------------------------------------------------------
solr/contrib/ltr/example/solrconfig.xml | 4 +-
.../java/org/apache/solr/ltr/LTRRescorer.java | 2 +-
.../featureExamples/comp_features.json | 37 +++++++
.../solr/collection1/conf/schema-ltr.xml | 1 +
.../collection1/conf/solrconfig-multiseg.xml | 63 +++++++++++
...stFeatureExtractionFromMultipleSegments.java | 106 +++++++++++++++++++
6 files changed, 210 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/example/solrconfig.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/example/solrconfig.xml b/solr/contrib/ltr/example/solrconfig.xml
index 94f9b33..5557991 100644
--- a/solr/contrib/ltr/example/solrconfig.xml
+++ b/solr/contrib/ltr/example/solrconfig.xml
@@ -171,11 +171,11 @@
Even older versions of Lucene used LogDocMergePolicy.
-->
<!--
- <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
+ <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory">
<int name="maxMergeAtOnce">10</int>
<int name="segmentsPerTier">10</int>
<double name="noCFSRatio">0.1</double>
- </mergePolicy>
+ </mergePolicyFactory>
-->
<!-- Merge Factor
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
index d1a7f69..607a9c8 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
@@ -235,7 +235,7 @@ public class LTRRescorer extends Rescorer {
final LeafReaderContext atomicContext = leafContexts.get(n);
final int deBasedDoc = docid - atomicContext.docBase;
final ModelScorer r = modelWeight.scorer(atomicContext);
- if ( (r == null) || (r.iterator().advance(deBasedDoc) != docid) ) {
+ if ( (r == null) || (r.iterator().advance(deBasedDoc) != deBasedDoc) ) {
return new LTRScoringQuery.FeatureInfo[0];
} else {
if (originalDocScore != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/src/test-files/featureExamples/comp_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/comp_features.json b/solr/contrib/ltr/src/test-files/featureExamples/comp_features.json
new file mode 100644
index 0000000..8d75739
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/comp_features.json
@@ -0,0 +1,37 @@
+[
+{ "name":"origScore",
+ "class":"org.apache.solr.ltr.feature.OriginalScoreFeature",
+ "params":{},
+ "store": "feature-store-6"
+},
+{
+ "name": "descriptionTermFreq",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": { "q" : "{!func}termfreq(description,${user_text})" },
+ "store": "feature-store-6"
+},
+{
+ "name": "popularity",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": { "q" : "{!func}normHits"},
+ "store": "feature-store-6"
+},
+{
+ "name": "isPopular",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": {"fq" : ["{!field f=popularity}201"] },
+ "store": "feature-store-6"
+},
+{
+ "name": "queryPartialMatch2",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": {"q": "{!dismax qf=description mm=2}${user_text}" },
+ "store": "feature-store-6"
+},
+{
+ "name": "queryPartialMatch2.1",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": {"q": "{!dismax qf=description mm=2}${user_text}" },
+ "store": "feature-store-6"
+}
+]
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
index 9492508..15cf140 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
@@ -23,6 +23,7 @@
<field name="description" type="text_general" indexed="true" stored="true"/>
<field name="keywords" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="popularity" type="int" indexed="true" stored="true" />
+ <field name="normHits" type="float" indexed="true" stored="true" />
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-multiseg.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-multiseg.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-multiseg.xml
new file mode 100644
index 0000000..82f6b4b
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-multiseg.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<config>
+ <luceneMatchVersion>6.0.0</luceneMatchVersion>
+ <dataDir>${solr.data.dir:}</dataDir>
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.RAMDirectoryFactory}" />
+
+ <schemaFactory class="ClassicIndexSchemaFactory" />
+
+
+ <!-- Query parser used to rerank top docs with a provided model -->
+ <queryParser name="ltr"
+ class="org.apache.solr.search.LTRQParserPlugin" />
+
+ <maxBufferedDocs>1</maxBufferedDocs>
+ <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory">
+ <int name="maxMergeAtOnce">10</int>
+ <int name="segmentsPerTier">1000</int>
+ </mergePolicyFactory>
+ <!-- add a transformer that will encode the document features in the response.
+ For each document the transformer will add the features as an extra field
+ in the response. The name of the field will be the name of the transformer
+ enclosed between brackets (in this case [features]). In order to get the feature
+ vector you will have to specify that you want the field (e.g., fl="*,[features]") -->
+ <transformer name="features"
+ class="org.apache.solr.response.transform.LTRFeatureLoggerTransformerFactory" />
+
+ <updateHandler class="solr.DirectUpdateHandler2">
+ <autoCommit>
+ <maxTime>15000</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
+ <autoSoftCommit>
+ <maxTime>1000</maxTime>
+ </autoSoftCommit>
+ <updateLog>
+ <str name="dir">${solr.data.dir:}</str>
+ </updateLog>
+ </updateHandler>
+
+ <requestHandler name="/update" class="solr.UpdateRequestHandler" />
+ <!-- Query request handler managing models and features -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">id</str>
+ </lst>
+ </requestHandler>
+
+</config>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f4454cd/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
new file mode 100644
index 0000000..7dbd95d
--- /dev/null
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureExtractionFromMultipleSegments.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.feature;
+
+import java.security.SecureRandom;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.ltr.TestRerankBase;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.noggit.ObjectBuilder;
+
+
+/**
+ * Checks that a feature vector is returned for every document of a reranked
+ * result page when the index is set up with solrconfig-multiseg.xml, which
+ * sets maxBufferedDocs to 1 and configures the merge policy to restrict merging.
+ */
+public class TestFeatureExtractionFromMultipleSegments extends TestRerankBase {
+  /** Alphabet the random description suffixes are drawn from. */
+  static final String AB = "abcdefghijklmnopqrstuvwxyz";
+  /** Source of randomness for {@link #randomString(int)}. */
+  static SecureRandom rnd = new SecureRandom();
+
+  /**
+   * Builds a random lowercase string.
+   *
+   * @param len number of characters to generate
+   * @return a string of {@code len} characters, each picked from {@link #AB}
+   */
+  static String randomString(int len) {
+    final StringBuilder sb = new StringBuilder(len);
+    for (int i = 0; i < len; i++) {
+      sb.append(AB.charAt(rnd.nextInt(AB.length())));
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Sets up the test with the multi-segment config, indexes the documents,
+   * commits, and loads the feature definitions from comp_features.json.
+   */
+  @BeforeClass
+  public static void before() throws Exception {
+    // solrconfig-multiseg.xml contains the merge policy to restrict merging
+    setuptest("solrconfig-multiseg.xml", "schema-ltr.xml");
+    // index 340 documents: 20 batches of 17 documents each, with ids in [0, 398]
+    // (offsets 4, 5 and 19 of every batch of 20 ids are intentionally left unused)
+    for (int i = 0; i < 400; i = i + 20) {
+      assertU(adoc("id", Integer.toString(i), "popularity", "201", "description", "apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+1), "popularity", "201", "description", "d " + randomString(i%6+3), "normHits", "0.11"));
+
+      assertU(adoc("id", Integer.toString(i+2), "popularity", "201", "description", "apple is a company too " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+3), "popularity", "201", "description", "new york city is big apple " + randomString(i%6+3), "normHits", "0.11"));
+
+      assertU(adoc("id", Integer.toString(i+6), "popularity", "301", "description", "function name " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+7), "popularity", "301", "description", "function " + randomString(i%6+3), "normHits", "0.1"));
+
+      assertU(adoc("id", Integer.toString(i+8), "popularity", "301", "description", "This is a sample function for testing " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+9), "popularity", "301", "description", "Function to check out stock prices "+randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+10),"popularity", "301", "description", "Some descriptions "+randomString(i%6+3), "normHits", "0.1"));
+
+      assertU(adoc("id", Integer.toString(i+11), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+12), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
+      assertU(adoc("id", Integer.toString(i+13), "popularity", "201", "description", "New some York is Big. "+ randomString(i%6+3), "normHits", "0.1"));
+
+      assertU(adoc("id", Integer.toString(i+14), "popularity", "201", "description", "apple apple is a company " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+15), "popularity", "201", "description", "Big Apple is New York.", "normHits", "0.01"));
+      assertU(adoc("id", Integer.toString(i+16), "popularity", "401", "description", "barack h", "normHits", "0.0"));
+      assertU(adoc("id", Integer.toString(i+17), "popularity", "201", "description", "red delicious apple " + randomString(i%6+3), "normHits", "0.1"));
+      assertU(adoc("id", Integer.toString(i+18), "popularity", "201", "description", "nyc " + randomString(i%6+3), "normHits", "0.11"));
+    }
+
+    assertU(commit());
+
+    loadFeatures("comp_features.json");
+  }
+
+  /** Delegates class-level cleanup to {@code aftertest()}. */
+  @AfterClass
+  public static void after() throws Exception {
+    aftertest();
+  }
+
+  /**
+   * Queries for "apple" with the feature transformer enabled and verifies that
+   * each returned document has a non-empty {@code fv} value and that exactly
+   * the requested number of rows came back.
+   */
+  @Test
+  public void testFeatureExtractionFromMultipleSegments() throws Exception {
+
+    final SolrQuery query = new SolrQuery();
+    query.setQuery("{!edismax qf='description^1' boost='sum(product(pow(normHits, 0.7), 1600), .1)' v='apple'}");
+    // request 100 rows; if any rows are fetched from the second or subsequent
+    // segments the test should succeed only if LTRRescorer::extractFeaturesInfo()
+    // advances the doc iterator properly
+    final int numRows = 100;
+    query.add("rows", Integer.toString(numRows));
+    query.add("wt", "json");
+    query.add("fq", "popularity:201");
+    query.add("fl", "*, score,id,normHits,description,fv:[features store='feature-store-6' format='dense' efi.user_text='apple']");
+    final String res = restTestHarness.query("/query" + query.toQueryString());
+
+    @SuppressWarnings("unchecked") // the parsed JSON response is a map of string keys
+    final Map<String,Object> resultJson = (Map<String,Object>) ObjectBuilder.fromJSON(res);
+
+    @SuppressWarnings("unchecked") // "response" is a map whose "docs" entry is a list of document maps
+    final List<Map<String,Object>> docs = (List<Map<String,Object>>) ((Map<String,Object>) resultJson.get("response")).get("docs");
+    int passCount = 0;
+    for (final Map<String,Object> doc : docs) {
+      final String features = (String) doc.get("fv");
+      // JUnit assertions are used instead of the assert keyword so the checks
+      // cannot be silently disabled and a missing fv fails with a clear message
+      Assert.assertNotNull("no feature vector returned for doc id=" + doc.get("id"), features);
+      Assert.assertTrue("empty feature vector returned for doc id=" + doc.get("id"), features.length() > 0);
+      ++passCount;
+    }
+    Assert.assertEquals("unexpected number of documents with a feature vector", numRows, passCount);
+  }
+}