You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2014/12/22 08:05:35 UTC
svn commit: r1647253 [1/2] - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/core/
core/src/java/org/apache/solr/handler/component/
core/src/java/org/apache/solr/search/
core/src/java/org/apache/solr/search/stats/ core/src/java/org/apache/s...
Author: anshum
Date: Mon Dec 22 07:05:34 2014
New Revision: 1647253
URL: http://svn.apache.org/r1647253
Log:
SOLR-1632: Distributed IDF, finally.
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsCache.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/TermStats.java (with props)
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/package.html (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestExactSharedStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestExactStatsCache.java (with props)
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestLRUStatsCache.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrConfig.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrCore.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Dec 22 07:05:34 2014
@@ -244,6 +244,9 @@ New Features
* SOLR-6801: Load RequestHandler from blob store (Noble Paul)
+* SOLR-1632: Support Distributed IDF (Andrzej Bialecki, Mark Miller, Yonik Seeley,
+ Robert Muir, Markus Jelsma, Vitaliy Zhovtyuk, Anshum Gupta)
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrConfig.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrConfig.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrConfig.java Mon Dec 22 07:05:34 2014
@@ -36,6 +36,7 @@ import org.apache.solr.search.CacheConfi
import org.apache.solr.search.FastLRUCache;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.ValueSourceParser;
+import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.servlet.SolrRequestParsers;
import org.apache.solr.spelling.QueryConverter;
import org.apache.solr.update.SolrIndexConfig;
@@ -251,7 +252,7 @@ public class SolrConfig extends Config i
jmxConfig = new JmxConfiguration(false, null, null, null);
}
maxWarmingSearchers = getInt("query/maxWarmingSearchers",Integer.MAX_VALUE);
- slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
+ slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin);
updateHandlerInfo = loadUpdatehandlerInfo();
@@ -312,6 +313,7 @@ public class SolrConfig extends Config i
.add(new SolrPluginInfo(IndexSchemaFactory.class, "schemaFactory", REQUIRE_CLASS))
.add(new SolrPluginInfo(RestManager.class, "restManager"))
.add(new SolrPluginInfo(InitParams.class, InitParams.TYPE, MULTI_OK))
+ .add(new SolrPluginInfo(StatsCache.class, "statsCache", REQUIRE_CLASS))
.build();
public static class SolrPluginInfo{
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrCore.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrCore.java Mon Dec 22 07:05:34 2014
@@ -17,44 +17,6 @@
package org.apache.solr.core;
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.Writer;
-import java.lang.reflect.Constructor;
-import java.net.URL;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.NoSuchFileException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.IdentityHashMap;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.StringTokenizer;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.ReentrantLock;
-
import org.apache.commons.io.FileUtils;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
@@ -80,8 +42,6 @@ import org.apache.solr.core.DirectoryFac
import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.SnapPuller;
-import org.apache.solr.handler.SolrConfigHandler;
-import org.apache.solr.handler.UpdateRequestHandler;
import org.apache.solr.handler.admin.ShowFileRequestHandler;
import org.apache.solr.handler.component.DebugComponent;
import org.apache.solr.handler.component.ExpandComponent;
@@ -119,6 +79,8 @@ import org.apache.solr.search.QParserPlu
import org.apache.solr.search.SolrFieldCacheMBean;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.ValueSourceParser;
+import org.apache.solr.search.stats.LocalStatsCache;
+import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.update.DefaultSolrCoreState;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.SolrCoreState;
@@ -143,6 +105,43 @@ import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.Writer;
+import java.lang.reflect.Constructor;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.NoSuchFileException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReentrantLock;
/**
*
@@ -164,6 +163,8 @@ public final class SolrCore implements S
private boolean isReloaded = false;
+ private StatsCache statsCache;
+
private final SolrConfig solrConfig;
private final SolrResourceLoader resourceLoader;
private volatile IndexSchema schema;
@@ -818,6 +819,8 @@ public final class SolrCore implements S
// Handle things that should eventually go away
initDeprecatedSupport();
+ statsCache = initStatsCache();
+
// cause the executor to stall so firstSearcher events won't fire
// until after inform() has been called for all components.
// searchExecutor must be single-threaded for this to work
@@ -970,6 +973,27 @@ public final class SolrCore implements S
}
return factory.getCodec();
}
+
+ /**
+  * Chooses the {@link StatsCache} implementation for this core.
+  * <p>
+  * When the config provides plugin info for {@link StatsCache} with a non-empty class
+  * name, the instance is created through {@code createInitInstance}; otherwise a
+  * {@link LocalStatsCache} is constructed directly. The chosen class is logged at INFO.
+  *
+  * @return the stats cache instance to use
+  */
+ private StatsCache initStatsCache() {
+   PluginInfo info = solrConfig.getPluginInfo(StatsCache.class.getName());
+   boolean hasConfiguredClass =
+       info != null && info.className != null && info.className.length() > 0;
+   if (!hasConfiguredClass) {
+     // nothing usable configured: fall back to the local-only implementation
+     log.info("Using default statsCache cache: " + LocalStatsCache.class.getName());
+     return new LocalStatsCache();
+   }
+   StatsCache configured = createInitInstance(info, StatsCache.class, null,
+       LocalStatsCache.class.getName());
+   log.info("Using statsCache impl: " + configured.getClass().getName());
+   return configured;
+ }
+
+ /**
+  * Returns the {@link StatsCache} held by this core, i.e. the current value of the
+  * {@code statsCache} field.
+  *
+  * @return this core's stats cache
+  */
+ public StatsCache getStatsCache() {
+   return this.statsCache;
+ }
/**
* Load the request processors
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java Mon Dec 22 07:05:34 2014
@@ -17,22 +17,8 @@
package org.apache.solr.handler.component;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
@@ -52,7 +38,13 @@ import org.apache.solr.client.solrj.Solr
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.*;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CursorMarkParams;
+import org.apache.solr.common.params.GroupParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.MoreLikeThisParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
@@ -71,11 +63,11 @@ import org.apache.solr.search.Grouping;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryParsing;
+import org.apache.solr.search.RankQuery;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec;
-import org.apache.solr.search.RankQuery;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
@@ -96,8 +88,25 @@ import org.apache.solr.search.grouping.e
import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
+import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.util.SolrPluginUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
/**
* TODO!
@@ -108,6 +117,7 @@ import java.util.Collections;
public class QueryComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "query";
+ private static final Logger LOG = LoggerFactory.getLogger(QueryComponent.class);
@Override
public void prepare(ResponseBuilder rb) throws IOException
@@ -271,6 +281,8 @@ public class QueryComponent extends Sear
@Override
public void process(ResponseBuilder rb) throws IOException
{
+ LOG.debug("process: {}", rb.req.getParams());
+
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
@@ -279,6 +291,19 @@ public class QueryComponent extends Sear
}
SolrIndexSearcher searcher = req.getSearcher();
+ StatsCache statsCache = req.getCore().getStatsCache();
+
+ int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
+ if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
+ statsCache.returnLocalStats(rb, searcher);
+ return;
+ }
+ // check if we need to update the local copy of global dfs
+ if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
+ // retrieve from request and update local cache
+ statsCache.receiveGlobalStats(req);
+ }
+
if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
@@ -329,6 +354,9 @@ public class QueryComponent extends Sear
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
+
+ req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
+
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
//
@@ -479,8 +507,8 @@ public class QueryComponent extends Sear
}
// normal search result
- searcher.search(result,cmd);
- rb.setResult( result );
+ searcher.search(result, cmd);
+ rb.setResult(result);
ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
@@ -637,7 +665,7 @@ public class QueryComponent extends Sear
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) {
nextStage = ResponseBuilder.STAGE_PARSE_QUERY;
} else if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
- createDistributedIdf(rb);
+ createDistributedStats(rb);
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
} else if (rb.stage < ResponseBuilder.STAGE_TOP_GROUPS) {
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
@@ -668,7 +696,7 @@ public class QueryComponent extends Sear
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY)
return ResponseBuilder.STAGE_PARSE_QUERY;
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
- createDistributedIdf(rb);
+ createDistributedStats(rb);
return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
@@ -713,6 +741,10 @@ public class QueryComponent extends Sear
mergeIds(rb, sreq);
}
+ if ((sreq.purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
+ updateStats(rb, sreq);
+ }
+
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
returnFields(rb, sreq);
}
@@ -786,8 +818,19 @@ public class QueryComponent extends Sear
}
}
- private void createDistributedIdf(ResponseBuilder rb) {
- // TODO
+ /**
+  * Asks the core's {@link StatsCache} for a stats-retrieval shard request and queues it
+  * on the response builder.
+  * <p>
+  * Nothing is queued unless scores are needed (the {@code GET_SCORES} field flag is set
+  * or the sort spec includes score), and nothing is queued when the cache returns
+  * {@code null} for the request.
+  */
+ private void createDistributedStats(ResponseBuilder rb) {
+   StatsCache statsCache = rb.req.getCore().getStatsCache();
+   boolean scoresNeeded = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
+       || rb.getSortSpec().includesScore();
+   if (!scoresNeeded) {
+     return;
+   }
+   ShardRequest statsRequest = statsCache.retrieveStatsRequest(rb);
+   if (statsRequest != null) {
+     rb.addRequest(this, statsRequest);
+   }
+ }
+
+ /**
+  * Hands the shard responses of {@code sreq} to the core's {@link StatsCache} via
+  * {@code mergeToGlobalStats}.
+  */
+ private void updateStats(ResponseBuilder rb, ShardRequest sreq) {
+   rb.req.getCore().getStatsCache().mergeToGlobalStats(rb.req, sreq.responses);
}
private void createMainQuery(ResponseBuilder rb) {
@@ -836,6 +879,12 @@ public class QueryComponent extends Sear
sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES,"true");
+ // TODO: should this really sendGlobalStats if just includeScore?
+ if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
+ sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
+ StatsCache statsCache = rb.req.getCore().getStatsCache();
+ statsCache.sendGlobalStats(rb, sreq);
+ }
boolean shardQueryIncludeScore = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0 || rb.getSortSpec().includesScore();
if (distribSinglePass) {
String[] fls = rb.req.getParams().getParams(CommonParams.FL);
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java Mon Dec 22 07:05:34 2014
@@ -299,6 +299,7 @@ public class SearchHandler extends Reque
params.remove("indent");
params.remove(CommonParams.HEADER_ECHO_PARAMS);
params.set(ShardParams.IS_SHARD, true); // a sub (shard) request
+ params.set(ShardParams.SHARDS_PURPOSE, sreq.purpose);
params.set(ShardParams.SHARD_URL, shard); // so the shard knows what was asked
if (rb.requestInfo != null) {
// we could try and detect when this is needed, but it could be tricky
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java Mon Dec 22 07:05:34 2014
@@ -40,6 +40,8 @@ public class ShardRequest {
public final static int PURPOSE_GET_TOP_GROUPS =0x800;
public final static int PURPOSE_GET_MLT_RESULTS =0x1000;
public final static int PURPOSE_REFINE_PIVOT_FACETS =0x2000;
+ public final static int PURPOSE_SET_TERM_STATS =0x4000;
+ public final static int PURPOSE_GET_TERM_STATS = 0x8000;
public int purpose; // the purpose of this request
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java Mon Dec 22 07:05:34 2014
@@ -59,11 +59,13 @@ import org.apache.lucene.index.StorableF
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
@@ -81,6 +83,7 @@ import org.apache.lucene.search.SimpleCo
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
@@ -111,6 +114,7 @@ import org.apache.solr.request.UnInverte
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.stats.StatsSource;
import org.apache.solr.update.SolrIndexConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -125,6 +129,7 @@ import org.slf4j.LoggerFactory;
*/
public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrInfoMBean {
+ public static final String STATS_SOURCE = "org.apache.solr.stats_source";
// These should *only* be used for debugging or monitoring purposes
public static final AtomicLong numOpens = new AtomicLong();
public static final AtomicLong numCloses = new AtomicLong();
@@ -155,7 +160,7 @@ public class SolrIndexSearcher extends I
private final SolrCache<String,UnInvertedField> fieldValueCache;
private final LuceneQueryOptimizer optimizer;
-
+
// map of generic caches - not synchronized since it's read-only after the constructor.
private final HashMap<String, SolrCache> cacheMap;
private static final HashMap<String, SolrCache> noGenericCaches=new HashMap<>(0);
@@ -169,7 +174,7 @@ public class SolrIndexSearcher extends I
private final Collection<String> fieldNames;
private Collection<String> storedHighlightFieldNames;
private DirectoryFactory directoryFactory;
-
+
private final LeafReader leafReader;
// only for addIndexes etc (no fieldcache)
private final DirectoryReader rawReader;
@@ -322,7 +327,7 @@ public class SolrIndexSearcher extends I
// TODO: This option has been dead/noop since 3.1, should we re-enable it?
// optimizer = solrConfig.filtOptEnabled ? new LuceneQueryOptimizer(solrConfig.filtOptCacheSize,solrConfig.filtOptThreshold) : null;
optimizer = null;
-
+
fieldNames = new HashSet<>();
fieldInfos = leafReader.getFieldInfos();
for(FieldInfo fieldInfo : fieldInfos) {
@@ -332,6 +337,44 @@ public class SolrIndexSearcher extends I
// do this at the end since an exception in the constructor means we won't close
numOpens.incrementAndGet();
}
+
+ /*
+ * Override these two methods to provide a way to use global collection stats.
+ */
+ /**
+  * Returns term statistics, preferring a request-scoped {@link StatsSource}.
+  * <p>
+  * If there is a current {@link SolrRequestInfo} and its request context holds an entry
+  * under {@link #STATS_SOURCE}, that source answers the call; otherwise the result of
+  * {@link #localTermStatistics(Term, TermContext)} is returned.
+  */
+ @Override
+ public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
+   final SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
+   if (requestInfo == null) {
+     return localTermStatistics(term, context);
+   }
+   final StatsSource source =
+       (StatsSource) requestInfo.getReq().getContext().get(STATS_SOURCE);
+   if (source == null) {
+     return localTermStatistics(term, context);
+   }
+   return source.termStatistics(this, term, context);
+ }
+
+ /**
+  * Returns collection statistics for {@code field}, preferring a request-scoped
+  * {@link StatsSource}.
+  * <p>
+  * If there is a current {@link SolrRequestInfo} and its request context holds an entry
+  * under {@link #STATS_SOURCE}, that source answers the call; otherwise the result of
+  * {@link #localCollectionStatistics(String)} is returned.
+  */
+ @Override
+ public CollectionStatistics collectionStatistics(String field) throws IOException {
+   final SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
+   if (requestInfo == null) {
+     return localCollectionStatistics(field);
+   }
+   final StatsSource source =
+       (StatsSource) requestInfo.getReq().getContext().get(STATS_SOURCE);
+   if (source == null) {
+     return localCollectionStatistics(field);
+   }
+   return source.collectionStatistics(this, field);
+ }
+
+ /**
+  * Returns term statistics from the superclass implementation, bypassing the
+  * {@link #STATS_SOURCE} lookup done by {@link #termStatistics(Term, TermContext)}.
+  */
+ public TermStatistics localTermStatistics(Term term, TermContext context) throws IOException {
+   return super.termStatistics(term, context);
+ }
+
+ /**
+  * Returns collection statistics for {@code field} from the superclass implementation,
+  * bypassing the {@link #STATS_SOURCE} lookup done by {@link #collectionStatistics(String)}.
+  */
+ public CollectionStatistics localCollectionStatistics(String field) throws IOException {
+   return super.collectionStatistics(field);
+ }
public boolean isCachingEnabled() { return cachingEnabled; }
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,23 @@
+package org.apache.solr.search.stats;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public abstract class CachedSearcherStats {
+ // Currently empty: this class declares no fields or methods.
+ // NOTE(review): presumably a base type for cached per-searcher stats -- confirm intended use.
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,84 @@
+package org.apache.solr.search.stats;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.CollectionStatistics;
+
+/**
+ * Modifiable version of {@link CollectionStatistics} useful for
+ * aggregation of per-shard stats.
+ * <p>
+ * All counters are public, mutable fields. In {@link #add(CollectionStats)} a negative
+ * {@code docCount}, {@code sumTotalTermFreq} or {@code sumDocFreq} on either side is
+ * treated as "unknown" and makes the corresponding total -1.
+ */
+public class CollectionStats {
+  /** Name of the field these statistics describe. */
+  public final String field;
+  public long maxDoc;
+  public long docCount;
+  public long sumTotalTermFreq;
+  public long sumDocFreq;
+
+  /**
+   * Creates statistics for {@code field} with every counter left at its default of 0.
+   */
+  public CollectionStats(String field) {
+    this.field = field;
+  }
+
+  /**
+   * Creates statistics for {@code field} with explicit counter values.
+   */
+  public CollectionStats(String field, long maxDoc, long docCount,
+      long sumTotalTermFreq, long sumDocFreq) {
+    this.field = field;
+    this.maxDoc = maxDoc;
+    this.docCount = docCount;
+    this.sumTotalTermFreq = sumTotalTermFreq;
+    this.sumDocFreq = sumDocFreq;
+  }
+
+  /**
+   * Copies the field name and all counters out of a Lucene {@link CollectionStatistics}.
+   */
+  public CollectionStats(CollectionStatistics stats) {
+    this.field = stats.field();
+    this.maxDoc = stats.maxDoc();
+    this.docCount = stats.docCount();
+    this.sumTotalTermFreq = stats.sumTotalTermFreq();
+    this.sumDocFreq = stats.sumDocFreq();
+  }
+
+  /**
+   * Adds the counters of {@code stats} to this instance.
+   * <p>
+   * {@code maxDoc} is always summed. For {@code docCount}, {@code sumTotalTermFreq} and
+   * {@code sumDocFreq}: if any stats being added uses -1 (or this instance already holds
+   * a negative value) the total is reset to -1, as that parameter becomes unknowable.
+   *
+   * @param stats the statistics to fold into this instance
+   */
+  public void add(CollectionStats stats) {
+    this.maxDoc += stats.maxDoc;
+    if (this.docCount < 0 || stats.docCount < 0) {
+      this.docCount = -1;
+    } else {
+      this.docCount += stats.docCount;
+    }
+    if (this.sumTotalTermFreq < 0 || stats.sumTotalTermFreq < 0) {
+      this.sumTotalTermFreq = -1;
+    } else {
+      this.sumTotalTermFreq += stats.sumTotalTermFreq;
+    }
+    if (this.sumDocFreq < 0 || stats.sumDocFreq < 0) {
+      this.sumDocFreq = -1;
+    } else {
+      this.sumDocFreq += stats.sumDocFreq;
+    }
+  }
+
+  /**
+   * Builds a Lucene {@link CollectionStatistics} from the current field values.
+   */
+  public CollectionStatistics toCollectionStatistics() {
+    return new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
+  }
+
+  /**
+   * Returns the string form produced by {@code StatsUtil.colStatsToString}.
+   */
+  @Override
+  public String toString() {
+    return StatsUtil.colStatsToString(this);
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,88 @@
+package org.apache.solr.search.stats;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.request.SolrQueryRequest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
+
+
+/**
+ * {@link ExactStatsCache} variant that keeps the per-shard and global
+ * statistics in concurrent maps owned by this cache instance, instead of in
+ * the context of each individual request.
+ */
+public class ExactSharedStatsCache extends ExactStatsCache {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(ExactSharedStatsCache.class);
+
+  // local stats obtained from shard servers
+  private final Map<String,Map<String,TermStats>> perShardTermStats = new ConcurrentHashMap<>();
+  private final Map<String,Map<String,CollectionStats>> perShardColStats = new ConcurrentHashMap<>();
+  // global stats synchronized from the master
+  private final Map<String,TermStats> currentGlobalTermStats = new ConcurrentHashMap<>();
+  private final Map<String,CollectionStats> currentGlobalColStats = new ConcurrentHashMap<>();
+
+  /**
+   * Returns a stats source backed by this instance's global term and
+   * collection statistics maps; the request itself is not consulted.
+   */
+  @Override
+  public StatsSource get(SolrQueryRequest req) {
+    LOG.debug("total={}, cache {}", currentGlobalColStats, currentGlobalTermStats.size());
+    return new ExactStatsSource(currentGlobalTermStats, currentGlobalColStats);
+  }
+
+  /** No configuration is read from the plugin info. */
+  @Override
+  public void init(PluginInfo info) {}
+
+  /** Stores (replacing any previous value) the collection stats reported by {@code shard}. */
+  @Override
+  protected void addToPerShardColStats(SolrQueryRequest req, String shard,
+      Map<String,CollectionStats> colStats) {
+    perShardColStats.put(shard, colStats);
+  }
+
+  /** Logs the per-shard maps held by this instance at debug level. */
+  @Override
+  protected void printStats(SolrQueryRequest req) {
+    LOG.debug("perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
+  }
+
+  /**
+   * Parses {@code termStatsString} and, if parsing yields a map, stores it
+   * (replacing any previous value) as the term stats of {@code shard}.
+   */
+  @Override
+  protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
+    Map<String,TermStats> termStats = StatsUtil
+        .termStatsMapFromString(termStatsString);
+    if (termStats != null) {
+      perShardTermStats.put(shard, termStats);
+    }
+  }
+
+  /** Returns the collection stats recorded for {@code shard}, or null if there are none. */
+  @Override
+  protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb, String shard) {
+    return perShardColStats.get(shard);
+  }
+
+  /**
+   * Returns the stats recorded for term {@code t} on {@code shard}, or null
+   * when the shard has no recorded term stats or does not know the term.
+   */
+  @Override
+  protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
+    Map<String,TermStats> cache = perShardTermStats.get(shard);
+    // A shard that never reported term stats has no entry; the caller treats
+    // null as "nothing to add", so do not dereference a missing map.
+    return cache == null ? null : cache.get(t);
+  }
+
+  /** Stores one global collection-stats entry in this instance's map. */
+  @Override
+  protected void addToGlobalColStats(SolrQueryRequest req,
+      Entry<String,CollectionStats> e) {
+    currentGlobalColStats.put(e.getKey(), e.getValue());
+  }
+
+  /** Stores one global term-stats entry in this instance's map. */
+  @Override
+  protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
+    currentGlobalTermStats.put(e.getKey(), e.getValue());
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,352 @@
+package org.apache.solr.search.stats;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.handler.component.ShardResponse;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+/**
+ * This class implements exact caching of statistics. It requires an additional
+ * round-trip to parse query at shard servers, and return term statistics for
+ * query terms (and collection statistics for term fields).
+ */
+public class ExactStatsCache extends StatsCache {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(ExactStatsCache.class);
+
+  // experimenting with strategy that takes more RAM, but also doesn't share memory
+  // across threads
+  // Keys under which this class stores its working maps in the request context.
+  private static final String CURRENT_GLOBAL_COL_STATS = "org.apache.solr.stats.currentGlobalColStats";
+  private static final String CURRENT_GLOBAL_TERM_STATS = "org.apache.solr.stats.currentGlobalTermStats";
+  private static final String PER_SHARD_TERM_STATS = "org.apache.solr.stats.perShardTermStats";
+  private static final String PER_SHARD_COL_STATS = "org.apache.solr.stats.perShardColStats";
+
+  /**
+   * Returns a stats source backed by the global term and collection stats
+   * found in the request context; a missing map is replaced by an empty one.
+   */
+  @Override
+  public StatsSource get(SolrQueryRequest req) {
+    Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().get(CURRENT_GLOBAL_COL_STATS);
+    Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().get(CURRENT_GLOBAL_TERM_STATS);
+    if (currentGlobalColStats == null) {
+      currentGlobalColStats = Collections.emptyMap();
+    }
+    if (currentGlobalTermStats == null) {
+      currentGlobalTermStats = Collections.emptyMap();
+    }
+    LOG.debug("Returning StatsSource. Collection stats={}, Term stats size= {}", currentGlobalColStats, currentGlobalTermStats.size());
+    return new ExactStatsSource(currentGlobalTermStats, currentGlobalColStats);
+  }
+
+  /** No configuration is read from the plugin info. */
+  @Override
+  public void init(PluginInfo info) {}
+
+  /**
+   * Builds the shard request used to fetch term statistics: a copy of the
+   * current request's parameters, with the shards parameter removed and the
+   * purpose set to {@code PURPOSE_GET_TERM_STATS}.
+   */
+  @Override
+  public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
+    ShardRequest sreq = new ShardRequest();
+    sreq.purpose = ShardRequest.PURPOSE_GET_TERM_STATS;
+    sreq.params = new ModifiableSolrParams(rb.req.getParams());
+    // don't pass through any shards param
+    sreq.params.remove(ShardParams.SHARDS);
+    return sreq;
+  }
+
+  /**
+   * Records the term and collection statistics returned by each shard, and
+   * stores the union of the terms reported by all shards in the request
+   * context under {@code TERMS_KEY}.
+   * <p>
+   * Shards without a response are skipped. The terms must be merged across
+   * shards because each shard reports only the terms it actually contains
+   * (see {@link #returnLocalStats}).
+   */
+  @Override
+  public void mergeToGlobalStats(SolrQueryRequest req,
+      List<ShardResponse> responses) {
+    // insertion-ordered so the resulting term list is deterministic
+    Set<Object> allTerms = new LinkedHashSet<Object>();
+    for (ShardResponse r : responses) {
+      String shard = r.getShard();
+      SolrResponse res = r.getSolrResponse();
+      NamedList<Object> nl = (res == null) ? null : res.getResponse();
+      if (nl == null) {
+        // happens if not all shards respond (e.g. no server hosting the shard)
+        LOG.warn("No stats response from shard={}, skipping it while merging stats", shard);
+        continue;
+      }
+      LOG.debug("Merging to global stats, shard={}, response={}", shard, nl);
+
+      String termStatsString = (String) nl.get(TERM_STATS_KEY);
+      if (termStatsString != null) {
+        addToPerShardTermStats(req, shard, termStatsString);
+      }
+      List<Object> terms = nl.getAll(TERMS_KEY);
+      if (terms != null) {
+        allTerms.addAll(terms);
+      }
+      String colStatsString = (String) nl.get(COL_STATS_KEY);
+      if (colStatsString != null) {
+        Map<String,CollectionStats> colStats = StatsUtil
+            .colStatsMapFromString(colStatsString);
+        if (colStats != null) {
+          addToPerShardColStats(req, shard, colStats);
+        }
+      }
+    }
+    if (!allTerms.isEmpty()) {
+      req.getContext().put(TERMS_KEY, new ArrayList<Object>(allTerms));
+    }
+    if (LOG.isDebugEnabled()) printStats(req);
+  }
+
+  /**
+   * Stores the collection stats of {@code shard} in the per-shard map kept in
+   * the request context, creating that map on first use.
+   */
+  protected void addToPerShardColStats(SolrQueryRequest req, String shard,
+      Map<String,CollectionStats> colStats) {
+    Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().get(PER_SHARD_COL_STATS);
+    if (perShardColStats == null) {
+      perShardColStats = new HashMap<String,Map<String,CollectionStats>>();
+      req.getContext().put(PER_SHARD_COL_STATS, perShardColStats);
+    }
+    perShardColStats.put(shard, colStats);
+  }
+
+  /** Logs the per-shard maps found in the request context at debug level. */
+  protected void printStats(SolrQueryRequest req) {
+    Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
+    if (perShardTermStats == null) {
+      perShardTermStats = Collections.emptyMap();
+    }
+    Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().get(PER_SHARD_COL_STATS);
+    if (perShardColStats == null) {
+      perShardColStats = Collections.emptyMap();
+    }
+    LOG.debug("perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
+  }
+
+  /**
+   * Parses {@code termStatsString} and, if parsing yields a map, stores it as
+   * the term stats of {@code shard} in the per-shard map kept in the request
+   * context, creating that map on first use.
+   */
+  protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
+    Map<String,TermStats> termStats = StatsUtil
+        .termStatsMapFromString(termStatsString);
+    if (termStats != null) {
+      Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
+      if (perShardTermStats == null) {
+        perShardTermStats = new HashMap<String,Map<String,TermStats>>();
+        req.getContext().put(PER_SHARD_TERM_STATS, perShardTermStats);
+      }
+      perShardTermStats.put(shard, termStats);
+    }
+  }
+
+  /**
+   * Computes the local statistics for the terms of the (rewritten) query and
+   * adds them to the response: one {@code TERMS_KEY} entry per term, plus the
+   * serialized term stats and the serialized collection stats of the fields
+   * those terms belong to. Terms with a local docFreq of 0 are left out.
+   *
+   * @throws SolrException (SERVER_ERROR) if reading the index fails
+   */
+  @Override
+  public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
+    Query q = rb.getQuery();
+    try {
+      q = q.rewrite(searcher.getIndexReader());
+      HashSet<Term> terms = new HashSet<Term>();
+      q.extractTerms(terms);
+      IndexReaderContext context = searcher.getTopReaderContext();
+      HashMap<String,TermStats> statsMap = new HashMap<String,TermStats>();
+      HashMap<String,CollectionStats> colMap = new HashMap<String,CollectionStats>();
+      for (Term t : terms) {
+        TermContext termContext = TermContext.build(context, t);
+
+        TermStatistics tst = searcher.localTermStatistics(t, termContext);
+        if (tst.docFreq() == 0) { // skip terms that are not present here
+          continue;
+        }
+
+        statsMap.put(t.toString(), new TermStats(t.field(), tst));
+        rb.rsp.add(TERMS_KEY, t.toString());
+        if (!colMap.containsKey(t.field())) { // collection stats for this field
+          colMap.put(
+              t.field(),
+              new CollectionStats(searcher.localCollectionStatistics(t.field())));
+        }
+      }
+
+      String termStatsString = StatsUtil.termStatsMapToString(statsMap);
+      rb.rsp.add(TERM_STATS_KEY, termStatsString);
+      String colStatsString = StatsUtil.colStatsMapToString(colMap);
+      rb.rsp.add(COL_STATS_KEY, colStatsString);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("termStats=" + termStatsString + ", collectionStats="
+            + colStatsString + ", terms=" + terms + ", numDocs="
+            + searcher.maxDoc());
+      }
+    } catch (IOException e) {
+      LOG.error("Error collecting local stats, query='" + q.toString() + "'", e);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error collecting local stats.", e);
+    }
+  }
+
+  /**
+   * Adds the aggregated (global) statistics to an outgoing shard request.
+   * <p>
+   * Marks the request with {@code PURPOSE_SET_TERM_STATS}. If terms were
+   * recorded in the request context, sums the per-shard collection stats for
+   * the fields of those terms and the per-shard term stats for each term over
+   * all shards in {@code rb.shards}, and adds the serialized results and the
+   * terms themselves as request parameters.
+   */
+  @Override
+  public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
+    outgoing.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
+    ModifiableSolrParams params = outgoing.params;
+    List<String> terms = (List<String>) rb.req.getContext().get(TERMS_KEY);
+    if (terms != null) {
+      Set<String> fields = new HashSet<String>();
+      for (String t : terms) {
+        // the field name is the part before the first ':'
+        String[] fv = t.split(":");
+        fields.add(fv[0]);
+      }
+      Map<String,TermStats> globalTermStats = new HashMap<String,TermStats>();
+      Map<String,CollectionStats> globalColStats = new HashMap<String,CollectionStats>();
+      // aggregate collection stats, only for the field in terms
+
+      for (String shard : rb.shards) {
+        Map<String,CollectionStats> s = getPerShardColStats(rb, shard);
+        if (s == null) {
+          continue;
+        }
+        for (Entry<String,CollectionStats> e : s.entrySet()) {
+          if (!fields.contains(e.getKey())) { // skip non-relevant fields
+            continue;
+          }
+          CollectionStats g = globalColStats.get(e.getKey());
+          if (g == null) {
+            g = new CollectionStats(e.getKey());
+            globalColStats.put(e.getKey(), g);
+          }
+          g.add(e.getValue());
+        }
+      }
+      params.add(COL_STATS_KEY, StatsUtil.colStatsMapToString(globalColStats));
+      // sum up only from relevant shards
+      for (String t : terms) {
+        params.add(TERMS_KEY, t);
+        for (String shard : rb.shards) {
+          TermStats termStats = getPerShardTermStats(rb.req, t, shard);
+          if (termStats == null || termStats.docFreq == 0) {
+            continue;
+          }
+          TermStats g = globalTermStats.get(t);
+          if (g == null) {
+            g = new TermStats(t);
+            globalTermStats.put(t, g);
+          }
+          g.add(termStats);
+        }
+      }
+      LOG.debug("terms={}, termStats={}", terms, globalTermStats);
+      // need global TermStats here...
+      params.add(TERM_STATS_KEY, StatsUtil.termStatsMapToString(globalTermStats));
+    }
+  }
+
+  /**
+   * Returns the collection stats recorded for {@code shard} in the request
+   * context, or null if there are none.
+   */
+  protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb, String shard) {
+    Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) rb.req.getContext().get(PER_SHARD_COL_STATS);
+    if (perShardColStats == null) {
+      perShardColStats = Collections.emptyMap();
+    }
+    return perShardColStats.get(shard);
+  }
+
+  /**
+   * Returns the stats recorded in the request context for term {@code t} on
+   * {@code shard}, or null when the shard has no recorded term stats or does
+   * not know the term.
+   */
+  protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
+    Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
+    if (perShardTermStats == null) {
+      perShardTermStats = Collections.emptyMap();
+    }
+    Map<String,TermStats> cache = perShardTermStats.get(shard);
+    // No entry for this shard (nothing recorded, or the shard did not
+    // respond): the caller treats null as "nothing to add".
+    return cache == null ? null : cache.get(t);
+  }
+
+  /**
+   * Reads the serialized global collection and term statistics from the
+   * request parameters, if present, and stores every parsed entry through
+   * {@link #addToGlobalColStats} and {@link #addToGlobalTermStats}.
+   */
+  @Override
+  public void receiveGlobalStats(SolrQueryRequest req) {
+    String globalTermStats = req.getParams().get(TERM_STATS_KEY);
+    String globalColStats = req.getParams().get(COL_STATS_KEY);
+    if (globalColStats != null) {
+      Map<String,CollectionStats> colStats = StatsUtil
+          .colStatsMapFromString(globalColStats);
+      if (colStats != null) {
+        for (Entry<String,CollectionStats> e : colStats.entrySet()) {
+          addToGlobalColStats(req, e);
+        }
+      }
+    }
+    LOG.debug("Global collection stats={}", globalColStats);
+    if (globalTermStats == null) return;
+    Map<String,TermStats> termStats = StatsUtil
+        .termStatsMapFromString(globalTermStats);
+    if (termStats != null) {
+      for (Entry<String,TermStats> e : termStats.entrySet()) {
+        addToGlobalTermStats(req, e);
+      }
+    }
+  }
+
+  /**
+   * Stores one global collection-stats entry in the map kept in the request
+   * context, creating that map on first use.
+   */
+  protected void addToGlobalColStats(SolrQueryRequest req,
+      Entry<String,CollectionStats> e) {
+    Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().get(CURRENT_GLOBAL_COL_STATS);
+    if (currentGlobalColStats == null) {
+      currentGlobalColStats = new HashMap<String,CollectionStats>();
+      req.getContext().put(CURRENT_GLOBAL_COL_STATS, currentGlobalColStats);
+    }
+    currentGlobalColStats.put(e.getKey(), e.getValue());
+  }
+
+  /**
+   * Stores one global term-stats entry in the map kept in the request
+   * context, creating that map on first use.
+   */
+  protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
+    Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().get(CURRENT_GLOBAL_TERM_STATS);
+    if (currentGlobalTermStats == null) {
+      currentGlobalTermStats = new HashMap<String,TermStats>();
+      req.getContext().put(CURRENT_GLOBAL_TERM_STATS, currentGlobalTermStats);
+    }
+    currentGlobalTermStats.put(e.getKey(), e.getValue());
+  }
+
+  /**
+   * Stats source that answers from the supplied global maps and falls back
+   * to the local searcher's statistics for terms or fields they do not hold.
+   */
+  protected static class ExactStatsSource extends StatsSource {
+    private final Map<String,TermStats> termStatsCache;
+    private final Map<String,CollectionStats> colStatsCache;
+
+    public ExactStatsSource(Map<String,TermStats> termStatsCache,
+        Map<String,CollectionStats> colStatsCache) {
+      this.termStatsCache = termStatsCache;
+      this.colStatsCache = colStatsCache;
+    }
+
+    /**
+     * Returns the global statistics for {@code term} (looked up by
+     * {@code term.toString()}), or the local ones if no global entry exists.
+     */
+    @Override
+    public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context)
+        throws IOException {
+      TermStats termStats = termStatsCache.get(term.toString());
+      // TermStats == null is also true if term has no docFreq anyway,
+      // see returnLocalStats, if docFreq == 0, they are not added anyway
+      // Not sure we need a warning here
+      if (termStats == null) {
+        LOG.debug("Missing global termStats info for term={}, using local stats", term);
+        return localSearcher.localTermStatistics(term, context);
+      } else {
+        return termStats.toTermStatistics();
+      }
+    }
+
+    /**
+     * Returns the global statistics for {@code field}, or the local ones if
+     * no global entry exists.
+     */
+    @Override
+    public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
+        throws IOException {
+      CollectionStats colStats = colStatsCache.get(field);
+      if (colStats == null) {
+        LOG.debug("Missing global colStats info for field={}, using local", field);
+        return localSearcher.localCollectionStatistics(field);
+      } else {
+        return colStats.toCollectionStatistics();
+      }
+    }
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,169 @@
+package org.apache.solr.search.stats;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.handler.component.ShardResponse;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.FastLRUCache;
+import org.apache.solr.search.SolrCache;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Unlike {@link ExactStatsCache} this implementation preserves term stats
+ * across queries in a set of LRU caches, and based on surface features of a
+ * query it determines the need to send additional RPC-s. As a result the
+ * additional RPC-s are needed much less frequently.
+ *
+ * <p>
+ * Query terms and their stats are maintained in a set of maps. At the query
+ * front-end there will be as many maps as there are shards, each maintaining
+ * the respective shard statistics. At each shard server there is a single map
+ * that is updated with the global statistics on every request.
+ */
+public class LRUStatsCache extends ExactStatsCache {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LRUStatsCache.class);
+
+  // local stats obtained from shard servers
+  private final Map<String,SolrCache<String,TermStats>> perShardTermStats = new ConcurrentHashMap<>();
+  private final Map<String,Map<String,CollectionStats>> perShardColStats = new ConcurrentHashMap<>();
+
+  // global stats synchronized from the master
+  private final FastLRUCache<String,TermStats> currentGlobalTermStats = new FastLRUCache<>();
+  private final Map<String,CollectionStats> currentGlobalColStats = new ConcurrentHashMap<>();
+
+  // init arguments shared by every LRU cache created by this instance;
+  // populated in init(PluginInfo)
+  private final Map<String,String> lruCacheInitArgs = new HashMap<>();
+
+  /**
+   * Returns a stats source backed by this instance's global term-stats LRU
+   * cache and global collection-stats map; the request is not consulted.
+   */
+  @Override
+  public StatsSource get(SolrQueryRequest req) {
+    LOG.debug("## GET total={}, cache {}", currentGlobalColStats , currentGlobalTermStats.size());
+    return new LRUStatsSource(currentGlobalTermStats, currentGlobalColStats);
+  }
+
+  /**
+   * Sets the cache init arguments (currently a fixed "size" of "100") and
+   * initializes the global term-stats cache with them.
+   */
+  @Override
+  public void init(PluginInfo info) {
+    // TODO: make this configurable via PluginInfo
+    lruCacheInitArgs.put("size", "100");
+    currentGlobalTermStats.init(lruCacheInitArgs, null, null);
+  }
+
+  /** Stores one global term-stats entry in the global LRU cache. */
+  @Override
+  protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
+    currentGlobalTermStats.put(e.getKey(), e.getValue());
+  }
+
+  /** Stores (replacing any previous value) the collection stats reported by {@code shard}. */
+  @Override
+  protected void addToPerShardColStats(SolrQueryRequest req, String shard,
+      Map<String,CollectionStats> colStats) {
+    perShardColStats.put(shard, colStats);
+  }
+
+  /** Returns the collection stats recorded for {@code shard}, or null if there are none. */
+  @Override
+  protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb,
+      String shard) {
+    return perShardColStats.get(shard);
+  }
+
+  /**
+   * Parses {@code termStatsString} and, if parsing yields a map, puts every
+   * entry into the LRU cache of {@code shard}, creating and initializing that
+   * cache on first use.
+   */
+  @Override
+  protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
+    Map<String,TermStats> termStats = StatsUtil
+        .termStatsMapFromString(termStatsString);
+    if (termStats != null) {
+      // NOTE(review): get-then-put is not atomic; two concurrent requests for
+      // a new shard could each create a cache. Confirm whether that matters.
+      SolrCache<String,TermStats> cache = perShardTermStats.get(shard);
+      if (cache == null) { // initialize
+        cache = new FastLRUCache<>();
+        cache.init(lruCacheInitArgs, null, null);
+        perShardTermStats.put(shard, cache);
+      }
+      for (Entry<String,TermStats> e : termStats.entrySet()) {
+        cache.put(e.getKey(), e.getValue());
+      }
+    }
+  }
+
+  /**
+   * Returns the cached stats for term {@code t} on {@code shard}, or null
+   * when no cache exists yet for the shard or the term is not in it.
+   */
+  @Override
+  protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
+    SolrCache<String,TermStats> cache = perShardTermStats.get(shard);
+    // A shard that never reported term stats has no cache; the caller treats
+    // null as "nothing to add", so do not dereference a missing cache.
+    return cache == null ? null : cache.get(t);
+  }
+
+  /** Stores one global collection-stats entry in this instance's map. */
+  @Override
+  protected void addToGlobalColStats(SolrQueryRequest req,
+      Entry<String,CollectionStats> e) {
+    currentGlobalColStats.put(e.getKey(), e.getValue());
+  }
+
+  /** Logs the per-shard maps held by this instance at debug level. */
+  @Override
+  protected void printStats(SolrQueryRequest req) {
+    LOG.debug("## MERGED: perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
+  }
+
+  /**
+   * Stats source that answers from the supplied term-stats cache and
+   * collection-stats map, falling back to the local searcher's statistics
+   * for terms or fields they do not hold.
+   */
+  static class LRUStatsSource extends StatsSource {
+    private final SolrCache<String,TermStats> termStatsCache;
+    private final Map<String,CollectionStats> colStatsCache;
+
+    public LRUStatsSource(SolrCache<String,TermStats> termStatsCache,
+        Map<String,CollectionStats> colStatsCache) {
+      this.termStatsCache = termStatsCache;
+      this.colStatsCache = colStatsCache;
+    }
+
+    /**
+     * Returns the cached global statistics for {@code term} (looked up by
+     * {@code term.toString()}), or the local ones if there is no entry.
+     */
+    @Override
+    public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context)
+        throws IOException {
+      TermStats termStats = termStatsCache.get(term.toString());
+      if (termStats == null) {
+        LOG.debug("## Missing global termStats info: {}, using local", term.toString());
+        return localSearcher.localTermStatistics(term, context);
+      } else {
+        return termStats.toTermStatistics();
+      }
+    }
+
+    /**
+     * Returns the global statistics for {@code field}, or the local ones if
+     * there is no entry.
+     */
+    @Override
+    public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
+        throws IOException {
+      CollectionStats colStats = colStatsCache.get(field);
+      if (colStats == null) {
+        LOG.debug("## Missing global colStats info: {}, using local", field);
+        return localSearcher.localCollectionStatistics(field);
+      } else {
+        return colStats.toCollectionStatistics();
+      }
+    }
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,78 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.handler.component.ShardResponse;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Default implementation that simply ignores global term statistics, and always
+ * uses local term statistics.
+ */
+public class LocalStatsCache extends StatsCache {
+  private static final Logger LOG = LoggerFactory.getLogger(LocalStatsCache.class);
+
+  /** Always hands out a fresh {@link LocalStatsSource}; the request is only logged. */
+  @Override
+  public StatsSource get(SolrQueryRequest req) {
+    LOG.debug("## GET {}", req.toString());
+    return new LocalStatsSource();
+  }
+
+  /** Nothing to configure. */
+  @Override
+  public void init(PluginInfo info) {
+  }
+
+  /**
+   * Logs the request and returns null, so that no additional round-trip
+   * request is created.
+   */
+  @Override
+  public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
+    LOG.debug("## RDR {}", rb.req.toString());
+    return null;
+  }
+
+  /** Logs the request and each shard response; nothing is merged. */
+  @Override
+  public void mergeToGlobalStats(SolrQueryRequest req,
+      List<ShardResponse> responses) {
+    LOG.debug("## MTGD {}", req.toString());
+    for (ShardResponse shardResponse : responses) {
+      LOG.debug(" - {}", shardResponse);
+    }
+  }
+
+  /** Logs the request; no local statistics are added to the response. */
+  @Override
+  public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
+    LOG.debug("## RLD {}", rb.req.toString());
+  }
+
+  /** Logs the request; incoming global statistics are ignored. */
+  @Override
+  public void receiveGlobalStats(SolrQueryRequest req) {
+    LOG.debug("## RGD {}", req.toString());
+  }
+
+  /** Logs the outgoing request; no global statistics are attached to it. */
+  @Override
+  public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
+    LOG.debug("## SGD {}", outgoing.toString());
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,48 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.solr.search.SolrIndexSearcher;
+
+/**
+ * Convenience class that wraps a local {@link SolrIndexSearcher} to provide
+ * local statistics.
+ */
+public final class LocalStatsSource extends StatsSource {
+
+  public LocalStatsSource() {
+  }
+
+  /** Delegates to {@code localSearcher.localTermStatistics(term, context)}. */
+  @Override
+  public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context)
+      throws IOException {
+    TermStatistics localTermStats = localSearcher.localTermStatistics(term, context);
+    return localTermStats;
+  }
+
+  /** Delegates to {@code localSearcher.localCollectionStatistics(field)}. */
+  @Override
+  public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
+      throws IOException {
+    CollectionStatistics localFieldStats = localSearcher.localCollectionStatistics(field);
+    return localFieldStats;
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,119 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+
+import org.apache.lucene.search.Weight;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.handler.component.ShardResponse;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrIndexSearcher.QueryCommand;
+import org.apache.solr.util.plugin.PluginInfoInitialized;
+
+/**
+ * This class represents a cache of global document frequency information for
+ * selected terms. This information is periodically updated from all shards,
+ * either through scheduled events of some kind, or on every request when there
+ * are no global stats available for terms involved in the query (or if this
+ * information is stale due to changes in the shards).
+ * <p>
+ * There are instances of this class at the aggregator node (where the partial
+ * data from shards is aggregated), and on each core involved in a shard request
+ * (where this data is maintained and updated from the central cache).
+ * </p>
+ */
+public abstract class StatsCache implements PluginInfoInitialized {
+ // TODO: decouple use in response from use in request context for these keys
+ /**
+ * Key for the map of terms and {@link TermStats}.
+ */
+ public static final String TERM_STATS_KEY = "org.apache.solr.stats.termStats";
+ /**
+ * Key for the value of {@link CollectionStats}.
+ */
+ public static final String COL_STATS_KEY = "org.apache.solr.stats.colStats";
+ /**
+ * Key for the list of terms in the query.
+ */
+ public static final String TERMS_KEY = "org.apache.solr.stats.terms";
+
+ /**
+ * Creates a {@link ShardRequest} to retrieve per-shard stats related to the
+ * current query and the current state of the requester's {@link StatsCache}.
+ *
+ * @param rb contains current request
+ * @return shard request to retrieve stats for terms in the current request,
+ * or null if no additional request is needed (e.g. if the information
+ * in global cache is already sufficient to satisfy this request).
+ */
+ public abstract ShardRequest retrieveStatsRequest(ResponseBuilder rb);
+
+ /**
+ * Prepare a local (from the local shard) response to a "retrieve stats" shard
+ * request.
+ *
+ * @param rb response builder
+ * @param searcher current local searcher
+ */
+ public abstract void returnLocalStats(ResponseBuilder rb,
+ SolrIndexSearcher searcher);
+
+ /**
+ * Process shard responses that contain partial local stats. Usually this
+ * entails combining per-shard stats for each term.
+ *
+ * @param req query request
+ * @param responses responses from shards containing local stats for each shard
+ */
+ public abstract void mergeToGlobalStats(SolrQueryRequest req,
+ List<ShardResponse> responses);
+
+ /**
+ * Receive global stats data from the master and update a local cache of stats
+ * with this global data. This event occurs either as a separate request, or
+ * together with the regular query request, in which case this method is
+ * called first, before preparing a {@link QueryCommand} to be submitted to
+ * the local {@link SolrIndexSearcher}.
+ *
+ * @param req query request with global stats data
+ */
+ public abstract void receiveGlobalStats(SolrQueryRequest req);
+
+ /**
+ * Prepare global stats data to be sent out to shards in this request.
+ *
+ * @param rb response builder
+ * @param outgoing shard request to be sent
+ */
+ public abstract void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing);
+
+ /**
+ * Prepare local {@link StatsSource} to provide stats information to perform
+ * local scoring (to be precise, to build a local {@link Weight} from the
+ * query).
+ *
+ * @param req query request
+ * @return an instance of {@link StatsSource} to use in creating a query
+ * {@link Weight}
+ */
+ public abstract StatsSource get(SolrQueryRequest req);
+
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,43 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.Weight;
+import org.apache.solr.search.SolrIndexSearcher;
+
+/**
+ * The purpose of this class is only to provide two pieces of information
+ * necessary to create {@link Weight} from a {@link Query}, that is
+ * {@link TermStatistics} for a term and {@link CollectionStatistics} for the
+ * whole collection.
+ */
+public abstract class StatsSource {
+
+ /**
+ * Provide {@link TermStatistics} for a single term.
+ *
+ * @param localSearcher searcher over the local index; {@link LocalStatsSource}
+ * answers directly from it
+ * @param term term to provide statistics for
+ * @param context term context accompanying {@code term}
+ * @return statistics for {@code term}
+ * @throws IOException propagated from the underlying implementation
+ */
+ public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermContext context)
+ throws IOException;
+
+ /**
+ * Provide {@link CollectionStatistics} for a single field.
+ *
+ * @param localSearcher searcher over the local index; {@link LocalStatsSource}
+ * answers directly from it
+ * @param field name of the field to provide statistics for
+ * @return statistics for {@code field}
+ * @throws IOException propagated from the underlying implementation
+ */
+ public abstract CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
+ throws IOException;
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,223 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.util.Base64;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Various utilities for de/serialization of term stats and collection stats.
+ */
+public class StatsUtil {
+
+  // Logger for problems found while (de)serializing stats; named after this
+  // class so that its warnings are attributed to StatsUtil.
+  private static final Logger LOG = LoggerFactory.getLogger(StatsUtil.class);
+
+  /**
+   * Serialize a {@link CollectionStats} as one comma-separated line in the
+   * order {@code field,maxDoc,docCount,sumTotalTermFreq,sumDocFreq}.
+   *
+   * @param colStats stats to serialize
+   * @return the comma-separated representation
+   */
+  public static String colStatsToString(CollectionStats colStats) {
+    final char sep = ',';
+    return new StringBuilder()
+        .append(colStats.field).append(sep)
+        .append(colStats.maxDoc).append(sep)
+        .append(colStats.docCount).append(sep)
+        .append(colStats.sumTotalTermFreq).append(sep)
+        .append(colStats.sumDocFreq)
+        .toString();
+  }
+
+  /**
+   * Parse the five-field, comma-separated form written by
+   * {@link #colStatsToString(CollectionStats)}.
+   *
+   * @param data serialized collection stats
+   * @return the parsed stats, or {@code null} (after logging a warning) when
+   *         the input is empty, has the wrong number of fields, or cannot be
+   *         converted
+   */
+  private static CollectionStats colStatsFromString(String data) {
+    final boolean blank = data == null || data.trim().length() == 0;
+    if (blank) {
+      LOG.warn("Invalid empty collection stats string");
+      return null;
+    }
+    final String[] parts = data.split(",");
+    if (parts.length != 5) {
+      LOG.warn("Invalid collection stats string, num fields " + parts.length
+          + " != 5, '" + data + "'");
+      return null;
+    }
+    try {
+      // parts[0] is the field name; the remaining four fields are numeric.
+      return new CollectionStats(parts[0],
+          Long.parseLong(parts[1]),
+          Long.parseLong(parts[2]),
+          Long.parseLong(parts[3]),
+          Long.parseLong(parts[4]));
+    } catch (Exception e) {
+      LOG.warn("Invalid collection stats string '" + data + "': "
+          + e.toString());
+      return null;
+    }
+  }
+
+  /**
+   * Make a String representation of a {@link Term}: the field name, a colon,
+   * and the Base64 encoding of the term's bytes.
+   *
+   * @param t term to serialize
+   * @return {@code field:base64(termBytes)}
+   */
+  public static String termToString(Term t) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(t.field()).append(':');
+    BytesRef bytes = t.bytes();
+    // A BytesRef is a slice of its backing array: the valid bytes start at
+    // 'offset' and span 'length' bytes, so the last argument must be the
+    // length (passing the offset again would encode the wrong number of bytes).
+    sb.append(Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
+    return sb.toString();
+  }
+
+  /**
+   * Parse a {@code field:value} string into a {@link Term}, splitting at the
+   * first ':' and using the remainder verbatim as the term text.
+   *
+   * @param data serialized term
+   * @return the term, or {@code null} (after logging a warning) when the input
+   *         is empty, has no ':' or the term cannot be created
+   */
+  private static Term termFromString(String data) {
+    if (data == null || data.trim().length() == 0) {
+      LOG.warn("Invalid empty term value");
+      return null;
+    }
+    final int colon = data.indexOf(':');
+    if (colon < 0) {
+      LOG.warn("Invalid term data without ':': '" + data + "'");
+      return null;
+    }
+    final String value = data.substring(colon + 1);
+    try {
+      // XXX it would be more correct to treat the value as Base64-encoded
+      // bytes, i.e. new Term(field, new BytesRef(Base64.base64ToByteArray(value)))
+      return new Term(data.substring(0, colon), value);
+    } catch (Exception e) {
+      LOG.warn("Invalid term value '" + value + "'");
+      return null;
+    }
+  }
+
+  /**
+   * Serialize a {@link TermStats} as {@code docFreq,totalTermFreq}, optionally
+   * prefixed with the term and a comma.
+   *
+   * @param termStats stats to serialize
+   * @param includeTerm whether to emit {@code termStats.term} as the first field
+   * @return the comma-separated representation
+   */
+  public static String termStatsToString(TermStats termStats,
+      boolean includeTerm) {
+    final StringBuilder out = new StringBuilder();
+    if (includeTerm) {
+      out.append(termStats.term);
+      out.append(',');
+    }
+    return out.append(termStats.docFreq)
+        .append(',')
+        .append(termStats.totalTermFreq)
+        .toString();
+  }
+
+  /**
+   * Parse term stats from a comma-separated string. With exactly three fields
+   * the first one is decoded as the term; otherwise the first two fields are
+   * read as {@code docFreq,totalTermFreq} and the supplied term is used.
+   *
+   * @param data serialized term stats
+   * @param t term to use when the data carries no decodable term; may be null
+   * @return the parsed stats, or {@code null} (after logging a warning) when
+   *         the input is empty, too short, has no usable term, or its numeric
+   *         fields cannot be parsed
+   */
+  private static TermStats termStatsFromString(String data, Term t) {
+    if (data == null || data.trim().length() == 0) {
+      LOG.warn("Invalid empty term stats string");
+      return null;
+    }
+    final String[] parts = data.split(",");
+    if (parts.length < 2) {
+      LOG.warn("Invalid term stats string, num fields " + parts.length
+          + " < 2, '" + data + "'");
+      return null;
+    }
+    // Exactly three fields means the first one is the term itself.
+    final boolean withTerm = parts.length == 3;
+    Term termToUse = t;
+    if (withTerm) {
+      final Term decoded = termFromString(parts[0]);
+      if (decoded != null) {
+        assert t == null || decoded.equals(t);
+        termToUse = decoded;
+      }
+      // otherwise term decoding failed: keep the supplied term
+    }
+    if (termToUse == null) {
+      LOG.warn("Missing term in termStats '" + data + "'");
+      return null;
+    }
+    final int first = withTerm ? 1 : 0;
+    try {
+      final long docFreq = Long.parseLong(parts[first]);
+      final long totalTermFreq = Long.parseLong(parts[first + 1]);
+      return new TermStats(termToUse.toString(), docFreq, totalTermFreq);
+    } catch (Exception e) {
+      LOG.warn("Invalid termStats string '" + data + "'");
+      return null;
+    }
+  }
+
+  /**
+   * Parse a '!'-separated list of serialized {@link CollectionStats} into a
+   * map keyed by field name. Entries that fail to parse are skipped.
+   *
+   * @param data serialized map
+   * @return the parsed map, or {@code null} when {@code data} is null or blank
+   */
+  public static Map<String,CollectionStats> colStatsMapFromString(String data) {
+    if (data == null || data.trim().length() == 0) {
+      return null;
+    }
+    final Map<String,CollectionStats> byField = new HashMap<>();
+    for (String entry : data.split("!")) {
+      final CollectionStats parsed = colStatsFromString(entry);
+      if (parsed == null) {
+        continue; // unparsable entry, already reported by colStatsFromString
+      }
+      byField.put(parsed.field, parsed);
+    }
+    return byField;
+  }
+
+  /**
+   * Serialize every {@link CollectionStats} value of the map, joined by '!'.
+   *
+   * @param stats map to serialize
+   * @return the joined representation, or an empty string when the map is
+   *         null or empty
+   */
+  public static String colStatsMapToString(Map<String,CollectionStats> stats) {
+    if (stats == null || stats.isEmpty()) {
+      return "";
+    }
+    final StringBuilder joined = new StringBuilder();
+    String separator = "";
+    for (Entry<String,CollectionStats> entry : stats.entrySet()) {
+      joined.append(separator).append(colStatsToString(entry.getValue()));
+      // every serialized entry is non-empty, so all later ones need a separator
+      separator = "!";
+    }
+    return joined.toString();
+  }
+
+  /**
+   * Parse a '!'-separated list of serialized {@link TermStats} into a map
+   * keyed by {@code TermStats.term}. Entries that fail to parse are skipped.
+   *
+   * @param data serialized map
+   * @return the parsed map, or {@code null} when {@code data} is null or blank
+   */
+  public static Map<String,TermStats> termStatsMapFromString(String data) {
+    if (data == null || data.trim().length() == 0) {
+      return null;
+    }
+    final Map<String,TermStats> byTerm = new HashMap<>();
+    for (String entry : data.split("!")) {
+      // no fallback term: each entry has to carry its own term
+      final TermStats parsed = termStatsFromString(entry, null);
+      if (parsed == null) {
+        continue;
+      }
+      byTerm.put(parsed.term, parsed);
+    }
+    return byTerm;
+  }
+
+  /**
+   * Serialize every {@link TermStats} value of the map (term included),
+   * joined by '!'.
+   *
+   * @param stats map to serialize
+   * @return the joined representation, or an empty string when the map is
+   *         null or empty
+   */
+  public static String termStatsMapToString(Map<String,TermStats> stats) {
+    if (stats == null || stats.isEmpty()) {
+      return "";
+    }
+    final StringBuilder joined = new StringBuilder();
+    String separator = "";
+    for (Entry<String,TermStats> entry : stats.entrySet()) {
+      joined.append(separator).append(termStatsToString(entry.getValue(), true));
+      // every serialized entry is non-empty, so all later ones need a separator
+      separator = "!";
+    }
+    return joined.toString();
+  }
+
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/TermStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/TermStats.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/TermStats.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/TermStats.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,82 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.TermStatistics;
+
+/**
+ * Modifiable version of {@link TermStatistics} useful for aggregation of
+ * per-shard stats.
+ * <p>
+ * The term is held both as a {@code field:text} string and, when that string
+ * contains a ':' separator, as a parsed {@link Term}.
+ * </p>
+ */
+public class TermStats {
+  /** Term in {@code field:text} form. */
+  public final String term;
+  public long docFreq;
+  public long totalTermFreq;
+  /** Parsed form of {@link #term}; null when {@link #term} has no ':' separator. */
+  private final Term t;
+
+  /**
+   * Create stats for a term with both counters at zero.
+   *
+   * @param term term in {@code field:text} form
+   */
+  public TermStats(String term) {
+    this.term = term;
+    this.t = makeTerm(term);
+  }
+
+  /**
+   * Split {@code field:text} at the first ':' into a {@link Term}.
+   *
+   * @return the term, or null when the string contains no ':'
+   */
+  private static Term makeTerm(String s) {
+    int idx = s.indexOf(':');
+    if (idx == -1) {
+      return null;
+    }
+    return new Term(s.substring(0, idx), s.substring(idx + 1));
+  }
+
+  /**
+   * Create stats for a term with the given counters.
+   *
+   * @param term term in {@code field:text} form
+   * @param docFreq document frequency
+   * @param totalTermFreq total term frequency
+   */
+  public TermStats(String term, long docFreq, long totalTermFreq) {
+    this(term);
+    this.docFreq = docFreq;
+    this.totalTermFreq = totalTermFreq;
+  }
+
+  /**
+   * Create stats by copying the term and counters of a {@link TermStatistics}.
+   *
+   * @param field field the term belongs to
+   * @param stats statistics to copy
+   */
+  public TermStats(String field, TermStatistics stats) {
+    this.term = field + ":" + stats.term().utf8ToString();
+    this.t = new Term(field, stats.term());
+    this.docFreq = stats.docFreq();
+    this.totalTermFreq = stats.totalTermFreq();
+  }
+
+  /**
+   * Add the counters of {@code stats} to this instance.
+   * If any of the stats is -1 then reset total stats to -1.
+   *
+   * @param stats stats to add
+   */
+  public void add(TermStats stats) {
+    if (this.docFreq < 0 || stats.docFreq < 0) {
+      this.docFreq = -1;
+    } else {
+      this.docFreq += stats.docFreq;
+    }
+    if (this.totalTermFreq < 0 || stats.totalTermFreq < 0) {
+      this.totalTermFreq = -1;
+    } else {
+      this.totalTermFreq += stats.totalTermFreq;
+    }
+  }
+
+  /**
+   * Convert the current counters to an immutable {@link TermStatistics}.
+   *
+   * @return statistics for the parsed term
+   * @throws IllegalStateException if the term string had no ':' separator and
+   *         therefore could not be parsed into a {@link Term}
+   */
+  public TermStatistics toTermStatistics() {
+    if (t == null) {
+      // Fail with an explanation instead of a bare NullPointerException.
+      throw new IllegalStateException("Cannot create TermStatistics: term '" + term
+          + "' is not in field:text form");
+    }
+    return new TermStatistics(t.bytes(), docFreq, totalTermFreq);
+  }
+
+  /**
+   * @return the serialized form produced by
+   *         {@link StatsUtil#termStatsToString(TermStats, boolean)} with the
+   *         term included
+   */
+  @Override
+  public String toString() {
+    return StatsUtil.termStatsToString(this, true);
+  }
+}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/package.html?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/package.html (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/stats/package.html Mon Dec 22 07:05:34 2014
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<html>
+<head lang="en">
+ <meta charset="UTF-8">
+ <title></title>
+</head>
+<body>
+<p>
+ APIs and Classes implementing the Stats component used for document frequency
+ calculations.
+</p>
+</body>
+</html>
\ No newline at end of file
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java Mon Dec 22 07:05:34 2014
@@ -97,7 +97,7 @@ public class SolrPluginUtils {
static {
Map<Integer, String> map = new TreeMap<>();
map.put(ShardRequest.PURPOSE_PRIVATE, "PRIVATE");
- map.put(ShardRequest.PURPOSE_GET_TERM_DFS, "GET_TERM_DFS");
+ map.put(ShardRequest.PURPOSE_GET_TERM_STATS, "GET_TERM_STATS");
map.put(ShardRequest.PURPOSE_GET_TOP_IDS, "GET_TOP_IDS");
map.put(ShardRequest.PURPOSE_REFINE_TOP_IDS, "REFINE_TOP_IDS");
map.put(ShardRequest.PURPOSE_GET_FACETS, "GET_FACETS");
@@ -109,6 +109,7 @@ public class SolrPluginUtils {
map.put(ShardRequest.PURPOSE_GET_TERMS, "GET_TERMS");
map.put(ShardRequest.PURPOSE_GET_TOP_GROUPS, "GET_TOP_GROUPS");
map.put(ShardRequest.PURPOSE_GET_MLT_RESULTS, "GET_MLT_RESULTS");
+ map.put(ShardRequest.PURPOSE_SET_TERM_STATS, "SET_TERM_STATS");
purposes = Collections.unmodifiableMap(map);
}
Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml?rev=1647253&r1=1647252&r2=1647253&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml Mon Dec 22 07:05:34 2014
@@ -52,6 +52,8 @@
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <statsCache class="${solr.statsCache:}"/>
+
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2">
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java?rev=1647253&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java Mon Dec 22 07:05:34 2014
@@ -0,0 +1,69 @@
+package org.apache.solr.search.stats;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.junit.Ignore;
+
+import java.util.Iterator;
+
+/**
+ * Base class for tests of a specific stats cache implementation. Subclasses
+ * name the implementation class; it is published through the
+ * {@code solr.statsCache} system property for the duration of each test.
+ */
+@Ignore("Abstract calls should not executed as test")
+public abstract class TestBaseStatsCache extends TestDefaultStatsCache {
+
+  /**
+   * @return the class name to store in the {@code solr.statsCache} system
+   *         property
+   */
+  protected abstract String getStatsCacheClassName();
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    System.setProperty("solr.statsCache", getStatsCacheClassName());
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    try {
+      super.tearDown();
+    } finally {
+      // Always clear the property, even if the parent tear-down fails, so it
+      // cannot leak into tests that run later in the same JVM.
+      System.clearProperty("solr.statsCache");
+    }
+  }
+
+  // in this case, as the number of shards increases, per-shard scores should
+  // remain identical
+  @Override
+  protected void checkResponse(QueryResponse controlRsp, QueryResponse shardRsp) {
+    System.out.println("======================= Control Response =======================");
+    System.out.println(controlRsp);
+    System.out.println("");
+    System.out.println("");
+    System.out.println("======================= Shard Response =======================");
+    System.out.println("");
+    System.out.println(shardRsp);
+    SolrDocumentList shardList = shardRsp.getResults();
+    SolrDocumentList controlList = controlRsp.getResults();
+
+    assertEquals(controlList.size(), shardList.size());
+
+    assertEquals(controlList.getNumFound(), shardList.getNumFound());
+    // The sizes were asserted equal above, so both iterators advance in lockstep.
+    Iterator<SolrDocument> it = controlList.iterator();
+    Iterator<SolrDocument> it2 = shardList.iterator();
+    while (it.hasNext()) {
+      SolrDocument controlDoc = it.next();
+      SolrDocument shardDoc = it2.next();
+      assertEquals(controlDoc.getFieldValue("score"), shardDoc.getFieldValue("score"));
+    }
+  }
+
+}