You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2008/12/07 22:11:56 UTC
svn commit: r724203 - in /lucene/solr/trunk: example/solr/conf/
src/java/org/apache/solr/core/ src/java/org/apache/solr/request/
src/java/org/apache/solr/search/
Author: yonik
Date: Sun Dec 7 13:11:55 2008
New Revision: 724203
URL: http://svn.apache.org/viewvc?rev=724203&view=rev
Log:
SOLR-475: close termenums, use fieldValueCache
Modified:
lucene/solr/trunk/example/solr/conf/solrconfig.xml
lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java
lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java
lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java
lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java
Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Sun Dec 7 13:11:55 2008
@@ -235,6 +235,17 @@
initialSize="512"
autowarmCount="128"/>
+ <!-- Cache used to hold field values that are quickly accessible
+ by document id. The fieldValueCache is created by default
+ even if not configured here.
+ <fieldValueCache
+ class="solr.FastLRUCache"
+ size="512"
+ autowarmCount="128"
+ showItems="32"
+ />
+ -->
+
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
Modified: lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java Sun Dec 7 13:11:55 2008
@@ -23,6 +23,7 @@
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.CacheConfig;
+import org.apache.solr.search.FastLRUCache;
import org.apache.solr.update.SolrIndexConfig;
import org.apache.lucene.search.BooleanQuery;
@@ -31,9 +32,7 @@
import javax.xml.parsers.ParserConfigurationException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.StringTokenizer;
+import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.IOException;
@@ -127,7 +126,19 @@
filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
documentCacheConfig = CacheConfig.getConfig(this, "query/documentCache");
- userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
+ CacheConfig conf = CacheConfig.getConfig(this, "query/fieldValueCache");
+ if (conf == null) {
+ Map<String,String> args = new HashMap<String,String>();
+ args.put("name","fieldValueCache");
+ args.put("size","10000");
+ args.put("initialSize","10");
+ args.put("showItems","-1");
+ conf = new CacheConfig(FastLRUCache.class, args, null);
+ }
+ fieldValueCacheConfig = conf;
+
+ userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
+
org.apache.solr.search.SolrIndexSearcher.initRegenerators(this);
hashSetInverseLoadFactor = 1.0f / getFloat("//HashDocSet/@loadFactor",0.75f);
@@ -161,6 +172,7 @@
public final CacheConfig filterCacheConfig ;
public final CacheConfig queryResultCacheConfig;
public final CacheConfig documentCacheConfig;
+ public final CacheConfig fieldValueCacheConfig;
public final CacheConfig[] userCacheConfigs;
// SolrIndexSearcher - more...
public final boolean useFilterForSortedQuery;
Modified: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java Sun Dec 7 13:11:55 2008
@@ -28,10 +28,7 @@
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.schema.FieldType;
-import org.apache.solr.search.BitDocSet;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocSet;
-import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.*;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.lucene.util.OpenBitSet;
@@ -42,6 +39,7 @@
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.WeakHashMap;
+import java.util.concurrent.atomic.AtomicLong;
/**
*
@@ -73,7 +71,7 @@
* much like Lucene's own internal term index).
*
*/
-class UnInvertedField {
+public class UnInvertedField {
private static int TNUM_OFFSET=2;
static class TopTerm {
@@ -92,6 +90,10 @@
int termsInverted; // number of unique terms that were un-inverted
long termInstances; // total number of references to term numbers
final TermIndex ti;
+ long memsz;
+ int total_time; // total time to uninvert the field
+ int phase1_time; // time for phase1 of the uninvert process
+ final AtomicLong use = new AtomicLong(); // number of uses
int[] index;
byte[][] tnums = new byte[256][];
@@ -100,7 +102,9 @@
public long memSize() {
- long sz = 6*8 + 12; // local fields
+ // can cache the mem size since it shouldn't change
+ if (memsz!=0) return memsz;
+ long sz = 8*8 + 32; // local fields
sz += bigTerms.size() * 64;
for (TopTerm tt : bigTerms.values()) {
sz += tt.memSize();
@@ -113,6 +117,7 @@
if (maxTermCounts != null)
sz += maxTermCounts.length * 4;
sz += ti.memSize();
+ memsz = sz;
return sz;
}
@@ -396,6 +401,9 @@
byte[] newtarget = new byte[pos];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
+ if (target.length > (1<<24)*.9) {
+ SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length);
+ }
}
tnums[pass] = target;
@@ -407,17 +415,18 @@
long endTime = System.currentTimeMillis();
- SolrCore.log.info("UnInverted multi-valued field " + field + ", memSize=" + memSize()
- + ", time="+(endTime-startTime)+", phase1="+(midPoint-startTime)
- + ", nTerms=" + numTermsInField + ", bigTerms=" + bigTerms.size()
- + ", termInstances=" + termInstances
- );
+ total_time = (int)(endTime-startTime);
+ phase1_time = (int)(midPoint-startTime);
+
+ SolrCore.log.info("UnInverted multi-valued field " + toString());
}
public NamedList getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
+ use.incrementAndGet();
+
FieldType ft = searcher.getSchema().getFieldType(field);
NamedList res = new NamedList(); // order is important
@@ -613,63 +622,43 @@
return te.term().text();
}
+ public String toString() {
+ return "{field=" + field
+ + ",memSize="+memSize()
+ + ",tindexSize="+ti.memSize()
+ + ",time="+total_time
+ + ",phase1="+phase1_time
+ + ",nTerms="+numTermsInField
+ + ",bigTerms="+bigTerms.size()
+ + ",termInstances="+termInstances
+ + ",uses="+use.get()
+ + "}";
+ }
+
//////////////////////////////////////////////////////////////////
//////////////////////////// caching /////////////////////////////
//////////////////////////////////////////////////////////////////
- static final class CreationPlaceholder {
- Object value;
- }
-
public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
- return (UnInvertedField)multiValuedFieldCache.get(searcher, field);
- }
+ SolrCache cache = searcher.getFieldValueCache();
+ if (cache == null) {
+ return new UnInvertedField(field, searcher);
+ }
- static Cache multiValuedFieldCache = new Cache() {
- protected Object createValue(SolrIndexSearcher searcher, Object key) throws IOException {
- return new UnInvertedField((String)key, searcher);
- }
- };
-
- /** Internal cache. (from lucene FieldCache) */
- abstract static class Cache {
- private final Map readerCache = new WeakHashMap();
-
- protected abstract Object createValue(SolrIndexSearcher searcher, Object key) throws IOException;
-
- public Object get(SolrIndexSearcher searcher, Object key) throws IOException {
- Map innerCache;
- Object value;
- synchronized (readerCache) {
- innerCache = (Map) readerCache.get(searcher);
- if (innerCache == null) {
- innerCache = new HashMap();
- readerCache.put(searcher, innerCache);
- value = null;
- } else {
- value = innerCache.get(key);
- }
- if (value == null) {
- value = new CreationPlaceholder();
- innerCache.put(key, value);
+ UnInvertedField uif = (UnInvertedField)cache.get(field);
+ if (uif == null) {
+ synchronized (cache) {
+ uif = (UnInvertedField)cache.get(field);
+ if (uif == null) {
+ uif = new UnInvertedField(field, searcher);
+ cache.put(field, uif);
}
}
- if (value instanceof CreationPlaceholder) {
- synchronized (value) {
- CreationPlaceholder progress = (CreationPlaceholder) value;
- if (progress.value == null) {
- progress.value = createValue(searcher, key);
- synchronized (readerCache) {
- innerCache.put(key, progress.value);
- }
- }
- return progress.value;
- }
- }
-
- return value;
}
+
+ return uif;
}
+
}
@@ -743,7 +732,7 @@
}
public void close() throws IOException {
- tenum.close();
+ if (tenum!=null) tenum.close();
}
public boolean skipTo(String target) throws IOException {
@@ -758,6 +747,7 @@
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
+ if (tenum != null) tenum.close();
tenum = reader.terms(target);
pos = startIdx << tindex.intervalBits;
return setTerm();
@@ -768,6 +758,7 @@
if (startIdx == 0) {
// our target occurs *before* the first term
+ if (tenum != null) tenum.close();
tenum = reader.terms(target);
pos = 0;
return setTerm();
@@ -781,6 +772,7 @@
// so we don't need to seek.
} else {
// seek to the right block
+ if (tenum != null) tenum.close();
tenum = reader.terms(target.createTerm(tindex.index[startIdx]));
pos = startIdx << tindex.intervalBits;
setTerm(); // should be true since it's in the index
@@ -802,6 +794,7 @@
String base = tindex.index[idx];
pos = idx << tindex.intervalBits;
delta = termNumber - pos;
+ if (tenum != null) tenum.close();
tenum = reader.terms(tindex.createTerm(base));
}
while (--delta >= 0) {
@@ -895,7 +888,7 @@
/**
- * Returns the approximate amount of memory taken by this DocSet.
+ * Returns the approximate amount of memory taken by this TermIndex.
* This is only an approximation and doesn't take into account java object overhead.
*
* @return
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java Sun Dec 7 13:11:55 2008
@@ -39,15 +39,24 @@
*/
public class CacheConfig {
private String nodeName;
+
+ private Class clazz;
private Map<String,String> args;
+ private CacheRegenerator regenerator;
private String cacheImpl;
- private Class clazz;
private Object[] persistence = new Object[1];
private String regenImpl;
- private CacheRegenerator regenerator;
+
+ public CacheConfig() {}
+
+ public CacheConfig(Class clazz, Map<String,String> args, CacheRegenerator regenerator) {
+ this.clazz = clazz;
+ this.args = args;
+ this.regenerator = regenerator;
+ }
public CacheRegenerator getRegenerator() {
return regenerator;
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java Sun Dec 7 13:11:55 2008
@@ -11,6 +11,7 @@
import java.net.URL;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
/**
@@ -38,6 +39,7 @@
private CacheRegenerator regenerator;
private String description = "Concurrent LRU Cache";
private ConcurrentLRUCache cache;
+ private int showItems = 0;
public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
state = State.CREATED;
@@ -71,9 +73,13 @@
autowarmCount = str == null ? 0 : Integer.parseInt(str);
str = (String) args.get("cleanupThread");
boolean newThread = str == null ? false : Boolean.parseBoolean(str);
-
+
+ str = (String) args.get("showItems");
+ showItems = str == null ? 0 : Integer.parseInt(str);
+
+
description = "Concurrent LRU Cache(maxSize=" + limit + ", initialSize=" + initialSize +
- ", minSize="+minLimit + ", acceptableSize="+acceptableLimit+" ,cleanupThread ="+newThread;
+ ", minSize="+minLimit + ", acceptableSize="+acceptableLimit+", cleanupThread ="+newThread;
if (autowarmCount > 0) {
description += ", autowarmCount=" + autowarmCount
+ ", regenerator=" + regenerator;
@@ -234,6 +240,19 @@
lst.add("cumulative_inserts", cinserts);
lst.add("cumulative_evictions", cevictions);
+ if (showItems != 0) {
+ Map items = cache.getLatestAccessedItems( showItems == -1 ? Integer.MAX_VALUE : showItems );
+ for (Map.Entry e : (Set <Map.Entry>)items.entrySet()) {
+ Object k = e.getKey();
+ Object v = e.getValue();
+
+ String ks = "item_" + k;
+ String vs = v.toString();
+ lst.add(ks,vs);
+ }
+
+ }
+
return lst;
}
@@ -242,3 +261,5 @@
}
}
+
+
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java Sun Dec 7 13:11:55 2008
@@ -31,6 +31,7 @@
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.request.UnInvertedField;
import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
@@ -77,6 +78,7 @@
private final SolrCache filterCache;
private final SolrCache queryResultCache;
private final SolrCache documentCache;
+ private final SolrCache fieldValueCache;
private final LuceneQueryOptimizer optimizer;
@@ -140,6 +142,8 @@
cachingEnabled=enableCache;
if (cachingEnabled) {
ArrayList<SolrCache> clist = new ArrayList<SolrCache>();
+ fieldValueCache = solrConfig.fieldValueCacheConfig==null ? null : solrConfig.fieldValueCacheConfig.newInstance();
+ if (fieldValueCache!=null) clist.add(fieldValueCache);
filterCache= solrConfig.filterCacheConfig==null ? null : solrConfig.filterCacheConfig.newInstance();
if (filterCache!=null) clist.add(filterCache);
queryResultCache = solrConfig.queryResultCacheConfig==null ? null : solrConfig.queryResultCacheConfig.newInstance();
@@ -166,6 +170,7 @@
filterCache=null;
queryResultCache=null;
documentCache=null;
+ fieldValueCache=null;
cacheMap = noGenericCaches;
cacheList= noCaches;
}
@@ -232,6 +237,19 @@
// Set default regenerators on filter and query caches if they don't have any
//
public static void initRegenerators(SolrConfig solrConfig) {
+ if (solrConfig.fieldValueCacheConfig != null && solrConfig.fieldValueCacheConfig.getRegenerator() == null) {
+ solrConfig.fieldValueCacheConfig.setRegenerator(
+ new CacheRegenerator() {
+ public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
+ if (oldVal instanceof UnInvertedField) {
+ UnInvertedField.getUnInvertedField((String)oldKey, newSearcher);
+ }
+ return true;
+ }
+ }
+ );
+ }
+
if (solrConfig.filterCacheConfig != null && solrConfig.filterCacheConfig.getRegenerator() == null) {
solrConfig.filterCacheConfig.setRegenerator(
new CacheRegenerator() {
@@ -452,6 +470,12 @@
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
+ /** expert: internal API, subject to change */
+ public SolrCache getFieldValueCache() {
+ return fieldValueCache;
+ }
+
+
/**
* Returns the first document number containing the term <code>t</code>
* Returns -1 if no document was found.