You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2008/12/07 22:11:56 UTC

svn commit: r724203 - in /lucene/solr/trunk: example/solr/conf/ src/java/org/apache/solr/core/ src/java/org/apache/solr/request/ src/java/org/apache/solr/search/

Author: yonik
Date: Sun Dec  7 13:11:55 2008
New Revision: 724203

URL: http://svn.apache.org/viewvc?rev=724203&view=rev
Log:
SOLR-475: close termenums, use fieldValueCache

Modified:
    lucene/solr/trunk/example/solr/conf/solrconfig.xml
    lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java
    lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java
    lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java
    lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java

Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Sun Dec  7 13:11:55 2008
@@ -235,6 +235,17 @@
       initialSize="512"
       autowarmCount="128"/>
 
+    <!-- Cache used to hold field values that are quickly accessible
+         by document id.  The fieldValueCache is created by default
+         even if not configured here.
+      <fieldValueCache
+        class="solr.FastLRUCache"
+        size="512"
+        autowarmCount="128"
+        showItems="32"
+      />
+    -->
+
    <!-- queryResultCache caches results of searches - ordered lists of
          document ids (DocList) based on a query, a sort, and the range
          of documents requested.  -->

Modified: lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/core/SolrConfig.java Sun Dec  7 13:11:55 2008
@@ -23,6 +23,7 @@
 import org.apache.solr.request.SolrQueryRequest;
 
 import org.apache.solr.search.CacheConfig;
+import org.apache.solr.search.FastLRUCache;
 import org.apache.solr.update.SolrIndexConfig;
 import org.apache.lucene.search.BooleanQuery;
 
@@ -31,9 +32,7 @@
 
 import javax.xml.parsers.ParserConfigurationException;
 
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.StringTokenizer;
+import java.util.*;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 import java.io.IOException;
@@ -127,7 +126,19 @@
     filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
     queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
     documentCacheConfig = CacheConfig.getConfig(this, "query/documentCache");
-    userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");
+    CacheConfig conf = CacheConfig.getConfig(this, "query/fieldValueCache");
+    if (conf == null) {
+      Map<String,String> args = new HashMap<String,String>();
+      args.put("name","fieldValueCache");
+      args.put("size","10000");
+      args.put("initialSize","10");
+      args.put("showItems","-1");
+      conf = new CacheConfig(FastLRUCache.class, args, null);
+    }
+    fieldValueCacheConfig = conf;
+
+    userCacheConfigs = CacheConfig.getMultipleConfigs(this, "query/cache");     
+
     org.apache.solr.search.SolrIndexSearcher.initRegenerators(this);
 
     hashSetInverseLoadFactor = 1.0f / getFloat("//HashDocSet/@loadFactor",0.75f);
@@ -161,6 +172,7 @@
   public final CacheConfig filterCacheConfig ;
   public final CacheConfig queryResultCacheConfig;
   public final CacheConfig documentCacheConfig;
+  public final CacheConfig fieldValueCacheConfig;
   public final CacheConfig[] userCacheConfigs;
   // SolrIndexSearcher - more...
   public final boolean useFilterForSortedQuery;

Modified: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java Sun Dec  7 13:11:55 2008
@@ -28,10 +28,7 @@
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SimpleFacets;
 import org.apache.solr.schema.FieldType;
-import org.apache.solr.search.BitDocSet;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocSet;
-import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.*;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.lucene.util.OpenBitSet;
 
@@ -42,6 +39,7 @@
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.WeakHashMap;
+import java.util.concurrent.atomic.AtomicLong;
 
 /**
  *
@@ -73,7 +71,7 @@
  *   much like Lucene's own internal term index).
  *
  */
-class UnInvertedField {
+public class UnInvertedField {
   private static int TNUM_OFFSET=2;
 
   static class TopTerm {
@@ -92,6 +90,10 @@
   int termsInverted;  // number of unique terms that were un-inverted
   long termInstances; // total number of references to term numbers
   final TermIndex ti;
+  long memsz;
+  int total_time;  // total time to uninvert the field, in milliseconds
+  int phase1_time;  // time for phase1 of the uninvert process, in milliseconds
+  final AtomicLong use = new AtomicLong(); // number of uses
 
   int[] index;
   byte[][] tnums = new byte[256][];
@@ -100,7 +102,9 @@
 
 
   public long memSize() {
-    long sz = 6*8 + 12; // local fields
+    // can cache the mem size since it shouldn't change
+    if (memsz!=0) return memsz;
+    long sz = 8*8 + 32; // local fields
     sz += bigTerms.size() * 64;
     for (TopTerm tt : bigTerms.values()) {
       sz += tt.memSize();
@@ -113,6 +117,7 @@
     if (maxTermCounts != null)
       sz += maxTermCounts.length * 4;
     sz += ti.memSize();
+    memsz = sz;
     return sz;
   }
 
@@ -396,6 +401,9 @@
           byte[] newtarget = new byte[pos];
           System.arraycopy(target, 0, newtarget, 0, pos);
           target = newtarget;
+          if (target.length > (1<<24)*.9) {
+            SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length);
+          }
         }
         
         tnums[pass] = target;
@@ -407,17 +415,18 @@
 
     long endTime = System.currentTimeMillis();
 
-    SolrCore.log.info("UnInverted multi-valued field " + field + ", memSize=" + memSize()
-            + ", time="+(endTime-startTime)+", phase1="+(midPoint-startTime)
-            + ", nTerms=" + numTermsInField + ", bigTerms=" + bigTerms.size()
-            + ", termInstances=" + termInstances
-            );
+    total_time = (int)(endTime-startTime);
+    phase1_time = (int)(midPoint-startTime);
+
+    SolrCore.log.info("UnInverted multi-valued field " + toString());
   }
 
 
 
 
   public NamedList getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
+    use.incrementAndGet();
+
     FieldType ft = searcher.getSchema().getFieldType(field);
 
     NamedList res = new NamedList();  // order is important
@@ -613,63 +622,43 @@
     return te.term().text();
   }
 
+  public String toString() {
+    return "{field=" + field
+            + ",memSize="+memSize()
+            + ",tindexSize="+ti.memSize()
+            + ",time="+total_time
+            + ",phase1="+phase1_time
+            + ",nTerms="+numTermsInField
+            + ",bigTerms="+bigTerms.size()
+            + ",termInstances="+termInstances
+            + ",uses="+use.get()
+            + "}";
+  }
+
 
   //////////////////////////////////////////////////////////////////
   //////////////////////////// caching /////////////////////////////
   //////////////////////////////////////////////////////////////////
-  static final class CreationPlaceholder {
-    Object value;
-  }
-
   public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
-    return (UnInvertedField)multiValuedFieldCache.get(searcher, field);
-  }
+    SolrCache cache = searcher.getFieldValueCache();
+    if (cache == null) {
+      return new UnInvertedField(field, searcher);
+    }
 
-  static Cache multiValuedFieldCache = new Cache() {
-    protected Object createValue(SolrIndexSearcher searcher, Object key) throws IOException {
-      return new UnInvertedField((String)key, searcher);
-    }
-  };
-
-    /** Internal cache. (from lucene FieldCache) */
-  abstract static class Cache {
-    private final Map readerCache = new WeakHashMap();
-
-    protected abstract Object createValue(SolrIndexSearcher searcher, Object key) throws IOException;
-
-    public Object get(SolrIndexSearcher searcher, Object key) throws IOException {
-      Map innerCache;
-      Object value;
-      synchronized (readerCache) {
-        innerCache = (Map) readerCache.get(searcher);
-        if (innerCache == null) {
-          innerCache = new HashMap();
-          readerCache.put(searcher, innerCache);
-          value = null;
-        } else {
-          value = innerCache.get(key);
-        }
-        if (value == null) {
-          value = new CreationPlaceholder();
-          innerCache.put(key, value);
+    UnInvertedField uif = (UnInvertedField)cache.get(field);
+    if (uif == null) {
+      synchronized (cache) {
+        uif = (UnInvertedField)cache.get(field);
+        if (uif == null) {
+          uif = new UnInvertedField(field, searcher);
+          cache.put(field, uif);
         }
       }
-      if (value instanceof CreationPlaceholder) {
-        synchronized (value) {
-          CreationPlaceholder progress = (CreationPlaceholder) value;
-          if (progress.value == null) {
-            progress.value = createValue(searcher, key);
-            synchronized (readerCache) {
-              innerCache.put(key, progress.value);
-            }
-          }
-          return progress.value;
-        }
-      }
-
-      return value;
     }
+
+    return uif;
   }
+
 }
 
 
@@ -743,7 +732,7 @@
   }
 
   public void close() throws IOException {
-    tenum.close();
+    if (tenum!=null) tenum.close();
   }
 
   public boolean skipTo(String target) throws IOException {
@@ -758,6 +747,7 @@
 
     if (startIdx >= 0) {
       // we hit the term exactly... lucky us!
+      if (tenum != null) tenum.close();
       tenum = reader.terms(target);
       pos = startIdx << tindex.intervalBits;
       return setTerm();
@@ -768,6 +758,7 @@
 
     if (startIdx == 0) {
       // our target occurs *before* the first term
+      if (tenum != null) tenum.close();
       tenum = reader.terms(target);
       pos = 0;
       return setTerm();
@@ -781,6 +772,7 @@
       // so we don't need to seek.
     } else {
       // seek to the right block
+      if (tenum != null) tenum.close();            
       tenum = reader.terms(target.createTerm(tindex.index[startIdx]));
       pos = startIdx << tindex.intervalBits;
       setTerm();  // should be true since it's in the index
@@ -802,6 +794,7 @@
       String base = tindex.index[idx];
       pos = idx << tindex.intervalBits;
       delta = termNumber - pos;
+      if (tenum != null) tenum.close();
       tenum = reader.terms(tindex.createTerm(base));
     }
     while (--delta >= 0) {
@@ -895,7 +888,7 @@
 
 
   /**
-   * Returns the approximate amount of memory taken by this DocSet.
+   * Returns the approximate amount of memory taken by this TermIndex.
    * This is only an approximation and doesn't take into account java object overhead.
    *
    * @return

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/CacheConfig.java Sun Dec  7 13:11:55 2008
@@ -39,15 +39,24 @@
  */
 public class CacheConfig {
   private String nodeName;
+
+  private Class clazz;
   private Map<String,String> args;
+  private CacheRegenerator regenerator;
 
   private String cacheImpl;
-  private Class clazz;
 
   private Object[] persistence = new Object[1];
 
   private String regenImpl;
-  private CacheRegenerator regenerator;
+
+  public CacheConfig() {}
+
+  public CacheConfig(Class clazz, Map<String,String> args, CacheRegenerator regenerator) {
+    this.clazz = clazz;
+    this.args = args;
+    this.regenerator = regenerator;
+  }
 
   public CacheRegenerator getRegenerator() {
     return regenerator;

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/FastLRUCache.java Sun Dec  7 13:11:55 2008
@@ -11,6 +11,7 @@
 import java.net.URL;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.CopyOnWriteArrayList;
 
 /**
@@ -38,6 +39,7 @@
   private CacheRegenerator regenerator;
   private String description = "Concurrent LRU Cache";
   private ConcurrentLRUCache cache;
+  private int showItems = 0;
 
   public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
     state = State.CREATED;
@@ -71,9 +73,13 @@
     autowarmCount = str == null ? 0 : Integer.parseInt(str);
     str = (String) args.get("cleanupThread");
     boolean newThread = str == null ? false : Boolean.parseBoolean(str);
-    
+
+    str = (String) args.get("showItems");
+    showItems = str == null ? 0 : Integer.parseInt(str);
+
+
     description = "Concurrent LRU Cache(maxSize=" + limit + ", initialSize=" + initialSize +
-            ", minSize="+minLimit + ", acceptableSize="+acceptableLimit+" ,cleanupThread ="+newThread;
+            ", minSize="+minLimit + ", acceptableSize="+acceptableLimit+", cleanupThread ="+newThread;
     if (autowarmCount > 0) {
       description += ", autowarmCount=" + autowarmCount
               + ", regenerator=" + regenerator;
@@ -234,6 +240,19 @@
     lst.add("cumulative_inserts", cinserts);
     lst.add("cumulative_evictions", cevictions);
 
+    if (showItems != 0) {
+      Map items = cache.getLatestAccessedItems( showItems == -1 ? Integer.MAX_VALUE : showItems );
+      for (Map.Entry e : (Set <Map.Entry>)items.entrySet()) {
+        Object k = e.getKey();
+        Object v = e.getValue();
+
+        String ks = "item_" + k;
+        String vs = v.toString();
+        lst.add(ks,vs);
+      }
+      
+    }
+
     return lst;
   }
 
@@ -242,3 +261,5 @@
   }
 }
 
+
+

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=724203&r1=724202&r2=724203&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/SolrIndexSearcher.java Sun Dec  7 13:11:55 2008
@@ -31,6 +31,7 @@
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoMBean;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.request.UnInvertedField;
 import org.apache.lucene.util.OpenBitSet;
 
 import java.io.IOException;
@@ -77,6 +78,7 @@
   private final SolrCache filterCache;
   private final SolrCache queryResultCache;
   private final SolrCache documentCache;
+  private final SolrCache fieldValueCache;
 
   private final LuceneQueryOptimizer optimizer;
   
@@ -140,6 +142,8 @@
     cachingEnabled=enableCache;
     if (cachingEnabled) {
       ArrayList<SolrCache> clist = new ArrayList<SolrCache>();
+      fieldValueCache = solrConfig.fieldValueCacheConfig==null ? null : solrConfig.fieldValueCacheConfig.newInstance();
+      if (fieldValueCache!=null) clist.add(fieldValueCache);
       filterCache= solrConfig.filterCacheConfig==null ? null : solrConfig.filterCacheConfig.newInstance();
       if (filterCache!=null) clist.add(filterCache);
       queryResultCache = solrConfig.queryResultCacheConfig==null ? null : solrConfig.queryResultCacheConfig.newInstance();
@@ -166,6 +170,7 @@
       filterCache=null;
       queryResultCache=null;
       documentCache=null;
+      fieldValueCache=null;
       cacheMap = noGenericCaches;
       cacheList= noCaches;
     }
@@ -232,6 +237,19 @@
   // Set default regenerators on filter and query caches if they don't have any
   //
   public static void initRegenerators(SolrConfig solrConfig) {
+    if (solrConfig.fieldValueCacheConfig != null && solrConfig.fieldValueCacheConfig.getRegenerator() == null) {
+      solrConfig.fieldValueCacheConfig.setRegenerator(
+              new CacheRegenerator() {
+                public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
+                  if (oldVal instanceof UnInvertedField) {
+                    UnInvertedField.getUnInvertedField((String)oldKey, newSearcher);
+                  }
+                  return true;
+                }
+              }
+      );
+    }
+
     if (solrConfig.filterCacheConfig != null && solrConfig.filterCacheConfig.getRegenerator() == null) {
       solrConfig.filterCacheConfig.setRegenerator(
               new CacheRegenerator() {
@@ -452,6 +470,12 @@
   ////////////////////////////////////////////////////////////////////////////////
   ////////////////////////////////////////////////////////////////////////////////
 
+  /** expert: internal API, subject to change */
+  public SolrCache getFieldValueCache() {
+    return fieldValueCache;
+  }
+
+
   /**
    * Returns the first document number containing the term <code>t</code>
    * Returns -1 if no document was found.