You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2012/10/26 12:49:34 UTC

svn commit: r1402463 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/schema/ core/src/java/org/apache/solr/search/function/

Author: romseygeek
Date: Fri Oct 26 10:49:33 2012
New Revision: 1402463

URL: http://svn.apache.org/viewvc?rev=1402463&view=rev
Log:
SOLR-3985: Allow ExternalFileField caches to be reloaded on newSearcher/firstSearcher events

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileFieldReloader.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1402463&r1=1402462&r2=1402463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Oct 26 10:49:33 2012
@@ -49,6 +49,9 @@ New Features
   underlying analyzed form used for suggestions is separate from the returned
   text.  (Robert Muir)
 
+* SOLR-3985: ExternalFileField caches can be reloaded on firstSearcher/
+  newSearcher events using the ExternalFileFieldReloader (Alan Woodward)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java?rev=1402463&r1=1402462&r2=1402463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java Fri Oct 26 10:49:33 2012
@@ -16,18 +16,16 @@
  */
 package org.apache.solr.schema;
 
+import org.apache.lucene.index.StorableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.index.GeneralField;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.StorableField;
-import org.apache.solr.search.function.FileFloatSource;
-import org.apache.solr.search.QParser;
-import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.function.FileFloatSource;
 
-import java.util.Map;
 import java.io.IOException;
+import java.util.Map;
 
 /** Get values from an external file instead of the index.
  *
@@ -55,7 +53,7 @@ import java.io.IOException;
  * <p/>The external file may be sorted or unsorted by the key field, but it will be substantially slower (untested) if it isn't sorted.
  * <p/>Fields of this type may currently only be used as a ValueSource in a FunctionQuery.
  *
- *
+ * @see ExternalFileFieldReloader
  */
 public class ExternalFileField extends FieldType {
   private FieldType ftype;
@@ -94,10 +92,26 @@ public class ExternalFileField extends F
 
   @Override
   public ValueSource getValueSource(SchemaField field, QParser parser) {
-    // default key field to unique key
-    SchemaField keyField = keyFieldName==null ? schema.getUniqueKeyField() : schema.getField(keyFieldName);
-    return new FileFloatSource(field, keyField, defVal, parser);
+    return getFileFloatSource(field, parser.getReq().getCore().getDataDir());
   }
 
+  /**
+   * Get a FileFloatSource for the given field, looking in datadir for the relevant file
+   * @param field the field to get a source for
+   * @param datadir the data directory in which to look for the external file
+   * @return a FileFloatSource
+   */
+  public FileFloatSource getFileFloatSource(SchemaField field, String datadir) {
+    // Because the float source uses a static cache, all source objects will
+    // refer to the same data.
+    return new FileFloatSource(field, getKeyField(), defVal, datadir);
+  }
+
+  // If no key field is defined, we use the unique key field
+  private SchemaField getKeyField() {
+    return keyFieldName == null ?
+        schema.getUniqueKeyField() :
+        schema.getField(keyFieldName);
+  }
 
 }

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileFieldReloader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileFieldReloader.java?rev=1402463&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileFieldReloader.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/ExternalFileFieldReloader.java Fri Oct 26 10:49:33 2012
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.AbstractSolrEventListener;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.function.FileFloatSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An event listener to reload ExternalFileFields for new searchers.
+ * <p>
+ * Opening a new IndexSearcher will invalidate the internal caches used by
+ * {@link ExternalFileField}.  By default, these caches are reloaded lazily
+ * by the first search that uses them.  For large external files, this can
+ * slow down searches unacceptably.
+ * <p>
+ * To reload the caches when the searcher is first opened, set up event
+ * listeners in your solrconfig.xml:
+ *
+ * <pre>
+ *   &lt;listener event="newSearcher" class="org.apache.solr.schema.ExternalFileFieldReloader"/>
+ *   &lt;listener event="firstSearcher" class="org.apache.solr.schema.ExternalFileFieldReloader"/>
+ * </pre>
+ * <p>
+ * The caches will be reloaded for all ExternalFileFields in your schema after
+ * each commit.
+ */
+public class ExternalFileFieldReloader extends AbstractSolrEventListener {
+
+  private IndexSchema schema;
+  private String datadir;
+  private List<FileFloatSource> fieldSources = new ArrayList<FileFloatSource>();
+
+  private static final Logger log = LoggerFactory.getLogger(ExternalFileFieldReloader.class);
+
+  public ExternalFileFieldReloader(SolrCore core) {
+    super(core);
+    schema = core.getSchema();
+    datadir = core.getDataDir();
+  }
+
+  @Override
+  public void init(NamedList args) {
+    for (SchemaField field : schema.getFields().values()) {
+      FieldType type = field.getType();
+      if (type instanceof ExternalFileField) {
+        ExternalFileField eff = (ExternalFileField) type;
+        fieldSources.add(eff.getFileFloatSource(field, datadir));
+        log.info("Adding ExternalFileFieldReloader listener for field {}", field.getName());
+      }
+    }
+  }
+
+  @Override
+  public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
+    // We need to reload the caches for the new searcher
+    IndexReader reader = newSearcher.getIndexReader();
+    for (FileFloatSource fieldSource : fieldSources) {
+      fieldSource.refreshCache(reader);
+    }
+  }
+}
+

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java?rev=1402463&r1=1402462&r2=1402463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java Fri Oct 26 10:49:33 2012
@@ -16,24 +16,7 @@
  */
 package org.apache.solr.search.function;
 
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.WeakHashMap;
-
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.*;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.docvalues.FloatDocValues;
@@ -47,29 +30,45 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.QParser;
 import org.apache.solr.update.processor.UpdateRequestProcessor;
 import org.apache.solr.util.VersionedFile;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.*;
+
 /**
  * Obtains float field values from an external file.
  *
+ * @see org.apache.solr.schema.ExternalFileField
+ * @see org.apache.solr.schema.ExternalFileFieldReloader
  */
 
 public class FileFloatSource extends ValueSource {
+
   private SchemaField field;
   private final SchemaField keyField;
   private final float defVal;
-
   private final String dataDir;
 
-  public FileFloatSource(SchemaField field, SchemaField keyField, float defVal, QParser parser) {
+  private static final Logger log = LoggerFactory.getLogger(FileFloatSource.class);
+
+  /**
+   * Creates a new FileFloatSource
+   * @param field the source's SchemaField
+   * @param keyField the field to use as a key
+   * @param defVal the default value to use if a field has no entry in the external file
+   * @param datadir the directory in which to look for the external file
+   */
+  public FileFloatSource(SchemaField field, SchemaField keyField, float defVal, String datadir) {
     this.field = field;
     this.keyField = keyField;
     this.defVal = defVal;
-    this.dataDir = parser.getReq().getCore().getDataDir();
+    this.dataDir = datadir;
   }
 
   @Override
@@ -117,11 +116,27 @@ public class FileFloatSource extends Val
             + ",defVal="+defVal+",dataDir="+dataDir+")";
 
   }
-  
+
+  /**
+   * Remove all cached entries.  Values are lazily loaded next time getValues() is
+   * called.
+   */
   public static void resetCache(){
     floatCache.resetCache();
   }
 
+  /**
+   * Refresh the cache for an IndexReader.  The new values are built by the calling thread
+   * before the cache lock is taken and then swapped in, so queries against the cache should
+   * not block while the reload is happening.
+   * @param reader the IndexReader whose cache needs refreshing
+   */
+  public void refreshCache(IndexReader reader) {
+    log.info("Refreshing FlaxFileFloatSource cache for field {}", this.field.getName());
+    floatCache.refresh(reader, new Entry(this));
+    log.info("FlaxFileFloatSource cache for field {} reloaded", this.field.getName());
+  }
+
   private final float[] getCachedFloats(IndexReader reader) {
     return (float[])floatCache.get(reader, new Entry(this));
   }
@@ -139,6 +154,18 @@ public class FileFloatSource extends Val
 
     protected abstract Object createValue(IndexReader reader, Object key);
 
+    public void refresh(IndexReader reader, Object key) {
+      Object refreshedValues = createValue(reader, key);
+      synchronized (readerCache) {
+        Map innerCache = (Map) readerCache.get(reader);
+        if (innerCache == null) {
+          innerCache = new HashMap();
+          readerCache.put(reader, innerCache);
+        }
+        innerCache.put(key, refreshedValues);
+      }
+    }
+
     public Object get(IndexReader reader, Object key) {
       Map innerCache;
       Object value;