You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/20 10:43:37 UTC

svn commit: r746189 - in /lucene/solr/trunk/contrib/dataimporthandler: CHANGES.txt src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java

Author: shalin
Date: Fri Feb 20 09:43:36 2009
New Revision: 746189

URL: http://svn.apache.org/viewvc?rev=746189&view=rev
Log:
SOLR-783 -- Enhance delta-imports by maintaining separate last_index_time for each entity

Modified:
    lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
    lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java

Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Fri Feb 20 09:43:36 2009
@@ -75,6 +75,9 @@
 17.SOLR-996:  Expose Context to Evaluators.
               (Noble Paul, shalin)
 
+18.SOLR-783:  Enhance delta-imports by maintaining separate last_index_time for each entity.
+              (Jon Baer, Noble Paul via shalin)
+
 Optimizations
 ----------------------
 1. SOLR-846:  Reduce memory consumption during delta import by removing keys when used

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java Fri Feb 20 09:43:36 2009
@@ -19,6 +19,7 @@
 
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.core.SolrCore;
+import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY;
 import org.apache.solr.schema.SchemaField;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -62,15 +63,17 @@
 
   static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<DocBuilder>();
   Map<String, Object> functionsNamespace;
+  private Properties persistedProperties;
 
-  public DocBuilder(DataImporter context, SolrWriter writer, DataImporter.RequestParams reqParams) {
+  public DocBuilder(DataImporter dataImporter, SolrWriter writer, DataImporter.RequestParams reqParams) {
     INSTANCE.set(this);
-    this.dataImporter = context;
+    this.dataImporter = dataImporter;
     this.writer = writer;
     DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
     requestParameters = reqParams;
     verboseDebug = requestParameters.debug && requestParameters.verbose;
-    functionsNamespace = EvaluatorBag.getFunctionsNamespace(dataImporter.getConfig().functions, this);
+    functionsNamespace = EvaluatorBag.getFunctionsNamespace(this.dataImporter.getConfig().functions, this);
+    persistedProperties = writer.readIndexerProperties();
   }
 
   public VariableResolverImpl getVariableResolver() {
@@ -82,6 +85,13 @@
     indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
     indexerNamespace.put("request", requestParameters.requestParams);
     indexerNamespace.put("functions", functionsNamespace);
+    for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
+      String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
+      String lastIndex = persistedProperties.getProperty(key);
+      if (lastIndex != null) {
+        indexerNamespace.put(key, lastIndex);
+      }
+    }
     if (dataImporter.getConfig().script != null) {
       indexerNamespace.put(DataConfig.SCRIPT, dataImporter.getConfig().script.script);
       indexerNamespace.put(DataConfig.SCRIPT_LANG, dataImporter.getConfig().script.language);
@@ -133,10 +143,14 @@
     }
     AtomicBoolean fullCleanDone = new AtomicBoolean(false);
     //we must not do a delete of *:* multiple times if there are multiple root entities to be run
+    Properties lastIndexTimeProps = new Properties();
+    lastIndexTimeProps.setProperty(LAST_INDEX_KEY,
+            DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime()));
     for (DataConfig.Entity e : document.entities) {
       if (entities != null && !entities.contains(e.name))
         continue;
-
+      lastIndexTimeProps.setProperty(e.name + "." + LAST_INDEX_KEY,
+              DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
       root = e;
       String delQuery = e.allAttributes.get("preImportDeleteQuery");
       if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP
@@ -168,11 +182,11 @@
       // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted
       if (!requestParameters.clean) {
         if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
-          commit();
+          commit(lastIndexTimeProps);
         }
       } else {
         // Finished operation normally, commit now
-        commit();
+        commit(lastIndexTimeProps);
       }
       if (document.onImportEnd != null) {
         invokeEventListener(document.onImportEnd);
@@ -185,9 +199,7 @@
   }
 
   @SuppressWarnings("unchecked")
-  private void commit() {
-    if (requestParameters.commit)
-      writer.persistIndexStartTime(dataImporter.getIndexStartTime());
+  private void commit(Properties lastIndexTimeProps) {
     LOG.info("Full Import completed successfully");
     statusMessages.put("", "Indexing completed. Added/Updated: "
             + importStatistics.docCount + " documents. Deleted "
@@ -196,7 +208,8 @@
     addStatusMessage("Committed");
     if (requestParameters.optimize)
       addStatusMessage("Optimized");
-
+    if (requestParameters.commit)
+      writer.persist(lastIndexTimeProps);
   }
 
   void rollback() {
@@ -253,7 +266,6 @@
     }
 
     if (!stop.get()) {
-      writer.persistIndexStartTime(dataImporter.getIndexStartTime());
       LOG.info("Delta Import completed successfully");
     }
   }
@@ -336,7 +348,7 @@
           if (entity.isDocRoot) {
             if (seenDocCount <= requestParameters.start)
               continue;
-            if (seenDocCount > requestParameters.start + requestParameters.rows)  {
+            if (seenDocCount > requestParameters.start + requestParameters.rows) {
               LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
               break;
             }

Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java Fri Feb 20 09:43:36 2009
@@ -31,10 +31,7 @@
 import java.util.Properties;
 
 /**
- * <p>
- * Writes documents to SOLR as well as provides methods for loading and
- * persisting last index time.
- * </p>
+ * <p> Writes documents to SOLR as well as provides methods for loading and persisting last index time. </p>
  * <p/>
  * <b>This API is experimental and may change in the future.</b>
  *
@@ -92,29 +89,14 @@
     }
   }
 
-  Date getStartTime() {
-    Properties props = readIndexerProperties();
-    String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);
-
-    try {
-      if (result != null)
-        return DataImporter.DATE_TIME_FORMAT.get().parse(result);
-    } catch (ParseException e) {
-      throw new DataImportHandlerException(DataImportHandlerException.WARN,
-              "Unable to read last indexed time from: "
-                      + SolrWriter.IMPORTER_PROPERTIES, e);
-    }
-    return null;
-  }
 
-  private void persistStartTime(Date date) {
+  void persist(Properties p) {
     OutputStream propOutput = null;
 
     Properties props = readIndexerProperties();
 
     try {
-      props.put(SolrWriter.LAST_INDEX_KEY,
-              DataImporter.DATE_TIME_FORMAT.get().format(date));
+      props.putAll(p);
       String filePath = configDir;
       if (configDir != null && !configDir.endsWith(File.separator))
         filePath += File.separator;
@@ -138,7 +120,7 @@
     }
   }
 
-  private Properties readIndexerProperties() {
+  Properties readIndexerProperties() {
     Properties props = new Properties();
     InputStream propInput = null;
 
@@ -183,7 +165,7 @@
     }
   }
 
-  public void rollback()  {
+  public void rollback() {
     try {
       RollbackUpdateCommand rollback = new RollbackUpdateCommand();
       processor.processRollback(rollback);
@@ -236,20 +218,19 @@
   }
 
   public Date loadIndexStartTime() {
-    return this.getStartTime();
-  }
+    Properties props;
+    props = readIndexerProperties();
+    String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);
 
-  /**
-   * <p>
-   * Stores the last indexed time into the <code>IMPORTER_PROPERTIES</code>
-   * file. If any properties are already defined in the file, then they are
-   * preserved.
-   * </p>
-   *
-   * @param date the Date instance to be persisted
-   */
-  public void persistIndexStartTime(Date date) {
-    this.persistStartTime(date);
+    try {
+      if (result != null)
+        return DataImporter.DATE_TIME_FORMAT.get().parse(result);
+    } catch (ParseException e) {
+      throw new DataImportHandlerException(DataImportHandlerException.WARN,
+              "Unable to read last indexed time from: "
+                      + SolrWriter.IMPORTER_PROPERTIES, e);
+    }
+    return null;
   }
 
   /**