You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/02/20 10:43:37 UTC
svn commit: r746189 - in /lucene/solr/trunk/contrib/dataimporthandler:
CHANGES.txt src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java
Author: shalin
Date: Fri Feb 20 09:43:36 2009
New Revision: 746189
URL: http://svn.apache.org/viewvc?rev=746189&view=rev
Log:
SOLR-783 -- Enhance delta-imports by maintaining separate last_index_time for each entity
Modified:
lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java
Modified: lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/CHANGES.txt Fri Feb 20 09:43:36 2009
@@ -75,6 +75,9 @@
17.SOLR-996: Expose Context to Evaluators.
(Noble Paul, shalin)
+18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
+ (Jon Baer, Noble Paul via shalin)
+
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java Fri Feb 20 09:43:36 2009
@@ -19,6 +19,7 @@
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
+import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,15 +63,17 @@
static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<DocBuilder>();
Map<String, Object> functionsNamespace;
+ private Properties persistedProperties;
- public DocBuilder(DataImporter context, SolrWriter writer, DataImporter.RequestParams reqParams) {
+ public DocBuilder(DataImporter dataImporter, SolrWriter writer, DataImporter.RequestParams reqParams) {
INSTANCE.set(this);
- this.dataImporter = context;
+ this.dataImporter = dataImporter;
this.writer = writer;
DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
requestParameters = reqParams;
verboseDebug = requestParameters.debug && requestParameters.verbose;
- functionsNamespace = EvaluatorBag.getFunctionsNamespace(dataImporter.getConfig().functions, this);
+ functionsNamespace = EvaluatorBag.getFunctionsNamespace(this.dataImporter.getConfig().functions, this);
+ persistedProperties = writer.readIndexerProperties();
}
public VariableResolverImpl getVariableResolver() {
@@ -82,6 +85,13 @@
indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
indexerNamespace.put("request", requestParameters.requestParams);
indexerNamespace.put("functions", functionsNamespace);
+ for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
+ String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
+ String lastIndex = persistedProperties.getProperty(key);
+ if (lastIndex != null) {
+ indexerNamespace.put(key, lastIndex);
+ }
+ }
if (dataImporter.getConfig().script != null) {
indexerNamespace.put(DataConfig.SCRIPT, dataImporter.getConfig().script.script);
indexerNamespace.put(DataConfig.SCRIPT_LANG, dataImporter.getConfig().script.language);
@@ -133,10 +143,14 @@
}
AtomicBoolean fullCleanDone = new AtomicBoolean(false);
//we must not do a delete of *:* multiple times if there are multiple root entities to be run
+ Properties lastIndexTimeProps = new Properties();
+ lastIndexTimeProps.setProperty(LAST_INDEX_KEY,
+ DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime()));
for (DataConfig.Entity e : document.entities) {
if (entities != null && !entities.contains(e.name))
continue;
-
+ lastIndexTimeProps.setProperty(e.name + "." + LAST_INDEX_KEY,
+ DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
root = e;
String delQuery = e.allAttributes.get("preImportDeleteQuery");
if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP
@@ -168,11 +182,11 @@
// Do not commit unnecessarily if this is a delta-import and no documents were created or deleted
if (!requestParameters.clean) {
if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
- commit();
+ commit(lastIndexTimeProps);
}
} else {
// Finished operation normally, commit now
- commit();
+ commit(lastIndexTimeProps);
}
if (document.onImportEnd != null) {
invokeEventListener(document.onImportEnd);
@@ -185,9 +199,7 @@
}
@SuppressWarnings("unchecked")
- private void commit() {
- if (requestParameters.commit)
- writer.persistIndexStartTime(dataImporter.getIndexStartTime());
+ private void commit(Properties lastIndexTimeProps) {
LOG.info("Full Import completed successfully");
statusMessages.put("", "Indexing completed. Added/Updated: "
+ importStatistics.docCount + " documents. Deleted "
@@ -196,7 +208,8 @@
addStatusMessage("Committed");
if (requestParameters.optimize)
addStatusMessage("Optimized");
-
+ if (requestParameters.commit)
+ writer.persist(lastIndexTimeProps);
}
void rollback() {
@@ -253,7 +266,6 @@
}
if (!stop.get()) {
- writer.persistIndexStartTime(dataImporter.getIndexStartTime());
LOG.info("Delta Import completed successfully");
}
}
@@ -336,7 +348,7 @@
if (entity.isDocRoot) {
if (seenDocCount <= requestParameters.start)
continue;
- if (seenDocCount > requestParameters.start + requestParameters.rows) {
+ if (seenDocCount > requestParameters.start + requestParameters.rows) {
LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
break;
}
Modified: lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java?rev=746189&r1=746188&r2=746189&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java (original)
+++ lucene/solr/trunk/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/SolrWriter.java Fri Feb 20 09:43:36 2009
@@ -31,10 +31,7 @@
import java.util.Properties;
/**
- * <p>
- * Writes documents to SOLR as well as provides methods for loading and
- * persisting last index time.
- * </p>
+ * <p> Writes documents to SOLR as well as provides methods for loading and persisting last index time. </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
@@ -92,29 +89,14 @@
}
}
- Date getStartTime() {
- Properties props = readIndexerProperties();
- String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);
-
- try {
- if (result != null)
- return DataImporter.DATE_TIME_FORMAT.get().parse(result);
- } catch (ParseException e) {
- throw new DataImportHandlerException(DataImportHandlerException.WARN,
- "Unable to read last indexed time from: "
- + SolrWriter.IMPORTER_PROPERTIES, e);
- }
- return null;
- }
- private void persistStartTime(Date date) {
+ void persist(Properties p) {
OutputStream propOutput = null;
Properties props = readIndexerProperties();
try {
- props.put(SolrWriter.LAST_INDEX_KEY,
- DataImporter.DATE_TIME_FORMAT.get().format(date));
+ props.putAll(p);
String filePath = configDir;
if (configDir != null && !configDir.endsWith(File.separator))
filePath += File.separator;
@@ -138,7 +120,7 @@
}
}
- private Properties readIndexerProperties() {
+ Properties readIndexerProperties() {
Properties props = new Properties();
InputStream propInput = null;
@@ -183,7 +165,7 @@
}
}
- public void rollback() {
+ public void rollback() {
try {
RollbackUpdateCommand rollback = new RollbackUpdateCommand();
processor.processRollback(rollback);
@@ -236,20 +218,19 @@
}
public Date loadIndexStartTime() {
- return this.getStartTime();
- }
+ Properties props;
+ props = readIndexerProperties();
+ String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);
- /**
- * <p>
- * Stores the last indexed time into the <code>IMPORTER_PROPERTIES</code>
- * file. If any properties are already defined in the file, then they are
- * preserved.
- * </p>
- *
- * @param date the Date instance to be persisted
- */
- public void persistIndexStartTime(Date date) {
- this.persistStartTime(date);
+ try {
+ if (result != null)
+ return DataImporter.DATE_TIME_FORMAT.get().parse(result);
+ } catch (ParseException e) {
+ throw new DataImportHandlerException(DataImportHandlerException.WARN,
+ "Unable to read last indexed time from: "
+ + SolrWriter.IMPORTER_PROPERTIES, e);
+ }
+ return null;
}
/**