You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by pr...@apache.org on 2012/08/29 16:47:36 UTC

svn commit: r1378586 - in /oodt/trunk/filemgr/src/main: java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java resources/indexer.properties

Author: pramirez
Date: Wed Aug 29 14:47:36 2012
New Revision: 1378586

URL: http://svn.apache.org/viewvc?rev=1378586&view=rev
Log:
OODT-488 Enhance Solr Indexer Capabilities.

Support for:
 -piping in product ids
 -formatting dates
 -metadata substitution
 -annotation with product type metadata

Modified:
    oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java
    oodt/trunk/filemgr/src/main/resources/indexer.properties

Modified: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java?rev=1378586&r1=1378585&r2=1378586&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java (original)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java Wed Aug 29 14:47:36 2012
@@ -18,15 +18,17 @@
 package org.apache.oodt.cas.filemgr.tools;
 
 //JDK imports
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Properties;
 import java.util.logging.Logger;
@@ -47,11 +49,11 @@ import org.apache.solr.client.solrj.Solr
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
 import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.core.CoreContainer;
 
 //OODT imports
 import org.apache.oodt.cas.filemgr.metadata.CoreMetKeys;
 import org.apache.oodt.cas.filemgr.structs.Product;
+import org.apache.oodt.cas.filemgr.structs.ProductPage;
 import org.apache.oodt.cas.filemgr.structs.ProductType;
 import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
 import org.apache.oodt.cas.filemgr.structs.exceptions.ConnectionException;
@@ -59,362 +61,644 @@ import org.apache.oodt.cas.filemgr.struc
 import org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient;
 import org.apache.oodt.cas.metadata.Metadata;
 import org.apache.oodt.cas.metadata.SerializableMetadata;
+import org.apache.oodt.cas.metadata.util.PathUtils;
 
 /**
- * 
- * Indexes the File Manager Catalog to Solr. Uses an associated config file,
- * indexer.properties to specify how to perform the indexing. See
- * indexer.properties in the src/main/resources directory of file manager for
+ * Indexes products from the File Manager catalog to a Solr instance. Uses an
+ * associated config file, indexer.properties to specify how to perform the
+ * indexing. See indexer.properties in the src/main/resources directory for
  * specific documentation.
- * 
  */
 public class SolrIndexer {
-  private final static String SOLR_INDEXER_CONFIG = "SOLR_INDEXER_CONFIG";
-  private final static String SOLR_URL = "solr.url";
-  private final static String FILEMGR_URL = "filemgr.url";
-  private IndexerConfig config = null;
-  private final SolrServer server;
-  private CoreContainer coreContainer;
-  private String fmUrl;
-  private String solrUrl;
-  private static Logger LOG = Logger.getLogger(SolrIndexer.class.getName());
-
-  public SolrIndexer(String solrUrl, String fmUrl)
-      throws InstantiationException {
-    InputStream input = null;
-    String filename = null;
-
-    try {
-      LOG.info("System property " + SOLR_INDEXER_CONFIG + " set to "
-          + System.getProperty(SOLR_INDEXER_CONFIG));
-      filename = System.getProperty(SOLR_INDEXER_CONFIG);
-      if (filename != null) {
-        LOG.info("Reading config from " + filename);
-        input = new FileInputStream(filename);
-      } else {
-        LOG.info("Config file not found reading config from classpath");
-        input = SolrIndexer.class.getResourceAsStream("indexer.properties");
-      }
-      config = new IndexerConfig(input);
-    } catch (IOException e) {
-      LOG.severe("Could not read in configuration for indexer from classpath or file");
-      throw new InstantiationException(e.getMessage());
-    } finally {
-      if (input != null) {
-        try {
-          input.close();
-        } catch (IOException e) {
-          // no op
-        }
-      }
-    }
-
-    this.solrUrl = solrUrl;
-    if (this.solrUrl == null) {
-      this.solrUrl = config.getProperty(SOLR_URL);
-    }
-
-    this.fmUrl = fmUrl;
-    if (this.fmUrl == null) {
-      this.fmUrl = config.getProperty(FILEMGR_URL);
-    }
-
-    LOG.info("Using Solr: " + this.solrUrl + " FileManager: " + this.fmUrl);
-
-    try {
-      server = new CommonsHttpSolrServer(this.solrUrl);
-    } catch (MalformedURLException e) {
-      LOG.severe("Could not connect to Solr server " + this.solrUrl);
-      throw new InstantiationException(e.getMessage());
-    }
-
-  }
-
-  public void shutdown() {
-    coreContainer.shutdown();
-  }
-
-  public void commit() throws SolrServerException, IOException {
-    server.commit();
-  }
-
-  public void optimize() throws SolrServerException, IOException {
-    server.optimize();
-  }
-
-  @SuppressWarnings("unchecked")
-  private SolrInputDocument getSolrDocument(Metadata metadata) {
-    SolrInputDocument doc = new SolrInputDocument();
-
-    for (Object objKey : config.getMapProperties().keySet()) {
-      String key = (String) objKey;
-      if (metadata.isMultiValued(key)) {
-        List<String> values = metadata.getAllMetadata(key);
-        for (String value : values) {
-          if (value != null && !config.getIgnoreValues().contains(value.trim())) {
-            LOG.fine("Adding field: "
-                + config.getMapProperties().getProperty(key) + " value: "
-                + value);
-            doc.addField(config.getMapProperties().getProperty(key), value);
-          }
-        }
-      } else {
-        String value = metadata.getMetadata(key);
-        if (value != null && !config.getIgnoreValues().contains(value.trim())) {
-          LOG.fine("Adding field: "
-              + config.getMapProperties().getProperty(key) + " value: " + value);
-          doc.addField(config.getMapProperties().getProperty(key), value);
-        }
-      }
-    }
-
-    return doc;
-  }
-
-  public void indexMetFile(File file, boolean delete)
-      throws InstantiationException, FileNotFoundException, IOException,
-      SolrServerException {
-    SerializableMetadata metadata = new SerializableMetadata("UTF-8", false);
-    metadata.loadMetadataFromXmlStream(new FileInputStream(file));
-    if (delete) {
-      server.deleteById(metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID));
-    }
-    server.add(this.getSolrDocument(metadata));
-  }
-
-  public void indexAll(boolean delete) throws SolrServerException {
-    LOG.info("Indexing");
-    try {
-      XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
-          this.fmUrl));
-      if (delete) {
-        server.deleteByQuery("*:*");
-      }
-      LOG.info("Looking up product types");
-      List<ProductType> types = fmClient.getProductTypes();
-      for (ProductType type : types) {
-        if (!config.getIgnoreTypes().contains(type.getName().trim())) {
-          LOG.info("Looking up products for product type: " + type.getName());
-          List<Product> products = fmClient.getProductsByProductType(type);
-          for (Product product : products) {
-            LOG.info("Looking up metadata for ProductId "
-                + product.getProductId());
-            Metadata metadata = fmClient.getMetadata(product);
-            if (metadata != null) {
-              LOG.info("Found metadata for product ID "
-                  + metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID));
-            } else {
-              LOG.info("Could not find metadata for product "
-                  + product.getProductId());
-            }
-            if (metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID) != null) {
-              if (metadata.getMetadata("Deleted") == null
-                  || !"true".equals(metadata.getMetadata("Deleted"))) {
-                try {
-                  server.add(this.getSolrDocument(metadata));
-                  server.commit();
-                  LOG.info("Indexed " + metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID));
-                } catch (Exception e) {
-                  LOG.severe("Could not index " + metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID)
-                      + " " + e.getMessage());
-                }
-              } else {
-                LOG.info("Skipping Deleted: " + metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID));
-              }
-            }
-          }
-        } else {
-          LOG.info("Ignoring product type " + type.getName());
-        }
-      }
-    } catch (MalformedURLException e) {
-      LOG.severe("File Manager URL is malformed: " + e.getMessage());
-    } catch (ConnectionException e) {
-      LOG.severe("Could not connect to File Manager: " + e.getMessage());
-    } catch (IOException e) {
-      LOG.severe("Could not delete all: " + e.getMessage());
-    } catch (RepositoryManagerException e) {
-      LOG.severe("Could not look up product types: " + e.getMessage());
-    } catch (CatalogException e) {
-      LOG.severe("Query to File Manager failed: " + e.getMessage());
-    }
-    LOG.info("Finished Indexing");
-  }
-
-  public void indexProduct(String productId, boolean delete)
-      throws SolrServerException, IOException, ConnectionException,
-      CatalogException {
-    XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
-        this.fmUrl));
-    if (delete) {
-      server.deleteById(productId);
-    }
-
-    Product product = fmClient.getProductById(productId);
-    Metadata metadata = fmClient.getMetadata(product);
-    server.add(this.getSolrDocument(metadata));
-  }
-
-  @SuppressWarnings("static-access")
-  public static Options buildCommandLine() {
-    Options options = new Options();
-
-    options.addOption(new Option("h", "help", false, "Print this message"));
-    options.addOption(new Option("o", "optimize", false,
-        "Optimize the Solr index when done"));
-    options.addOption(new Option("d", "delete", false,
-        "Delete items before indexing"));
-    options.addOption(OptionBuilder.withArgName("Solr URL").hasArg()
-        .withDescription("URL to the Solr server").withLongOpt("solrUrl")
-        .create("su"));
-    options.addOption(OptionBuilder.withArgName("Filemgr URL").hasArg()
-        .withDescription("URL to the CAS FileManager").withLongOpt("fmUrl")
-        .create("fmu"));
-
-    OptionGroup group = new OptionGroup();
-    Option all = new Option("a", "all", false, "Index all items in catalog");
-    Option met = OptionBuilder.withArgName("file").hasArg()
-        .withDescription("Index this met file").withLongOpt("metFile")
-        .create("mf");
-    Option query = OptionBuilder.withArgName("query").hasArg()
-        .withDescription("Not yet implemented").withLongOpt("catalogQuery")
-        .create("cq");
-    Option product = OptionBuilder.withArgName("productId").hasArg()
-        .withDescription("Product id to index").withLongOpt("product")
-        .create("p");
-
-    group.addOption(all);
-    group.addOption(met);
-    group.addOption(query);
-    group.addOption(product);
-    options.addOptionGroup(group);
-
-    return options;
-  }
-
-  public static void main(String[] args) throws Exception {
-    Options options = SolrIndexer.buildCommandLine();
-    CommandLineParser parser = new GnuParser();
-    CommandLine line = null;
-
-    try {
-      line = parser.parse(options, args);
-    } catch (ParseException e) {
-      LOG.severe("Could not parse command line: " + e.getMessage());
-    }
-
-    if (line == null || line.hasOption("help") || line.getOptions().length == 0) {
-      HelpFormatter formatter = new HelpFormatter();
-      formatter.printHelp("java " + SolrIndexer.class.getName(), options);
-    } else if (line.hasOption("all") || line.hasOption("product")
-        || line.hasOption("metFile") || line.hasOption("catalogQuery")) {
-      SolrIndexer indexer = null;
-      String solrUrl = null;
-      String fmUrl = null;
-      if (line.hasOption("solrUrl")) {
-        solrUrl = line.getOptionValue("solrUrl");
-      }
-      if (line.hasOption("fmUrl")) {
-        fmUrl = line.getOptionValue("fmUrl");
-      }
-      try {
-        indexer = new SolrIndexer(solrUrl, fmUrl);
-        if (line.hasOption("all")) {
-          indexer.indexAll(line.hasOption("delete"));
-        } else if (line.hasOption("product")) {
-          indexer.indexProduct(line.getOptionValue("product"),
-              line.hasOption("delete"));
-        } else if (line.hasOption("metFile")) {
-          indexer.indexMetFile(new File(line.getOptionValue("metFile")),
-              line.hasOption("delete"));
-        } else {
-          LOG.info("Catalog query not yet implemented.");
-        }
-        indexer.commit();
-        if (line.hasOption("optimize")) {
-          indexer.optimize();
-        }
-      } catch (Exception ex) {
-        LOG.severe("Did not complete indexing: " + ex.getMessage());
-        ex.printStackTrace();
-      }
-    }
-
-  }
-
-  public class IndexerConfig {
-    private final static String PREFIX_CONFIG = "config.";
-    private final static String PREFIX_MET = "map.";
-    private Properties properties = new Properties();
-    private Properties mapProperties = new Properties();
-    private HashMap<String, Properties> xmlMapProperties = new HashMap<String, Properties>();
-    private List<String> xmlKeys = new ArrayList<String>();
-    private List<String> xmlMultiKeys = new ArrayList<String>();
-    private List<String> ignoreTypes = new ArrayList<String>();
-    private List<String> ignoreValues = new ArrayList<String>();
-
-    public IndexerConfig(InputStream inputStream) throws IOException {
-      Properties props = new Properties();
-      props.load(inputStream);
-      for (Object objKey : props.keySet()) {
-        String key = (String) objKey;
-        if (key.startsWith(PREFIX_CONFIG)) {
-          properties.put(key.substring(PREFIX_CONFIG.length()),
-              props.getProperty(key));
-        } else if (key.startsWith(PREFIX_MET)) {
-          mapProperties.put(key.substring(PREFIX_MET.length()),
-              props.getProperty(key));
-        }
-      }
-
-      if (properties.getProperty("ignore.types") != null) {
-        String[] values = properties.getProperty("ignore.types").trim()
-            .split(",");
-        for (String value : values) {
-          ignoreTypes.add(value);
-        }
-      }
-      if (properties.getProperty("ignore.values") != null) {
-        String[] values = properties.getProperty("ignore.values").trim()
-            .split(",");
-        for (String value : values) {
-          ignoreValues.add(value);
-        }
-      }
-    }
-
-    public String getProperty(String key) {
-      return properties.getProperty(key);
-    }
-
-    public String getProperty(String key, String defaultValue) {
-      return properties.getProperty(key, defaultValue);
-    }
-
-    public Properties getMapProperties() {
-      return mapProperties;
-    }
-
-    public Properties getXmlMapProperties(String name) {
-      return xmlMapProperties.get(name);
-    }
-
-    public List<String> getXmlKeys() {
-      return this.xmlKeys;
-    }
-
-    public List<String> getXmlMultiKeys() {
-      return this.xmlMultiKeys;
-    }
-
-    public List<String> getIgnoreTypes() {
-      return this.ignoreTypes;
-    }
-
-    public List<String> getIgnoreValues() {
-      return this.ignoreValues;
-    }
-
-  }
+	// Name of the system property that may point at an indexer config file.
+	private final static String SOLR_INDEXER_CONFIG = "SOLR_INDEXER_CONFIG";
+	// Config keys consulted when no URL is passed to the constructor.
+	private final static String SOLR_URL = "solr.url";
+	private final static String FILEMGR_URL = "filemgr.url";
+	// Config keys giving the metadata key and the value used for access URLs.
+	private final static String ACCESS_KEY = "access.key";
+	private final static String ACCESS_URL = "access.url";
+	private IndexerConfig config = null;
+	private final SolrServer server;
+	private String fmUrl;
+	private String solrUrl;
+	private static Logger LOG = Logger.getLogger(SolrIndexer.class.getName());
+	// Output format applied by formatDate(); the trailing 'Z' is a quoted
+	// literal in the pattern. NOTE(review): SimpleDateFormat is not thread-safe,
+	// so this shared instance assumes single-threaded use - confirm.
+	private final static SimpleDateFormat solrFormat = new SimpleDateFormat(
+	    "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+
+	/**
+	 * Constructor reads in the configuration and initiates the connection to the
+	 * Solr instance. The configuration is loaded from the file named by the
+	 * SOLR_INDEXER_CONFIG system property, or from /indexer.properties on the
+	 * classpath when that property is not set.
+	 * 
+	 * @param solrUrl
+	 *          URL for the Solr instance. When null, the solr.url value from the
+	 *          configuration is used.
+	 * @param fmUrl
+	 *          URL for the File Manager instance. When null, the filemgr.url
+	 *          value from the configuration is used.
+	 * @throws InstantiationException
+	 *           When the configuration cannot be found or read, or when the
+	 *           Solr URL is malformed.
+	 */
+	public SolrIndexer(String solrUrl, String fmUrl)
+	    throws InstantiationException {
+		InputStream input = null;
+		String filename = null;
+
+		try {
+			LOG.info("System property " + SOLR_INDEXER_CONFIG + " set to "
+			    + System.getProperty(SOLR_INDEXER_CONFIG));
+			filename = System.getProperty(SOLR_INDEXER_CONFIG);
+			if (filename != null) {
+				LOG.info("Reading config from " + filename);
+				input = new FileInputStream(filename);
+			} else {
+				LOG.info("Config file not found, reading config from classpath");
+				input = SolrIndexer.class.getResourceAsStream("/indexer.properties");
+				// getResourceAsStream returns null instead of throwing when the
+				// resource is missing; report that through the normal error path.
+				if (input == null) {
+					throw new IOException("indexer.properties not found on classpath");
+				}
+			}
+			config = new IndexerConfig(input);
+		} catch (IOException e) {
+			LOG.severe("Could not read in configuration for indexer from classpath or file");
+			throw new InstantiationException(e.getMessage());
+		} finally {
+			if (input != null) {
+				try {
+					input.close();
+				} catch (IOException e) {
+					// no op
+				}
+			}
+		}
+
+		// Explicit arguments win; fall back to the configured URLs otherwise.
+		this.solrUrl = solrUrl;
+		if (this.solrUrl == null) {
+			this.solrUrl = config.getProperty(SOLR_URL);
+		}
+
+		this.fmUrl = fmUrl;
+		if (this.fmUrl == null) {
+			this.fmUrl = config.getProperty(FILEMGR_URL);
+		}
+
+		LOG.info("Using Solr: " + this.solrUrl + " FileManager: " + this.fmUrl);
+
+		try {
+			server = new CommonsHttpSolrServer(this.solrUrl);
+		} catch (MalformedURLException e) {
+			LOG.severe("Could not connect to Solr server " + this.solrUrl);
+			throw new InstantiationException(e.getMessage());
+		}
+
+	}
+
+	/**
+	 * This method deletes all entries from the Solr index by issuing a delete
+	 * for the match-all query.
+	 * 
+	 * @throws SolrServerException
+	 *           Propagated from the underlying SolrServer call.
+	 * @throws IOException
+	 *           Propagated from the underlying SolrServer call.
+	 */
+	public void delete() throws SolrServerException, IOException {
+		server.deleteByQuery("*:*");
+	}
+
+	/**
+	 * This method commits all of the modifications to the Solr index.
+	 * 
+	 * @throws SolrServerException
+	 *           Propagated from the underlying SolrServer call.
+	 * @throws IOException
+	 *           Propagated from the underlying SolrServer call.
+	 */
+	public void commit() throws SolrServerException, IOException {
+		server.commit();
+	}
+
+	/**
+	 * This method optimizes the Solr index.
+	 * 
+	 * @throws SolrServerException
+	 *           Propagated from the underlying SolrServer call.
+	 * @throws IOException
+	 *           Propagated from the underlying SolrServer call.
+	 */
+	public void optimize() throws SolrServerException, IOException {
+		server.optimize();
+	}
+
+	/**
+	 * This method transforms the product metadata into a Solr document. Only
+	 * metadata keys that have a mapping in the indexer configuration are copied,
+	 * and values listed as ignore values are left out.
+	 * 
+	 * @param metadata
+	 *          The metadata object for the product to index.
+	 * @return Returns the SolrInputDocument containing product metadata.
+	 */
+	private SolrInputDocument getSolrDocument(Metadata metadata) {
+		SolrInputDocument doc = new SolrInputDocument();
+		// Only grab metadata which have a mapping in the indexer.properties
+		for (Object objKey : config.getMapProperties().keySet()) {
+			// The key in the metadata object
+			String key = (String) objKey;
+			// The solr field name this metadata key will be mapped to
+			String fieldName = config.getMapProperties().getProperty(key);
+			List<String> values = metadata.getAllMetadata(key);
+			// NOTE(review): getAllMetadata is assumed to return null when the
+			// product carries no value for a mapped key; skip such keys instead
+			// of failing on the iteration below.
+			if (values == null) {
+				continue;
+			}
+			for (String value : values) {
+				// Add each non-null, non-ignored metadata value to the document
+				if (value != null && !config.getIgnoreValues().contains(value.trim())) {
+					LOG.fine("Adding field: " + fieldName + " value: " + value);
+					doc.addField(fieldName, value);
+				}
+			}
+		}
+		return doc;
+	}
+
+	/**
+	 * This method adds a single product extracted from a metadata file to the
+	 * Solr index. Problems instantiating the metadata object, finding the file
+	 * or performing I/O are logged rather than rethrown.
+	 * 
+	 * @param file
+	 *          The file containing product metadata.
+	 * @param delete
+	 *          Flag indicating whether the entry should be deleted from the
+	 *          index before it is added.
+	 * @throws SolrServerException
+	 *           When an error occurs communicating with the Solr server instance.
+	 */
+	public void indexMetFile(File file, boolean delete)
+	    throws InstantiationException, FileNotFoundException, IOException,
+	    SolrServerException {
+		LOG.info("Attempting to index product from metadata file.");
+		InputStream input = null;
+		try {
+			SerializableMetadata metadata = new SerializableMetadata("UTF-8", false);
+			input = new FileInputStream(file);
+			metadata.loadMetadataFromXmlStream(input);
+			String productId = metadata.getMetadata("CAS." + CoreMetKeys.PRODUCT_ID);
+			metadata.addMetadata("id", productId);
+			// Only add the access URL when one is configured, as the private
+			// indexProduct does; otherwise the text "null" would be indexed.
+			String accessUrl = config.getProperty(ACCESS_URL);
+			if (accessUrl != null) {
+				metadata.addMetadata(config.getProperty(ACCESS_KEY), accessUrl
+				    + productId);
+			}
+			if (delete) {
+				server.deleteById(productId);
+			}
+			server.add(this.getSolrDocument(metadata));
+			LOG.info("Indexed product: " + productId);
+		} catch (InstantiationException e) {
+			LOG.severe("Could not instantiate metadata object: " + e.getMessage());
+		} catch (FileNotFoundException e) {
+			LOG.severe("Could not find metadata file: " + e.getMessage());
+		} catch (IOException e) {
+			LOG.severe("Could not delete product from index: " + e.getMessage());
+		} finally {
+			// Release the file handle whether or not indexing succeeded.
+			if (input != null) {
+				try {
+					input.close();
+				} catch (IOException e) {
+					// Nothing useful can be done if closing the stream fails.
+				}
+			}
+		}
+	}
+
+	/**
+	 * This method indexes all product types retrieved from the File Manager to
+	 * the Solr index. Product types listed as ignore types in the configuration
+	 * are skipped. Failures are logged rather than rethrown.
+	 * 
+	 * @param delete
+	 *          Flag indicating whether each product type retrieved from the File
+	 *          Manager should be deleted from the index before it is added.
+	 */
+	public void indexProductTypes(boolean delete) {
+		LOG.info("Indexing product types...");
+		try {
+			XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
+			    this.fmUrl));
+			LOG.info("Retrieving list of product types.");
+			List<ProductType> types = fmClient.getProductTypes();
+			for (ProductType type : types) {
+				if (!config.getIgnoreTypes().contains(type.getName().trim())) {
+					Metadata metadata = new Metadata();
+					metadata.addMetadata("id", type.getProductTypeId());
+					metadata.addMetadata("CAS.ProductTypeId", type.getProductTypeId());
+					metadata.addMetadata("CAS.ProductTypeDescription", type
+					    .getDescription());
+					metadata.addMetadata("CAS.ProductTypeRepositoryPath", type
+					    .getProductRepositoryPath());
+					metadata.addMetadata("CAS.ProductTypeVersioner", type.getVersioner());
+					metadata.addMetadata("CAS.ProductTypeName", type.getName());
+					metadata.addMetadata("ProductType", "ProductType");
+					// The type's own metadata overrides the generated fields above.
+					// It is treated as optional, as in the private indexProduct.
+					Metadata typeMetadata = type.getTypeMetadata();
+					if (typeMetadata != null) {
+						metadata.replaceMetadata(typeMetadata);
+					}
+					if (delete) {
+						try {
+							server.deleteById(type.getProductTypeId());
+						} catch (Exception e) {
+							LOG.severe("Could not delete product type " + type.getName()
+							    + " from index: " + e.getMessage());
+						}
+					}
+					try {
+						performSubstitution(metadata);
+						server.add(this.getSolrDocument(metadata));
+						LOG.info("Indexed product type: " + type.getName());
+					} catch (Exception e) {
+						LOG.severe("Could not index " + type.getName() + ": "
+						    + e.getMessage());
+					}
+				} else {
+					LOG.info("Ignoring product type: " + type.getName());
+				}
+			}
+		} catch (MalformedURLException e) {
+			LOG.severe("File Manager URL is malformed: " + e.getMessage());
+		} catch (ConnectionException e) {
+			LOG.severe("Could not connect to File Manager: " + e.getMessage());
+		} catch (RepositoryManagerException e) {
+			LOG.severe("Could not retrieve product types from File Manager: "
+			    + e.getMessage());
+		}
+		LOG.info("Finished indexing product types.");
+	}
+
+	/**
+	 * This method indexes all products retrieved from the File Manager to the
+	 * Solr index. Metadata from the product's associated ProductType is also
+	 * included. Product types listed as ignore types in the configuration are
+	 * skipped. Failures are logged rather than rethrown.
+	 * 
+	 * @param delete
+	 *          Flag indicating whether each product retrieved from the File
+	 *          Manager should be deleted from the index before it is added.
+	 */
+	public void indexAll(boolean delete) {
+		LOG.info("Indexing products...");
+		try {
+			XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
+			    this.fmUrl));
+			LOG.info("Retrieving list of product types.");
+			List<ProductType> types = fmClient.getProductTypes();
+			for (ProductType type : types) {
+				if (!config.getIgnoreTypes().contains(type.getName().trim())) {
+					LOG.info("Paging through products for product type: "
+					    + type.getName());
+					// Walk every page up to and including the last one. Each page is
+					// processed before the last-page test so that the final (or only)
+					// page is indexed, and no page is requested beyond it.
+					ProductPage page = fmClient.getFirstPage(type);
+					while (page != null) {
+						List<Product> products = page.getPageProducts();
+						if (products != null) {
+							for (Product product : products) {
+								try {
+									this.indexProduct(product.getProductId(), fmClient
+									    .getMetadata(product), type.getTypeMetadata(), delete);
+								} catch (Exception e) {
+									LOG.severe("Could not index " + product.getProductId() + ": "
+									    + e.getMessage());
+								}
+							}
+						}
+						if (page.isLastPage()) {
+							break;
+						}
+						page = fmClient.getNextPage(type, page);
+					}
+				}
+			}
+		} catch (MalformedURLException e) {
+			LOG.severe("File Manager URL is malformed: " + e.getMessage());
+		} catch (ConnectionException e) {
+			LOG.severe("Could not connect to File Manager: " + e.getMessage());
+		} catch (CatalogException e) {
+			LOG.severe("Could not retrieve product from File Manager: "
+			    + e.getMessage());
+		} catch (RepositoryManagerException e) {
+			LOG.severe("Could not retrieve product types from File Manager: "
+			    + e.getMessage());
+		}
+		LOG.info("Finished indexing products.");
+	}
+
+	/**
+	 * Looks up one product in the File Manager by its identifier and adds it to
+	 * the Solr index together with the metadata of its ProductType. Lookup and
+	 * date-formatting failures are logged rather than rethrown.
+	 * 
+	 * @param productId
+	 *          The identifier of the product (CAS.ProductId).
+	 * @param delete
+	 *          Flag indicating whether the entry should be deleted from the
+	 *          index.
+	 * @throws SolrServerException
+	 *           When an error occurs communicating with the Solr server instance.
+	 */
+	public void indexProduct(String productId, boolean delete)
+	    throws SolrServerException {
+		LOG.info("Attempting to index product: " + productId);
+		try {
+			URL fileManagerUrl = new URL(this.fmUrl);
+			XmlRpcFileManagerClient client = new XmlRpcFileManagerClient(
+			    fileManagerUrl);
+			Product found = client.getProductById(productId);
+			Metadata foundMetadata = client.getMetadata(found);
+			String foundId = found.getProductId();
+			Metadata typeMetadata = found.getProductType().getTypeMetadata();
+			indexProduct(foundId, foundMetadata, typeMetadata, delete);
+		} catch (MalformedURLException e) {
+			LOG.severe("File Manager URL is malformed: " + e.getMessage());
+		} catch (ConnectionException e) {
+			LOG.severe("Could not connect to File Manager: " + e.getMessage());
+		} catch (CatalogException e) {
+			LOG.severe("Could not retrieve product from File Manager: "
+			    + e.getMessage());
+		} catch (java.text.ParseException e) {
+			LOG.severe("Could not format date: " + e.getMessage());
+		}
+	}
+
+	/**
+	 * Builds the metadata for a single product and adds the resulting document
+	 * to the Solr index. Product type metadata is added first; product metadata
+	 * then replaces it where both define a key. Nothing is indexed when
+	 * productMetadata is null.
+	 * 
+	 * @param productId
+	 *          The identifier stored in the "id" field.
+	 * @param productMetadata
+	 *          Metadata of the product; may be null.
+	 * @param typeMetadata
+	 *          Metadata of the product's type; may be null.
+	 * @param delete
+	 *          Flag indicating whether the entry should be deleted from the
+	 *          index before it is added.
+	 * @throws SolrServerException
+	 *           When adding the document to the Solr server fails.
+	 * @throws java.text.ParseException
+	 *           When a value of a configured date key cannot be parsed.
+	 */
+	private void indexProduct(String productId, Metadata productMetadata,
+	    Metadata typeMetadata, boolean delete) throws SolrServerException,
+	    java.text.ParseException {
+		Metadata combined = new Metadata();
+		combined.addMetadata("id", productId);
+		// Add in product type metadata
+		if (typeMetadata != null) {
+			combined.addMetadata(typeMetadata);
+		}
+		if (productMetadata == null) {
+			LOG.info("Could not find metadata for product: " + productId);
+			return;
+		}
+
+		String accessKey = config.getProperty(ACCESS_KEY);
+		// Product metadata takes precedence
+		combined.replaceMetadata(productMetadata);
+		// If there is an access url configured add it to the metadata
+		String accessUrl = config.getProperty(ACCESS_URL);
+		if (accessUrl != null) {
+			combined.addMetadata(accessKey, accessUrl);
+		}
+		// Replace values for metadata keys specified in config. This allows
+		// for metadata substitution. For instance, if a key named "product_url"
+		// has a value of
+		// http://localhost:8080/cas-product/data?productID=[CAS.ProductId]
+		// the value in brackets will be updated with the value from the
+		// CAS.ProductId.
+		performSubstitution(combined);
+
+		if (delete) {
+			try {
+				server.deleteById(productId);
+			} catch (Exception e) {
+				LOG.severe("Could not delete product from index: " + e.getMessage());
+			}
+		}
+
+		try {
+			SolrInputDocument document = this.getSolrDocument(combined);
+			server.add(document);
+			LOG.info("Indexed product: " + productId);
+		} catch (IOException e) {
+			LOG.severe("Could not index product: " + productId);
+		}
+	}
+
+	/**
+	 * Helper that rewrites metadata values in place in two passes: first it
+	 * runs variable substitution on the replacement keys named in the config,
+	 * then it reformats the values of the keys that have a date format
+	 * configured.
+	 * 
+	 * @param metadata
+	 *          to substitute on
+	 * @throws java.text.ParseException
+	 *           When a date value cannot be parsed with its configured format.
+	 */
+	private void performSubstitution(Metadata metadata)
+	    throws java.text.ParseException {
+		// Pass 1: metadata replacement
+		for (String replaceKey : config.getReplacementKeys()) {
+			List<String> current = metadata.getAllValues(replaceKey);
+			if (current == null) {
+				continue;
+			}
+			List<String> substituted = new ArrayList<String>();
+			for (String raw : current) {
+				substituted.add(PathUtils.replaceEnvVariables(raw, metadata));
+			}
+			metadata.removeMetadata(replaceKey);
+			metadata.addMetadata(replaceKey, substituted);
+		}
+		// Pass 2: date formatting
+		for (Object formatKey : config.getFormatProperties().keySet()) {
+			String dateKey = (String) formatKey;
+			List<String> dates = metadata.getAllValues(dateKey);
+			if (!metadata.containsKey(dateKey)) {
+				continue;
+			}
+			List<String> formatted = new ArrayList<String>();
+			String pattern = config.getFormatProperties().getProperty(dateKey)
+			    .trim();
+			SimpleDateFormat parser = new SimpleDateFormat(pattern);
+			for (String date : dates) {
+				formatted.add(formatDate(parser, date));
+			}
+			metadata.removeMetadata(dateKey);
+			metadata.addMetadata(dateKey, formatted);
+		}
+	}
+
+	/**
+	 * Parses a value with the given format and renders it with the Solr date
+	 * format. Values listed as ignore values in the config are returned
+	 * unchanged.
+	 * 
+	 * @param format
+	 *          Format used to parse the incoming value.
+	 * @param value
+	 *          The date string to convert.
+	 * @return The reformatted date, or the original value when it is ignored.
+	 * @throws java.text.ParseException
+	 *           When the value cannot be parsed with the given format.
+	 */
+	private String formatDate(SimpleDateFormat format, String value)
+	    throws java.text.ParseException {
+		boolean ignored = config.getIgnoreValues().contains(value.trim());
+		return ignored ? value : solrFormat.format(format.parse(value));
+	}
+
+	/**
+	 * Assembles the command-line options understood by the indexer: the
+	 * stand-alone flags and URL overrides, plus one option group holding the
+	 * actions (all, product, metFile, read, types, deleteAll).
+	 * 
+	 * @return Returns the supported Options.
+	 */
+	@SuppressWarnings("static-access")
+	public static Options buildCommandLine() {
+		Options supported = new Options();
+
+		// Stand-alone flags
+		Option help = new Option("h", "help", false, "Print this message");
+		Option optimize = new Option("o", "optimize", false,
+		    "Optimize the Solr index");
+		Option delete = new Option("d", "delete", false,
+		    "Delete item before indexing");
+		supported.addOption(help);
+		supported.addOption(optimize);
+		supported.addOption(delete);
+
+		// URL overrides
+		Option solrUrlOpt = OptionBuilder.withLongOpt("solrUrl").withDescription(
+		    "URL to the Solr instance").hasArg().withArgName("Solr URL")
+		    .create("su");
+		supported.addOption(solrUrlOpt);
+		Option fmUrlOpt = OptionBuilder.withLongOpt("fmUrl").withDescription(
+		    "URL to the File Manager").hasArg().withArgName("Filemgr URL")
+		    .create("fmu");
+		supported.addOption(fmUrlOpt);
+
+		// Actions, collected in a single option group
+		OptionGroup actions = new OptionGroup();
+		actions.addOption(new Option("a", "all", false,
+		    "Index all products from the File Manager"));
+		actions.addOption(OptionBuilder.withLongOpt("product").withDescription(
+		    "Index the product from the File Manager").hasArg().withArgName(
+		    "productId").create("p"));
+		actions.addOption(OptionBuilder.withLongOpt("metFile").withDescription(
+		    "Index the product from a metadata file").hasArg().withArgName("file")
+		    .create("mf"));
+		actions.addOption(new Option("r", "read", false,
+		    "Index all products based on a list of product identifiers passed in"));
+		actions.addOption(new Option("t", "types", false,
+		    "Index all product types from the File Manager"));
+		actions.addOption(new Option("da", "deleteAll", false,
+		    "Delete all products/types from the Solr index"));
+		supported.addOptionGroup(actions);
+
+		return supported;
+	}
+
+	/**
+	 * The main method. Execution without argument displays help message.
+	 * 
+	 * @param args
+	 *          Command-line arguments.
+	 */
+	public static void main(String[] args) throws Exception {
+		Options options = SolrIndexer.buildCommandLine();
+		CommandLineParser parser = new GnuParser();
+		CommandLine line = null;
+
+		try {
+			line = parser.parse(options, args);
+		} catch (ParseException e) {
+			LOG.severe("Could not parse command line: " + e.getMessage());
+		}
+
+		if (line == null || line.hasOption("help") || line.getOptions().length == 0) {
+			HelpFormatter formatter = new HelpFormatter();
+			formatter.printHelp("java " + SolrIndexer.class.getName(), options);
+		} else if (line.hasOption("all") || line.hasOption("product")
+		    || line.hasOption("metFile") || line.hasOption("read")
+		    || line.hasOption("types") || line.hasOption("deleteAll")) {
+			SolrIndexer indexer = null;
+			String solrUrl = null;
+			String fmUrl = null;
+			if (line.hasOption("solrUrl")) {
+				solrUrl = line.getOptionValue("solrUrl");
+			}
+			if (line.hasOption("fmUrl")) {
+				fmUrl = line.getOptionValue("fmUrl");
+			}
+			try {
+				indexer = new SolrIndexer(solrUrl, fmUrl);
+				if (line.hasOption("all")) {
+					indexer.indexAll(line.hasOption("delete"));
+				} else if (line.hasOption("product")) {
+					indexer.indexProduct(line.getOptionValue("product"), line
+					    .hasOption("delete"));
+				} else if (line.hasOption("metFile")) {
+					indexer.indexMetFile(new File(line.getOptionValue("metFile")), line
+					    .hasOption("delete"));
+				} else if (line.hasOption("read")) {
+					for (String productId : readProductIdsFromStdin()) {
+						indexer.indexProduct(productId, line.hasOption("delete"));
+					}
+				} else if (line.hasOption("types")) {
+					indexer.indexProductTypes(line.hasOption("delete"));
+				} else if (line.hasOption("deleteAll")) {
+					indexer.delete();
+				} else {
+					LOG.severe("Option not supported.");
+				}
+				indexer.commit();
+				if (line.hasOption("optimize")) {
+					indexer.optimize();
+				}
+			} catch (Exception e) {
+				LOG.severe("An error occurred indexing: " + e.getMessage());
+				LOG
+				    .severe("If the above message is related to accessing the Solr instance, see the Application Server's log for additional information.");
+			}
+		}
+	}
+
+	/**
+	 * This method reads product identifiers from the standard input, one per
+	 * line. Whitespace around each identifier is removed and blank lines are
+	 * skipped. If reading fails, the error is logged and the identifiers read
+	 * so far are returned.
+	 * 
+	 * @return Returns a List of product identifiers.
+	 */
+	private static List<String> readProductIdsFromStdin() {
+		List<String> productIds = new ArrayList<String>();
+		BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
+		String line = null;
+
+		try {
+			while ((line = br.readLine()) != null) {
+				String productId = line.trim();
+				// A blank line (e.g. a trailing newline in piped input) is not a
+				// product identifier
+				if (productId.length() > 0) {
+					productIds.add(productId);
+				}
+			}
+		} catch (IOException e) {
+			LOG.severe("Error reading product id: line: [" + line + "]: Message: "
+			    + e.getMessage());
+		} finally {
+			try {
+				br.close();
+			} catch (Exception ignore) {
+				// Best effort: nothing useful can be done if closing fails
+			}
+		}
+		return productIds;
+	}
+
+	/**
+	 * This class manages the Indexer configuration.
+	 */
+	public class IndexerConfig {
+		private final static String PREFIX_CONFIG = "config.";
+		private final static String PREFIX_MET = "map.";
+		private final static String PREFIX_FORMAT = "format.";
+		private final static String IGNORE_TYPES = "ignore.types";
+		private final static String IGNORE_VALUES = "ignore.values";
+		private final static String REPLACEMENT_KEYS = "replacement.keys";
+		// Used to hold general properties for indexer configuration
+		private Properties properties = new Properties();
+		// Used to hold mappings for filemanager -> solr for keys
+		private Properties mapProperties = new Properties();
+		// Used to define the date format for a field
+		private Properties formatProperties = new Properties();
+		private List<String> ignoreTypes = new ArrayList<String>();
+		private List<String> ignoreValues = new ArrayList<String>();
+		private List<String> replacementKeys = new ArrayList<String>();
+
+		public IndexerConfig(InputStream inputStream) throws IOException {
+			Properties props = new Properties();
+			props.load(inputStream);
+			for (Object objKey : props.keySet()) {
+				String key = (String) objKey;
+				if (key.startsWith(PREFIX_CONFIG)) {
+					properties.put(key.substring(PREFIX_CONFIG.length()), props
+					    .getProperty(key));
+				} else if (key.startsWith(PREFIX_MET)) {
+					mapProperties.put(key.substring(PREFIX_MET.length()), props
+					    .getProperty(key));
+				} else if (key.startsWith(PREFIX_FORMAT)) {
+					formatProperties.put(key.substring(PREFIX_FORMAT.length()), props
+					    .getProperty(key));
+				}
+			}
+
+			if (properties.getProperty(IGNORE_TYPES) != null) {
+				String[] values = properties.getProperty(IGNORE_TYPES).trim()
+				    .split(",");
+				for (String value : values) {
+					ignoreTypes.add(value);
+				}
+			}
+
+			if (properties.getProperty(IGNORE_VALUES) != null) {
+				String[] values = properties.getProperty(IGNORE_VALUES).trim().split(
+				    ",");
+				for (String value : values) {
+					ignoreValues.add(value);
+				}
+			}
+
+			if (properties.getProperty(REPLACEMENT_KEYS) != null) {
+				String[] values = properties.getProperty(REPLACEMENT_KEYS).trim()
+				    .split(",");
+				for (String value : values) {
+					replacementKeys.add(value);
+				}
+			}
+		}
+
+		public String getProperty(String key) {
+			return properties.getProperty(key);
+		}
+
+		public String getProperty(String key, String defaultValue) {
+			return properties.getProperty(key, defaultValue);
+		}
+
+		public Properties getMapProperties() {
+			return mapProperties;
+		}
+
+		public Properties getFormatProperties() {
+			return formatProperties;
+		}
+
+		public List<String> getIgnoreTypes() {
+			return this.ignoreTypes;
+		}
+
+		public List<String> getIgnoreValues() {
+			return this.ignoreValues;
+		}
+
+		public List<String> getReplacementKeys() {
+			return this.replacementKeys;
+		}
+	}
 
 }

Modified: oodt/trunk/filemgr/src/main/resources/indexer.properties
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/resources/indexer.properties?rev=1378586&r1=1378585&r2=1378586&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/resources/indexer.properties (original)
+++ oodt/trunk/filemgr/src/main/resources/indexer.properties Wed Aug 29 14:47:36 2012
@@ -29,13 +29,28 @@ config.solr.url=http://localhost:8983/so
 # the URL path to the File Manager
 config.filemgr.url=http://localhost:9001
 
+# Data access for the products
+config.access.key=ProductURL
+config.access.url=http://localhost:8080/cas-product/data?productID=[ProductId]
 
-config.ref.DataDownloadRef=dataref
+# Add a comma-separated list of keys whose values
+# will go through metadata replacement.
+# For example, the ProductURL above will have the
+# actual value filled in for [ProductId]
+config.replacement.keys=ProductURL
 
-# a set of products to map from File Manager
-# terminology into Solr index doc field 
-# terminology
+# Map from File Manager terminology into Solr 
+# index doc field terminology. Only mapped fields 
+# will be added to the Solr index.
 map.MimeType=mimetype
-map.ReceivedTime=receivedtime
+map.ProductReceivedTime=receivedtime
 map.FileSize=filesize
 map.FileName=filename
+# map.ProductURL=producturl
+
+# Formatting of date fields can be specified for a
+# field coming from the File Manager. Solr requires a
+# specific format when you want to map to a date, so one
+# must specify the source format used in the File Manager.
+format.ProductReceivedTime=yyyy-MM-dd'T'HH:mm:ss.SSS
+