You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by ma...@apache.org on 2011/10/05 18:58:16 UTC
svn commit: r1179317 - in /oodt/trunk: CHANGES.txt filemgr/pom.xml
filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java
filemgr/src/main/resources/indexer.properties
Author: mattmann
Date: Wed Oct 5 16:58:15 2011
New Revision: 1179317
URL: http://svn.apache.org/viewvc?rev=1179317&view=rev
Log:
- OODT-326 A tool to dump the File Manager catalog metadata into Solr
Added:
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java (with props)
oodt/trunk/filemgr/src/main/resources/indexer.properties (with props)
Modified:
oodt/trunk/CHANGES.txt
oodt/trunk/filemgr/pom.xml
Modified: oodt/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1179317&r1=1179316&r2=1179317&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Wed Oct 5 16:58:15 2011
@@ -4,6 +4,9 @@ Apache OODT Change Log
Release 0.4: Current Development
--------------------------------------------
+* OODT-326 A tool to dump the File Manager catalog metadata
+ into Solr (mattmann, pramirez)
+
* OODT-37 Create an Action to Group other Actions Together (pramirez, mattmann)
* OODT-36 Create an Action to Support Simple Branching (pramirez, mattmann)
Modified: oodt/trunk/filemgr/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/pom.xml?rev=1179317&r1=1179316&r2=1179317&view=diff
==============================================================================
--- oodt/trunk/filemgr/pom.xml (original)
+++ oodt/trunk/filemgr/pom.xml Wed Oct 5 16:58:15 2011
@@ -156,7 +156,29 @@
<groupId>commons-pool</groupId>
<artifactId>commons-pool</artifactId>
<version>1.2</version>
- </dependency>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ <version>1.3.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-common</artifactId>
+ <version>1.3.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
+ <version>1.3.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
Added: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java?rev=1179317&view=auto
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java (added)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java Wed Oct 5 16:58:15 2011
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.cas.filemgr.tools;
+
+//JDK imports
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Properties;
+import java.util.logging.Logger;
+
+//COMMONS imports
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.OptionGroup;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+//Solr imports
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.core.CoreContainer;
+
+//OODT imports
+import org.apache.oodt.cas.filemgr.structs.Product;
+import org.apache.oodt.cas.filemgr.structs.ProductType;
+import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
+import org.apache.oodt.cas.filemgr.structs.exceptions.ConnectionException;
+import org.apache.oodt.cas.filemgr.structs.exceptions.RepositoryManagerException;
+import org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient;
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.cas.metadata.SerializableMetadata;
+
+/**
+ *
+ * Indexes the File Manager Catalog to Solr. Uses an associated config file,
+ * indexer.properties, to specify how to perform the indexing. See
+ * indexer.properties in the src/main/resources directory of file manager for
+ * specific documentation.
+ *
+ * <p>
+ * The configuration is read from the file named by the
+ * <code>SOLR_INDEXER_CONFIG</code> system property when that property is set,
+ * and from the classpath otherwise.
+ * </p>
+ *
+ */
+public class SolrIndexer {
+  private final static String SOLR_INDEXER_CONFIG = "SOLR_INDEXER_CONFIG";
+  private final static String SOLR_URL = "solr.url";
+  private final static String FILEMGR_URL = "filemgr.url";
+  private final static String INDEXER_PROPERTIES = "indexer.properties";
+  private static final Logger LOG = Logger.getLogger(SolrIndexer.class
+      .getName());
+
+  private IndexerConfig config = null;
+  private final SolrServer server;
+  // Never assigned anywhere in this class, so shutdown() must tolerate null.
+  private CoreContainer coreContainer;
+  private String fmUrl;
+  private String solrUrl;
+
+  /**
+   * Loads the indexer configuration and creates the HTTP client for Solr.
+   *
+   * @param solrUrl
+   *          URL of the Solr server; when null the <code>solr.url</code>
+   *          config property is used instead.
+   * @param fmUrl
+   *          URL of the File Manager; when null the <code>filemgr.url</code>
+   *          config property is used instead.
+   * @throws InstantiationException
+   *           if the configuration cannot be found or read, or if the Solr
+   *           URL is malformed.
+   */
+  public SolrIndexer(String solrUrl, String fmUrl)
+      throws InstantiationException {
+    InputStream input = null;
+    String filename = null;
+
+    try {
+      filename = System.getProperty(SOLR_INDEXER_CONFIG);
+      LOG.info("System property " + SOLR_INDEXER_CONFIG + " set to "
+          + filename);
+      if (filename != null) {
+        LOG.info("Reading config from " + filename);
+        input = new FileInputStream(filename);
+      } else {
+        LOG.info("Config file not found reading config from classpath");
+        // A name without a leading slash is resolved relative to this class's
+        // package, so also look at the classpath root.
+        input = SolrIndexer.class.getResourceAsStream(INDEXER_PROPERTIES);
+        if (input == null) {
+          input = SolrIndexer.class.getResourceAsStream("/"
+              + INDEXER_PROPERTIES);
+        }
+        if (input == null) {
+          throw new IOException(INDEXER_PROPERTIES
+              + " was not found on the classpath");
+        }
+      }
+      config = new IndexerConfig(input);
+    } catch (IOException e) {
+      LOG.severe("Could not read in configuration for indexer from classpath or file");
+      throw instantiationFailure(e.getMessage(), e);
+    } finally {
+      closeQuietly(input);
+    }
+
+    this.solrUrl = solrUrl;
+    if (this.solrUrl == null) {
+      this.solrUrl = config.getProperty(SOLR_URL);
+    }
+
+    this.fmUrl = fmUrl;
+    if (this.fmUrl == null) {
+      this.fmUrl = config.getProperty(FILEMGR_URL);
+    }
+
+    LOG.info("Using Solr: " + this.solrUrl + " FileManager: " + this.fmUrl);
+
+    try {
+      server = new CommonsHttpSolrServer(this.solrUrl);
+    } catch (MalformedURLException e) {
+      LOG.severe("Could not connect to Solr server " + this.solrUrl);
+      throw instantiationFailure(e.getMessage(), e);
+    }
+
+  }
+
+  /**
+   * Builds an {@link InstantiationException} that keeps the underlying cause.
+   */
+  private static InstantiationException instantiationFailure(String message,
+      Throwable cause) {
+    InstantiationException failure = new InstantiationException(message);
+    failure.initCause(cause);
+    return failure;
+  }
+
+  /**
+   * Closes the stream, ignoring a null stream and any failure on close.
+   */
+  private static void closeQuietly(InputStream stream) {
+    if (stream != null) {
+      try {
+        stream.close();
+      } catch (IOException ignored) {
+        // nothing useful can be done about a failed close
+      }
+    }
+  }
+
+  /**
+   * Shuts down the core container, if there is one.
+   */
+  public void shutdown() {
+    if (coreContainer != null) {
+      coreContainer.shutdown();
+    }
+  }
+
+  /**
+   * Commits pending changes to the Solr server.
+   *
+   * @throws SolrServerException
+   *           if the Solr client reports a failure.
+   * @throws IOException
+   *           if the Solr client reports an I/O failure.
+   */
+  public void commit() throws SolrServerException, IOException {
+    server.commit();
+  }
+
+  /**
+   * Asks the Solr server to optimize its index.
+   *
+   * @throws SolrServerException
+   *           if the Solr client reports a failure.
+   * @throws IOException
+   *           if the Solr client reports an I/O failure.
+   */
+  public void optimize() throws SolrServerException, IOException {
+    server.optimize();
+  }
+
+  /**
+   * Converts metadata into a Solr document. Only the metadata keys listed in
+   * the <code>map.*</code> configuration are copied, each under its mapped
+   * field name.
+   *
+   * @param metadata
+   *          the metadata to convert.
+   * @return the document holding the mapped fields.
+   */
+  @SuppressWarnings("unchecked")
+  private SolrInputDocument getSolrDocument(Metadata metadata) {
+    SolrInputDocument doc = new SolrInputDocument();
+    Properties fieldMap = config.getMapProperties();
+
+    for (Object objKey : fieldMap.keySet()) {
+      String key = (String) objKey;
+      String field = fieldMap.getProperty(key);
+      if (metadata.isMultiValued(key)) {
+        List<String> values = metadata.getAllMetadata(key);
+        for (String value : values) {
+          addFieldUnlessIgnored(doc, field, value);
+        }
+      } else {
+        addFieldUnlessIgnored(doc, field, metadata.getMetadata(key));
+      }
+    }
+
+    return doc;
+  }
+
+  /**
+   * Adds the value to the document unless it is null or its trimmed form is
+   * one of the configured ignore values.
+   */
+  private void addFieldUnlessIgnored(SolrInputDocument doc, String field,
+      String value) {
+    if (value != null && !config.getIgnoreValues().contains(value.trim())) {
+      LOG.fine("Adding field: " + field + " value: " + value);
+      doc.addField(field, value);
+    }
+  }
+
+  /**
+   * Indexes the metadata stored in a met file.
+   *
+   * @param file
+   *          the XML met file to read.
+   * @param delete
+   *          when true, the document whose id equals the file's
+   *          <code>uuid</code> metadata value is deleted first.
+   * @throws FileNotFoundException
+   *           if the file cannot be opened.
+   * @throws IOException
+   *           if reading the file or talking to Solr fails.
+   * @throws SolrServerException
+   *           if the Solr client reports a failure.
+   */
+  public void indexMetFile(File file, boolean delete)
+      throws InstantiationException, FileNotFoundException, IOException,
+      SolrServerException {
+    SerializableMetadata metadata = new SerializableMetadata("UTF-8", false);
+    InputStream metStream = new FileInputStream(file);
+    try {
+      metadata.loadMetadataFromXmlStream(metStream);
+    } finally {
+      closeQuietly(metStream);
+    }
+    if (delete) {
+      // NOTE(review): this reads "uuid" while indexAll reads "UUID" - confirm
+      // which key the met files actually carry.
+      server.deleteById(metadata.getMetadata("uuid"));
+    }
+    server.add(this.getSolrDocument(metadata));
+  }
+
+  /**
+   * Indexes every product of every product type known to the File Manager,
+   * except the types listed in the ignore configuration. Products that have
+   * no metadata, no <code>UUID</code>, or a <code>Deleted</code> value of
+   * "true" are skipped. Failures talking to the File Manager are logged and
+   * end the run; a failure on one document is logged and the run continues.
+   *
+   * @param delete
+   *          when true, all documents are deleted from Solr first.
+   * @throws SolrServerException
+   *           if deleting the existing documents fails.
+   */
+  public void indexAll(boolean delete) throws SolrServerException {
+    LOG.info("Indexing");
+    try {
+      XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
+          this.fmUrl));
+      if (delete) {
+        server.deleteByQuery("*:*");
+      }
+      LOG.info("Looking up product types");
+      List<ProductType> types = fmClient.getProductTypes();
+      for (ProductType type : types) {
+        if (config.getIgnoreTypes().contains(type.getName().trim())) {
+          LOG.info("Ignoring product type " + type.getName());
+          continue;
+        }
+        LOG.info("Looking up products for product type: " + type.getName());
+        List<Product> products = fmClient.getProductsByProductType(type);
+        for (Product product : products) {
+          LOG.info("Looking up metadata for ProductId "
+              + product.getProductId());
+          Metadata metadata = fmClient.getMetadata(product);
+          if (metadata == null) {
+            LOG.info("Could not find metadata for product "
+                + product.getProductId());
+            // Nothing to index; dereferencing null here would abort the run.
+            continue;
+          }
+          LOG.info("Found metadata for product ID "
+              + metadata.getMetadata("CAS.ProductId"));
+
+          String uuid = metadata.getMetadata("UUID");
+          if (uuid == null) {
+            continue;
+          }
+          if ("true".equals(metadata.getMetadata("Deleted"))) {
+            LOG.info("Skipping Deleted: " + uuid);
+            continue;
+          }
+          try {
+            server.add(this.getSolrDocument(metadata));
+            server.commit();
+            LOG.info("Indexed " + uuid);
+          } catch (Exception e) {
+            LOG.severe("Could not index " + uuid + " " + e.getMessage());
+          }
+        }
+      }
+    } catch (MalformedURLException e) {
+      LOG.severe("File Manager URL is malformed: " + e.getMessage());
+    } catch (ConnectionException e) {
+      LOG.severe("Could not connect to File Manager: " + e.getMessage());
+    } catch (IOException e) {
+      LOG.severe("Could not delete all: " + e.getMessage());
+    } catch (RepositoryManagerException e) {
+      LOG.severe("Could not look up product types: " + e.getMessage());
+    } catch (CatalogException e) {
+      LOG.severe("Query to File Manager failed: " + e.getMessage());
+    }
+    LOG.info("Finished Indexing");
+  }
+
+  /**
+   * Indexes a single product looked up by id in the File Manager.
+   *
+   * @param productId
+   *          id of the product to index.
+   * @param delete
+   *          when true, the Solr document with this id is deleted first.
+   * @throws SolrServerException
+   *           if the Solr client reports a failure.
+   * @throws IOException
+   *           if the File Manager URL is malformed or Solr I/O fails.
+   * @throws ConnectionException
+   *           if the File Manager cannot be reached.
+   * @throws CatalogException
+   *           if the File Manager lookup fails.
+   */
+  public void indexProduct(String productId, boolean delete)
+      throws SolrServerException, IOException, ConnectionException,
+      CatalogException {
+    XmlRpcFileManagerClient fmClient = new XmlRpcFileManagerClient(new URL(
+        this.fmUrl));
+    if (delete) {
+      server.deleteById(productId);
+    }
+
+    Product product = fmClient.getProductById(productId);
+    Metadata metadata = fmClient.getMetadata(product);
+    server.add(this.getSolrDocument(metadata));
+  }
+
+  /**
+   * Builds the command line options understood by {@link #main(String[])}.
+   * The all, metFile, catalogQuery and product options form one option group.
+   *
+   * @return the option set.
+   */
+  @SuppressWarnings("static-access")
+  public static Options buildCommandLine() {
+    Options options = new Options();
+
+    options.addOption(new Option("h", "help", false, "Print this message"));
+    options.addOption(new Option("o", "optimize", false,
+        "Optimize the Solr index when done"));
+    options.addOption(new Option("d", "delete", false,
+        "Delete items before indexing"));
+    options.addOption(OptionBuilder.withArgName("Solr URL").hasArg()
+        .withDescription("URL to the Solr server").withLongOpt("solrUrl")
+        .create("su"));
+    options.addOption(OptionBuilder.withArgName("Filemgr URL").hasArg()
+        .withDescription("URL to the CAS FileManager").withLongOpt("fmUrl")
+        .create("fmu"));
+
+    OptionGroup group = new OptionGroup();
+    Option all = new Option("a", "all", false, "Index all items in catalog");
+    Option met = OptionBuilder.withArgName("file").hasArg()
+        .withDescription("Index this met file").withLongOpt("metFile")
+        .create("mf");
+    Option query = OptionBuilder.withArgName("query").hasArg()
+        .withDescription("Not yet implemented").withLongOpt("catalogQuery")
+        .create("cq");
+    Option product = OptionBuilder.withArgName("productId").hasArg()
+        .withDescription("Product id to index").withLongOpt("product")
+        .create("p");
+
+    group.addOption(all);
+    group.addOption(met);
+    group.addOption(query);
+    group.addOption(product);
+    options.addOptionGroup(group);
+
+    return options;
+  }
+
+  /**
+   * Command line entry point. Prints usage when the arguments cannot be
+   * parsed, when help is requested, or when no indexing mode (all, product,
+   * metFile, catalogQuery) is given; otherwise runs the requested mode,
+   * commits, and optionally optimizes.
+   *
+   * @param args
+   *          the command line arguments.
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = SolrIndexer.buildCommandLine();
+    CommandLineParser parser = new GnuParser();
+    CommandLine line = null;
+
+    try {
+      line = parser.parse(options, args);
+    } catch (ParseException e) {
+      LOG.severe("Could not parse command line: " + e.getMessage());
+    }
+
+    boolean indexRequested = line != null
+        && (line.hasOption("all") || line.hasOption("product")
+            || line.hasOption("metFile") || line.hasOption("catalogQuery"));
+
+    // Without an indexing mode there is nothing to do, so show the usage
+    // instead of exiting silently.
+    if (!indexRequested || line.hasOption("help")) {
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("java " + SolrIndexer.class.getName(), options);
+      return;
+    }
+
+    String solrUrl = null;
+    String fmUrl = null;
+    if (line.hasOption("solrUrl")) {
+      solrUrl = line.getOptionValue("solrUrl");
+    }
+    if (line.hasOption("fmUrl")) {
+      fmUrl = line.getOptionValue("fmUrl");
+    }
+
+    try {
+      SolrIndexer indexer = new SolrIndexer(solrUrl, fmUrl);
+      if (line.hasOption("all")) {
+        indexer.indexAll(line.hasOption("delete"));
+      } else if (line.hasOption("product")) {
+        indexer.indexProduct(line.getOptionValue("product"),
+            line.hasOption("delete"));
+      } else if (line.hasOption("metFile")) {
+        indexer.indexMetFile(new File(line.getOptionValue("metFile")),
+            line.hasOption("delete"));
+      } else {
+        LOG.info("Catalog query not yet implemented.");
+      }
+      indexer.commit();
+      if (line.hasOption("optimize")) {
+        indexer.optimize();
+      }
+    } catch (Exception ex) {
+      LOG.severe("Did not complete indexing: " + ex.getMessage());
+      ex.printStackTrace();
+    }
+
+  }
+
+  /**
+   * Holds the indexer configuration. Keys starting with <code>config.</code>
+   * become general properties and keys starting with <code>map.</code> become
+   * the metadata-key to Solr-field mapping; in both cases the prefix is
+   * stripped. Other keys are ignored.
+   */
+  public class IndexerConfig {
+    private final static String PREFIX_CONFIG = "config.";
+    private final static String PREFIX_MET = "map.";
+    private Properties properties = new Properties();
+    private Properties mapProperties = new Properties();
+    private HashMap<String, Properties> xmlMapProperties = new HashMap<String, Properties>();
+    private List<String> xmlKeys = new ArrayList<String>();
+    private List<String> xmlMultiKeys = new ArrayList<String>();
+    private List<String> ignoreTypes = new ArrayList<String>();
+    private List<String> ignoreValues = new ArrayList<String>();
+
+    /**
+     * Reads the configuration from a properties stream. The stream is not
+     * closed here.
+     *
+     * @param inputStream
+     *          the properties data to load.
+     * @throws IOException
+     *           if the stream cannot be read.
+     */
+    public IndexerConfig(InputStream inputStream) throws IOException {
+      Properties props = new Properties();
+      props.load(inputStream);
+      for (Object objKey : props.keySet()) {
+        String key = (String) objKey;
+        if (key.startsWith(PREFIX_CONFIG)) {
+          properties.put(key.substring(PREFIX_CONFIG.length()),
+              props.getProperty(key));
+        } else if (key.startsWith(PREFIX_MET)) {
+          mapProperties.put(key.substring(PREFIX_MET.length()),
+              props.getProperty(key));
+        }
+      }
+
+      addTrimmedEntries(properties.getProperty("ignore.types"), ignoreTypes);
+      addTrimmedEntries(properties.getProperty("ignore.values"), ignoreValues);
+    }
+
+    /**
+     * Splits a comma-separated list and appends each entry, trimmed, to the
+     * target. Entries are trimmed one by one because lookups are made with
+     * trimmed strings; an entry written as ", B" would otherwise never match.
+     * A null list adds nothing.
+     */
+    private void addTrimmedEntries(String commaSeparated, List<String> target) {
+      if (commaSeparated == null) {
+        return;
+      }
+      for (String entry : commaSeparated.split(",")) {
+        target.add(entry.trim());
+      }
+    }
+
+    /**
+     * @return the value of a <code>config.</code> property (key given without
+     *         the prefix), or null if it is not set.
+     */
+    public String getProperty(String key) {
+      return properties.getProperty(key);
+    }
+
+    /**
+     * @return the value of a <code>config.</code> property (key given without
+     *         the prefix), or the default if it is not set.
+     */
+    public String getProperty(String key, String defaultValue) {
+      return properties.getProperty(key, defaultValue);
+    }
+
+    /**
+     * @return the metadata-key to Solr-field mapping.
+     */
+    public Properties getMapProperties() {
+      return mapProperties;
+    }
+
+    /**
+     * @return the XML map properties registered under the name, or null.
+     *         Nothing in this class populates them.
+     */
+    public Properties getXmlMapProperties(String name) {
+      return xmlMapProperties.get(name);
+    }
+
+    /**
+     * @return the XML keys. Nothing in this class populates them.
+     */
+    public List<String> getXmlKeys() {
+      return this.xmlKeys;
+    }
+
+    /**
+     * @return the XML multi keys. Nothing in this class populates them.
+     */
+    public List<String> getXmlMultiKeys() {
+      return this.xmlMultiKeys;
+    }
+
+    /**
+     * @return the product type names that are not indexed.
+     */
+    public List<String> getIgnoreTypes() {
+      return this.ignoreTypes;
+    }
+
+    /**
+     * @return the metadata values that are not indexed.
+     */
+    public List<String> getIgnoreValues() {
+      return this.ignoreValues;
+    }
+
+  }
+
+}
Propchange: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/tools/SolrIndexer.java
------------------------------------------------------------------------------
svn:executable = *
Added: oodt/trunk/filemgr/src/main/resources/indexer.properties
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/resources/indexer.properties?rev=1179317&view=auto
==============================================================================
--- oodt/trunk/filemgr/src/main/resources/indexer.properties (added)
+++ oodt/trunk/filemgr/src/main/resources/indexer.properties Wed Oct 5 16:58:15 2011
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configures the Solr Indexer tool
+
+# basic config properties
+
+# a comma-separated list of product types to skip when indexing
+config.ignore.types=SomeProductType
+
+# a comma-separated list of metadata values to skip when indexing
+config.ignore.values=TBD
+
+# the URL path to Solr
+config.solr.url=http://localhost:8983/solr
+
+# the URL path to the File Manager
+config.filemgr.url=http://localhost:9001
+
+
+config.ref.DataDownloadRef=dataref
+
+# a set of metadata keys to map from File Manager
+# terminology into Solr index document field
+# names (map.<metadata key>=<Solr field>)
+map.MimeType=mimetype
+map.ReceivedTime=receivedtime
+map.FileSize=filesize
+map.FileName=filename
Propchange: oodt/trunk/filemgr/src/main/resources/indexer.properties
------------------------------------------------------------------------------
svn:executable = *