You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by bf...@apache.org on 2012/04/10 00:08:43 UTC

svn commit: r1311492 - in /oodt/trunk/pge: ./ src/main/java/org/apache/oodt/cas/pge/ src/main/java/org/apache/oodt/cas/pge/config/ src/main/java/org/apache/oodt/cas/pge/metadata/ src/main/java/org/apache/oodt/cas/pge/writers/ src/main/java/org/apache/o...

Author: bfoster
Date: Mon Apr  9 22:08:42 2012
New Revision: 1311492

URL: http://svn.apache.org/viewvc?rev=1311492&view=rev
Log:
- Introduce a CAS-Metadata based renaming interface

------------
OODT-426

Added:
    oodt/trunk/pge/src/main/resources/examples/Crawler/
    oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml   (with props)
    oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml   (with props)
Removed:
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/RegExprOutputFiles.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/RenamingConv.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/ExternExtractorMetWriter.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/FilenameExtractorWriter.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/PcsMetFileWriter.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/metlist/
    oodt/trunk/pge/src/main/resources/examples/MetadataOutputFiles/
Modified:
    oodt/trunk/pge/pom.xml
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/PGETaskInstance.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/OutputDir.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfig.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigBuilder.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigMetKeys.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/XmlFilePgeConfigBuilder.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/metadata/PgeTaskMetKeys.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/SciPgeConfigFileWriter.java
    oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/xslt/XslTransformWriter.java
    oodt/trunk/pge/src/main/resources/examples/PgeConfigFiles/pge-config.xml
    oodt/trunk/pge/src/test/org/apache/oodt/cas/pge/TestPGETaskInstance.java

Modified: oodt/trunk/pge/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/pom.xml?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/pom.xml (original)
+++ oodt/trunk/pge/pom.xml Mon Apr  9 22:08:42 2012
@@ -111,6 +111,21 @@ the License.
       <version>${project.parent.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.springframework</groupId>
+      <artifactId>spring-core</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.springframework</groupId>
+      <artifactId>spring-beans</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.springframework</groupId>
+      <artifactId>spring-context</artifactId>
+      <version>2.5.4</version>
+    </dependency>
+    <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
       <version>10.0.1</version>
@@ -124,7 +139,13 @@ the License.
       <groupId>net.sf.saxon</groupId>
       <artifactId>saxon-dom</artifactId>
       <version>8.7</version>      
-    </dependency>
+	  </dependency>
+		<dependency>
+			<groupId>org.easymock</groupId>
+			<artifactId>easymock</artifactId>
+			<version>3.1</version>
+			<scope>test</scope>
+		</dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/PGETaskInstance.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/PGETaskInstance.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/PGETaskInstance.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/PGETaskInstance.java Mon Apr  9 22:08:42 2012
@@ -18,16 +18,16 @@ package org.apache.oodt.cas.pge;
 
 //OODT static imports
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.ACTION_IDS;
-import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.ACTION_REPO_FILE;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.ATTEMPT_INGEST_ALL;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_CONFIG_FILE;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_CRAWL_FOR_DIRS;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_RECUR;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.DUMP_METADATA;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.INGEST_CLIENT_TRANSFER_SERVICE_FACTORY;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.INGEST_FILE_MANAGER_URL;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.LOG_FILENAME_PATTERN;
-import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.MET_FILE_EXT;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.NAME;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.MIME_EXTRACTOR_REPO;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.PGE_CONFIG_BUILDER;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.PGE_RUNTIME;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.PROPERTY_ADDERS;
@@ -40,7 +40,6 @@ import static org.apache.oodt.cas.pge.me
 //JDK imports
 import java.io.File;
 import java.io.FileOutputStream;
-import java.io.IOException;
 import java.net.URL;
 import java.util.Date;
 import java.util.LinkedList;
@@ -49,28 +48,23 @@ import java.util.logging.FileHandler;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.logging.SimpleFormatter;
-import java.util.regex.Pattern;
 
 //Apache imports
 import org.apache.commons.lang.Validate;
 
 //OODT imports
+import org.apache.oodt.cas.crawl.AutoDetectProductCrawler;
 import org.apache.oodt.cas.crawl.ProductCrawler;
-import org.apache.oodt.cas.crawl.StdProductCrawler;
 import org.apache.oodt.cas.crawl.status.IngestStatus;
 import org.apache.oodt.cas.metadata.Metadata;
 import org.apache.oodt.cas.metadata.SerializableMetadata;
-import org.apache.oodt.cas.metadata.util.PathUtils;
 import org.apache.oodt.cas.pge.config.DynamicConfigFile;
 import org.apache.oodt.cas.pge.config.OutputDir;
 import org.apache.oodt.cas.pge.config.PgeConfig;
 import org.apache.oodt.cas.pge.config.PgeConfigBuilder;
-import org.apache.oodt.cas.pge.config.RegExprOutputFiles;
-import org.apache.oodt.cas.pge.config.RenamingConv;
 import org.apache.oodt.cas.pge.config.XmlFilePgeConfigBuilder;
 import org.apache.oodt.cas.pge.metadata.PgeMetadata;
 import org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys;
-import org.apache.oodt.cas.pge.writers.PcsMetFileWriter;
 import org.apache.oodt.cas.pge.writers.SciPgeConfigFileWriter;
 import org.apache.oodt.cas.workflow.metadata.CoreMetKeys;
 import org.apache.oodt.cas.workflow.structs.WorkflowTaskConfiguration;
@@ -85,6 +79,7 @@ import org.springframework.context.suppo
 
 //Google imports
 import com.google.common.base.Splitter;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 
 /**
@@ -124,12 +119,9 @@ public class PGETaskInstance implements 
          updateStatus(CONF_FILE_BUILD.getWorkflowStatusName());
          createSciPgeConfigFiles();
 
-         // Run the PGE and process its data.
+         // Run the PGE.
          runPge();
 
-         // Generate product metadata.
-         generateMetadataForProducts();
-
          // Ingest products.
          runIngestCrawler(createProductCrawler());
 
@@ -144,7 +136,10 @@ public class PGETaskInstance implements 
 
    protected void updateStatus(String status) throws Exception {
       logger.log(Level.INFO, "Updating status to workflow as [" + status + "]");
-      wm.updateWorkflowInstanceStatus(workflowInstId, status);
+      if (!wm.updateWorkflowInstanceStatus(workflowInstId, status)) {
+         throw new Exception(
+               "Failed to update workflow status : client returned false");
+      }
    }
 
    protected Logger createLogger() throws Exception {
@@ -352,98 +347,6 @@ public class PGETaskInstance implements 
       }
    }
 
-   protected void generateMetadataForProducts() throws Exception {
-      logger.log(Level.INFO, "Generating metadata for products...");
-      for (OutputDir outputDir : pgeConfig.getOuputDirs()) {
-
-         logger.log(Level.FINE, "Looking for products in output directory ["
-               + outputDir.getPath() + "]");
-
-         File[] createdFiles = new File(outputDir.getPath()).listFiles();
-         logger.log(Level.FINE, "Found files: " + Lists.newArrayList(createdFiles));
-
-         for (File createdFile : createdFiles) {
-            logger.log(Level.FINE, "Inspecting file [" + createdFile + "]");
-            List<RegExprOutputFiles> regexRules = findMatchingRegexRules(
-                  createdFile, outputDir);
-            if (!regexRules.isEmpty()) {
-               Metadata productMetadata = new Metadata();
-               for (RegExprOutputFiles regexRule : regexRules) {
-                  productMetadata.replaceMetadata(generateMetadataForProduct(
-                        createdFile, regexRule));
-               }
-               if (productMetadata.getAllKeys().isEmpty()) {
-                  throw new Exception("No metadata was generated for product ["
-                        + createdFile + "]");
-               }
-               writeMetadataFile(productMetadata, createdFile.getAbsolutePath()
-                     + "." + pgeMetadata.getMetadata(MET_FILE_EXT));
-            } else {
-               logger.log(Level.FINE, "Ignoring file [" + createdFile
-                     + "] because it doesn't matches any product regex rules"
-                     + " for this directory [" + outputDir.getPath() + "]");
-            }
-         }
-      }
-   }
-
-   protected List<RegExprOutputFiles> findMatchingRegexRules(File file,
-         OutputDir outputDir) throws Exception {
-      logger.log(Level.FINE, "Checking file [" + file
-            + "] against regex rules for output directory ["
-            + outputDir.getPath() + "]");
-      List<RegExprOutputFiles> regexRules = Lists.newArrayList();
-      for (RegExprOutputFiles regExprFiles : outputDir.getRegExprOutputFiles()) {
-         if (Pattern.matches(regExprFiles.getRegExp(), file.getName())) {
-            logger.log(Level.FINE, "File [" + file + "] matched regex rule ["
-                  + regExprFiles.getRegExp() + "]");
-            regexRules.add(regExprFiles);
-         }
-      }
-      return regexRules;
-   }
-
-   protected Metadata generateMetadataForProduct(
-         File product, RegExprOutputFiles regexRule) throws Exception {
-      logger.log(Level.FINE, "Loading metadata writer ["
-            + regexRule.getConverterClass() + "] for product [" + product + "]");
-      PcsMetFileWriter writer = (PcsMetFileWriter) Class.forName(
-            regexRule.getConverterClass()).newInstance();
-      if (regexRule.getRenamingConv() != null) {
-         logger.log(Level.FINE, "Renaming product [" + product + "]...");
-         product = renameProduct(product, regexRule.getRenamingConv());
-      }
-      return getMetadataForProduct(product, writer, regexRule.getArgs());
-   }
-
-   protected File renameProduct(File product, RenamingConv renamingConv)
-         throws Exception {
-      Metadata curMetadata = pgeMetadata.asMetadata();
-      curMetadata.replaceMetadata(renamingConv.getTmpReplaceMet());
-      String newFileName = PathUtils.doDynamicReplacement(
-            renamingConv.getRenamingString(), curMetadata);
-      File newFile = new File(product.getParentFile(), newFileName);
-      logger.log(Level.INFO, "Renaming product [" + product + "] to ["
-            + newFile + "]");
-      if (!product.renameTo(newFile)) {
-         throw new IOException("Renaming returned false");
-      }
-      return newFile;
-   }
-
-   protected Metadata getMetadataForProduct(File product,
-         PcsMetFileWriter writer, Object[] args) throws Exception {
-      logger.log(Level.INFO, "Generating metadata for product [" + product + "]");
-      return writer.getMetadataForFile(product, pgeMetadata, args);
-   }
-
-   protected void writeMetadataFile(Metadata metadata, String toFile)
-         throws Exception {
-      logger.log(Level.INFO, "Writing out metadata file [" + toFile + "]");
-      new SerializableMetadata(metadata, "UTF-8", false)
-            .writeMetadataToXmlStream(new FileOutputStream(toFile));
-   }
-
    protected ScriptFile buildPgeRunScript() {
       logger.log(Level.FINE,
             "Creating PGE run script for shell [" + pgeConfig.getShellType()
@@ -508,31 +411,25 @@ public class PGETaskInstance implements 
 
    protected ProductCrawler createProductCrawler() throws Exception {
       logger.log(Level.INFO, "Configuring ProductCrawler...");
-      StdProductCrawler crawler = new StdProductCrawler();
-      crawler.setMetFileExtension(pgeMetadata.getMetadata(MET_FILE_EXT));
+      AutoDetectProductCrawler crawler = new AutoDetectProductCrawler();
+      crawler.setMimeExtractorRepo(pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO));
       crawler.setClientTransferer(pgeMetadata
             .getMetadata(INGEST_CLIENT_TRANSFER_SERVICE_FACTORY));
       crawler.setFilemgrUrl(pgeMetadata.getMetadata(INGEST_FILE_MANAGER_URL));
-      String actionRepoFile = pgeMetadata.getMetadata(ACTION_REPO_FILE);
-      if (actionRepoFile != null && !actionRepoFile.equals("")) {
-         crawler.setApplicationContext(new FileSystemXmlApplicationContext(
-               actionRepoFile));
+      String crawlerConfigFile = pgeMetadata.getMetadata(CRAWLER_CONFIG_FILE);
+      if (!Strings.isNullOrEmpty(crawlerConfigFile)) {
+         crawler.setApplicationContext(
+               new FileSystemXmlApplicationContext(crawlerConfigFile));
          List<String> actionIds = pgeMetadata.getAllMetadata(ACTION_IDS);
          if (actionIds != null) {
             crawler.setActionIds(actionIds);
          }
       }
-      crawler.setRequiredMetadata(
-            pgeMetadata.getAllMetadata(REQUIRED_METADATA));
-      String crawlForDirsString = pgeMetadata
-            .getMetadata(CRAWLER_CRAWL_FOR_DIRS);
-      boolean crawlForDirs = (crawlForDirsString != null) ? crawlForDirsString
-            .toLowerCase().equals("true") : false;
-      String recurString = pgeMetadata.getMetadata(CRAWLER_RECUR);
-      boolean recur = (recurString != null) ? recurString.toLowerCase().equals(
-            "true") : true;
-      crawler.setCrawlForDirs(crawlForDirs);
-      crawler.setNoRecur(!recur);
+      crawler.setRequiredMetadata(pgeMetadata.getAllMetadata(REQUIRED_METADATA));
+      crawler.setCrawlForDirs(Boolean.parseBoolean(pgeMetadata
+            .getMetadata(CRAWLER_CRAWL_FOR_DIRS)));
+      crawler.setNoRecur(!Boolean.parseBoolean(
+            pgeMetadata.getMetadata(CRAWLER_RECUR)));
       logger.log(Level.FINE,
             "Passing Workflow Metadata to CAS-Crawler as global metadata . . .");
       crawler.setGlobalMetadata(pgeMetadata.asMetadata(PgeMetadata.Type.DYNAMIC));

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/OutputDir.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/OutputDir.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/OutputDir.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/OutputDir.java Mon Apr  9 22:08:42 2012
@@ -14,59 +14,47 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.config;
 
-//JDK imports
-import java.util.LinkedList;
-import java.util.List;
+//Apache imports
+import org.apache.commons.lang.Validate;
 
 /**
+ * An output directory where PGE will create files.
  * 
  * @author bfoster
  * @version $Revision$
- * 
- * <p>
- * An output dir for PGE execution
- * </p>.
  */
 public class OutputDir {
 
-    private String path;
-
-    private List<RegExprOutputFiles> regExprOutputFilesList;
-
-    private boolean createBeforeExe;
-
-    public OutputDir(String path, boolean createBeforeExe) {
-        this.path = path;
-        this.createBeforeExe = createBeforeExe;
-        this.regExprOutputFilesList = new LinkedList<RegExprOutputFiles>();
-    }
-
-    public void setPath(String path) {
-        this.path = path;
-    }
-
-    public String getPath() {
-        return this.path;
-    }
-
-    public void addRegExprOutputFiles(RegExprOutputFiles regExprOutputFiles) {
-        this.regExprOutputFilesList.add(regExprOutputFiles);
-    }
-
-    public List<RegExprOutputFiles> getRegExprOutputFiles() {
-        return this.regExprOutputFilesList;
-    }
-
-    public void setCreateBeforeExe(boolean createBeforeExe) {
-        this.createBeforeExe = createBeforeExe;
-    }
-
-    public boolean isCreateBeforeExe() {
-        return this.createBeforeExe;
-    }
+   private String path;
+   private boolean createBeforeExe;
 
+   public OutputDir() {
+      path = null;
+      createBeforeExe = false;
+   }
+
+   public OutputDir(String path, boolean createBeforeExe) {
+      setPath(path);
+      setCreateBeforeExe(createBeforeExe);
+   }
+
+   public void setPath(String path) {
+      Validate.notNull(path, "path cannot be null");
+
+      this.path = path;
+   }
+
+   public String getPath() {
+      return path;
+   }
+
+   public void setCreateBeforeExe(boolean createBeforeExe) {
+      this.createBeforeExe = createBeforeExe;
+   }
+
+   public boolean isCreateBeforeExe() {
+      return createBeforeExe;
+   }
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfig.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfig.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfig.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfig.java Mon Apr  9 22:08:42 2012
@@ -14,42 +14,34 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.config;
 
 //JDK imports
-import java.util.LinkedList;
 import java.util.List;
 
-/**
- * 
- * @author bfoster
- * @version $Revision$
- * 
- * <p>
- * Configuration file for CAS-PGE
- * </p>.
+//Google imports
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+/** 
+ * Configuration file for CAS-PGE.
+ *
+ * @author bfoster (Brian Foster)
  */
 public class PgeConfig {
 
     private List<DynamicConfigFile> dynamicConfigFiles;
-
     private List<OutputDir> outputDirs;
-
     private Object[] propertyAdderCustomArgs;
-
     private String exeDir;
-
     private String shellType;
-
     private List<String> exeCmds;
 
     public PgeConfig() {
-        this.shellType = "sh";
-        this.outputDirs = new LinkedList<OutputDir>();
-        this.dynamicConfigFiles = new LinkedList<DynamicConfigFile>();
-        this.exeCmds = new LinkedList<String>();
+        shellType = "sh";
+        outputDirs = Lists.newArrayList();
+        dynamicConfigFiles = Lists.newArrayList();
+        exeCmds = Lists.newArrayList();
     }
 
     public void addDynamicConfigFile(DynamicConfigFile dynamicConfigFile) {
@@ -57,15 +49,15 @@ public class PgeConfig {
     }
 
     public List<DynamicConfigFile> getDynamicConfigFiles() {
-        return this.dynamicConfigFiles;
+        return dynamicConfigFiles;
     }
 
     public void addOuputDirAndExpressions(OutputDir outputDir) {
-        this.outputDirs.add(outputDir);
+        outputDirs.add(outputDir);
     }
 
     public List<OutputDir> getOuputDirs() {
-        return this.outputDirs;
+        return outputDirs;
     }
 
     public void setExeDir(String exeDir) {
@@ -73,16 +65,17 @@ public class PgeConfig {
     }
 
     public String getExeDir() {
-        return this.exeDir;
+        return exeDir;
     }
 
     public void setShellType(String shellType) {
-        if (shellType != null && !shellType.equals(""))
+        if (!Strings.isNullOrEmpty(shellType)) {
             this.shellType = shellType;
+        }
     }
 
     public String getShellType() {
-        return this.shellType;
+        return shellType;
     }
 
     public void setExeCmds(List<String> exeCmds) {
@@ -90,7 +83,7 @@ public class PgeConfig {
     }
 
     public List<String> getExeCmds() {
-        return this.exeCmds;
+        return exeCmds;
     }
 
     public void setPropertyAdderCustomArgs(Object[] args) {
@@ -101,5 +94,4 @@ public class PgeConfig {
         return propertyAdderCustomArgs != null ? propertyAdderCustomArgs
                 : new Object[0];
     }
-
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigBuilder.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigBuilder.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigBuilder.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigBuilder.java Mon Apr  9 22:08:42 2012
@@ -14,24 +14,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.config;
 
 //OODT imports
 import org.apache.oodt.cas.pge.metadata.PgeMetadata;
 
 /**
- * 
- * @author bfoster
- * @version $Revision$
- * 
- * <p>
- * Interface for building a {@link PgeConfig}
- * </p>.
+ * Interface for building a {@link PgeConfig}.
+ *
+ * @author bfoster (Brian Foster)
  */
 public interface PgeConfigBuilder {
 
     public PgeConfig build(PgeMetadata pgeMetadata) throws Exception;
-
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigMetKeys.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigMetKeys.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigMetKeys.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/PgeConfigMetKeys.java Mon Apr  9 22:08:42 2012
@@ -14,18 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.config;
 
 /**
- * @author mattmann
- * @author bfoster
- * @version $Revision$
- * 
- * <p>
- * Met Keys used when reading a {@link PgeConfig} XML file
- * </p>.
+ * Met Keys used when reading a {@link PgeConfig} XML file.
+ *
+ * @author mattmann (Chris Mattmann)
+ * @author bfoster (Brian Foster)
  */
 public interface PgeConfigMetKeys {
 
@@ -78,17 +73,4 @@ public interface PgeConfigMetKeys {
     public static final String DIR_TAG = "dir";
 
     public static final String CREATE_BEFORE_EXEC_ATTR = "createBeforeExe";
-
-    public static final String FILES_TAG = "files";
-
-    public static final String REGEX_ATTR = "regExp";
-
-    public static final String NAME_ATTR = "name";
-
-    public static final String MET_FILE_WRITER_CLASS_ATTR = "metFileWriterClass";
-
-    public static final String RENAMING_CONV_TAG = "renamingConv";
-
-    public static final String NAMING_EXPR_ATTR = "namingExpr";
-
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/XmlFilePgeConfigBuilder.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/XmlFilePgeConfigBuilder.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/XmlFilePgeConfigBuilder.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/config/XmlFilePgeConfigBuilder.java Mon Apr  9 22:08:42 2012
@@ -27,7 +27,6 @@ import static org.apache.oodt.cas.pge.co
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.ENV_REPLACE_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.ENV_REPLACE_NO_RECUR_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.EXE_TAG;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILES_TAG;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILE_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILE_TAG;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.IMPORT_TAG;
@@ -35,14 +34,9 @@ import static org.apache.oodt.cas.pge.co
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.KEY_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.KEY_GEN_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.METADATA_TAG;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.MET_FILE_WRITER_CLASS_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAMESPACE_ATTR;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAME_ATTR;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAMING_EXPR_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.OUTPUT_TAG;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.PATH_ATTR;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.REGEX_ATTR;
-import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.RENAMING_CONV_TAG;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.SHELL_TYPE_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.SPLIT_ATTR;
 import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.VAL_ATTR;
@@ -292,47 +286,12 @@ public class XmlFilePgeConfigBuilder imp
                     .getElementsByTagName(DIR_TAG);
             for (int i = 0; i < outputDirsList.getLength(); i++) {
                 Element outputDirElement = (Element) outputDirsList.item(i);
-                String dirPath = this.fillIn(outputDirElement
-                        .getAttribute(PATH_ATTR), curMetadata);
-                OutputDir outputDir = new OutputDir(dirPath, this.fillIn(
-                        outputDirElement.getAttribute(CREATE_BEFORE_EXEC_ATTR),
-                        curMetadata).equals("true"));
-                NodeList fileList = outputDirElement
-                        .getElementsByTagName(FILES_TAG);
-                for (int j = 0; j < fileList.getLength(); j++) {
-                    Element fileElement = (Element) fileList.item(j);
-                    String outputFile = fileElement.getAttribute(REGEX_ATTR);
-                    if (outputFile.equals(""))
-                        outputFile = this.fillIn(fileElement
-                                .getAttribute(NAME_ATTR), curMetadata);
-                    NodeList renamingConvNodes = fileElement.getElementsByTagName(RENAMING_CONV_TAG);
-                    RenamingConv renamingConv = null;
-                    if (renamingConvNodes.getLength() > 0) {
-                    	Element renamingElement = (Element) renamingConvNodes.item(0);
-                    	String namingExpr = renamingElement.getAttribute(NAMING_EXPR_ATTR);
-                        if (renamingElement.getAttribute(ENV_REPLACE_ATTR)
-                                .toLowerCase().equals("true"))
-                        	namingExpr = this.fillIn(namingExpr, curMetadata, false);
-                        else if (!renamingElement.getAttribute(ENV_REPLACE_ATTR)
-                                .toLowerCase().equals("false"))
-                        	namingExpr = this.fillIn(namingExpr, curMetadata);
-                        renamingConv = new RenamingConv(namingExpr);
-                    	NodeList metadataNodes = renamingElement.getElementsByTagName(METADATA_TAG);
-                        for (int k = 0; k < metadataNodes.getLength(); k++) 
-                        	renamingConv.addTmpReplaceMet(
-									((Element) metadataNodes.item(k))
-											.getAttribute(KEY_ATTR), Arrays
-											.asList(((Element) metadataNodes
-													.item(k)).getAttribute(
-													VAL_ATTR).split(",")));
-                    }
-                    outputDir.addRegExprOutputFiles(new RegExprOutputFiles(
-                            outputFile, this.fillIn(fileElement
-                                    .getAttribute(MET_FILE_WRITER_CLASS_ATTR),
-                                    curMetadata), renamingConv, (Object[]) this.fillIn(
-                                    fileElement.getAttribute(ARGS_ATTR),
-                                    curMetadata).split(",")));
-                }
+                String dirPath = fillIn(outputDirElement
+                      .getAttribute(PATH_ATTR), curMetadata);
+                OutputDir outputDir = new OutputDir(dirPath,
+                      Boolean.parseBoolean(fillIn(outputDirElement
+                            .getAttribute(CREATE_BEFORE_EXEC_ATTR),
+                            curMetadata)));
                 outputDirs.add(outputDir);
             }
         }
@@ -356,5 +315,4 @@ public class XmlFilePgeConfigBuilder imp
     		throw new Exception("Failed to parse value: " + value, e);
     	}
     }
-
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/metadata/PgeTaskMetKeys.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/metadata/PgeTaskMetKeys.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/metadata/PgeTaskMetKeys.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/metadata/PgeTaskMetKeys.java Mon Apr  9 22:08:42 2012
@@ -101,11 +101,11 @@ public enum PgeTaskMetKeys {
          "PGETask/Ingest/ClientTransferServiceFactory",
          "PCS_ClientTransferServiceFactory"),
    /**
-    * Path to Spring XML file which contains CAS {@link CrawlerAction}s.
+    * Path to Crawler Spring XML config file.
     */
-   ACTION_REPO_FILE(
-         "PGETask/Ingest/ActionRepoFile",
-         "PCS_ActionRepoFile"),
+   CRAWLER_CONFIG_FILE(
+         "PGETask/Ingest/CrawlerConfigFile",
+         "PGETask/Ingest/CrawlerConfigFile"),
    /**
     * The IDs of the {@link CrawlerAction}s in the {@link #ACTION_REPO_FILE} to run.
     */
@@ -126,11 +126,11 @@ public enum PgeTaskMetKeys {
          "PGETask/Ingest/CrawlerRecur",
          "PCS_CrawlerRecur"),
    /**
-    * File extension given to each Product's metadata file.
+    * Path to AutoDetectProductCrawler's MimeExtractorRepo XML config.
     */
-   MET_FILE_EXT(
-         "PGETask/Ingest/MetFileExtension",
-         "PCS_MetFileExtension"),
+   MIME_EXTRACTOR_REPO(
+         "PGETask/Ingest/MimeExtractorRepo",
+         "PGETask/Ingest/MimeExtractorRepo"),
    /**
     * List of metadata keys required for Product ingest.
     */

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/SciPgeConfigFileWriter.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/SciPgeConfigFileWriter.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/SciPgeConfigFileWriter.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/SciPgeConfigFileWriter.java Mon Apr  9 22:08:42 2012
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.writers;
 
 //JDK imports
@@ -26,26 +24,13 @@ import java.io.IOException;
 import org.apache.oodt.cas.metadata.Metadata;
 
 /**
- * 
- * @author bfoster
- * @version $Revision$
- * 
- * <p>
  * Abstract interface for generating PGE config input files defining the input
- * necessary to run the underlying PGE
- * </p>.
+ * necessary to run the underlying PGE.
+ *
+ * @author bfoster (Brian Foster)
  */
 public interface SciPgeConfigFileWriter {
 
-    /**
-     * 
-     * @param sciPgeConfigFilePath
-     * @param inputMetadata
-     * @param customArgs
-     * @return
-     * @throws IOException
-     */
     public File createConfigFile(String sciPgeConfigFilePath,
             Metadata inputMetadata, Object... customArgs) throws IOException;
-
 }

Modified: oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/xslt/XslTransformWriter.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/xslt/XslTransformWriter.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/xslt/XslTransformWriter.java (original)
+++ oodt/trunk/pge/src/main/java/org/apache/oodt/cas/pge/writers/xslt/XslTransformWriter.java Mon Apr  9 22:08:42 2012
@@ -14,13 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package org.apache.oodt.cas.pge.writers.xslt;
 
 //JDK imports
 import java.io.File;
 import java.io.IOException;
+
+//JavaX imports.
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Source;
 import javax.xml.transform.Result;
@@ -35,15 +35,11 @@ import org.apache.oodt.cas.metadata.Meta
 import org.apache.oodt.cas.metadata.SerializableMetadata;
 import org.apache.oodt.cas.pge.writers.SciPgeConfigFileWriter;
 
-/**
- * 
- * @author bfoster
- * @version $Revision$
- * 
- * <p>
+/** 
  * XSL Transformation class which writes Science PGE config files based on the
- * XML format of SerializableMetadata
- * </p>.
+ * XML format of SerializableMetadata.
+ *
+ * @author bfoster (Brian Foster)
  */
 public class XslTransformWriter implements SciPgeConfigFileWriter {
 
@@ -75,5 +71,4 @@ public class XslTransformWriter implemen
                     + sciPgeConfigFilePath + "' : " + e.getMessage());
         }
     }
-
 }

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties Mon Apr  9 22:08:42 2012
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements.  See the NOTICE.txt file distributed with this work for
+# additional information regarding copyright ownership.  The ASF licenses this
+# file to you under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License.  You may obtain a copy of
+# the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+crawler.failure.dir=/path/to/failure/dir
+crawler.backup.dir=/path/to/backup/dir
+crawler.workflowmgr.url=http://localhost:9001
+crawler.filemgr.url=http://localhost:9000
+crawler.met.file.ext=met

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+	<bean class="org.apache.oodt.commons.spring.postprocessor.SetIdBeanPostProcessor" />
+	<bean class="org.apache.oodt.cas.crawl.util.CasPropertyPlaceholderConfigurer">
+
+		<!-- Allow for system-level properties to override all properties below -->
+		<property name="systemPropertiesMode" value="2" />
+
+    <property name="location" value="src/main/resources/examples/Crawler/action-beans.properties" /> 
+	</bean>
+
+	<!-- DELETE Actions -->
+	<bean id="DeleteDataFile" lazy-init="true" class="org.apache.oodt.cas.crawl.action.DeleteFile">
+		<property name="description" value="Deletes the current data file" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestSuccess</value>
+			</list>
+		</property>
+	</bean>
+
+	<bean id="DeleteMetadataFile" lazy-init="true" class="org.apache.oodt.cas.crawl.action.DeleteFile">
+		<property name="description" value="Deletes the metadata file for the current data file" />
+		<property name="fileExtension" value="${crawler.met.file.ext}" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestSuccess</value>
+			</list>
+		</property>
+	</bean>
+
+	<!-- MOVE to FAILURE_DIR Actions -->
+	<bean id="MoveDataFileToFailureDir" lazy-init="true" class="org.apache.oodt.cas.crawl.action.MoveFile">
+		<property name="description" value="Moves the current data file to failure directory" />
+		<property name="toDir" value="${crawler.failure.dir}" />
+		<property name="createToDir" value="true" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestFailure</value>
+			</list>
+		</property>
+	</bean>
+
+	<bean id="MoveMetadataFileToFailureDir" lazy-init="true" class="org.apache.oodt.cas.crawl.action.MoveFile">
+		<property name="description" value="Moves the metadata file for the current data file to failure directory" />
+		<property name="fileExtension" value="${crawler.met.file.ext}" />
+		<property name="toDir" value="${crawler.failure.dir}" />
+		<property name="createToDir" value="true" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestFailure</value>
+			</list>
+		</property>
+	</bean>
+
+	<!-- MOVE to BACKUP_DIR Actions -->
+	<bean id="MoveMetadataFileToBackupDir" lazy-init="true" class="org.apache.oodt.cas.crawl.action.MoveFile">
+		<property name="description" value="Moves the metadata file for the current data file to success directory" />
+		<property name="fileExtension" value="${crawler.met.file.ext}" />
+		<property name="toDir" value="${crawler.backup.dir}" />
+		<property name="createToDir" value="true" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestSuccess</value>
+			</list>
+		</property>
+	</bean>
+
+	<!-- Workflow Manager Actions -->
+	<bean id="TriggerPostIngestWorkflow" lazy-init="true" class="org.apache.oodt.cas.crawl.action.WorkflowMgrStatusUpdate">
+		<property name="description" value="Triggers workflow event with the name [ProductType]Ingest" />
+		<property name="ingestSuffix" value="Ingest" />
+		<property name="workflowMgrUrl" value="${crawler.workflowmgr.url}" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">postIngestSuccess</value>
+			</list>
+		</property>
+	</bean>
+
+	<!-- File Manager Actions -->
+	<bean id="Unique" lazy-init="true" class="org.apache.oodt.cas.crawl.action.FilemgrUniquenessChecker">
+		<property name="description" value="Checks the filemgr against the PRODUCT_NAME for the current data file to make sure it has not yet been ingested" />
+		<property name="filemgrUrl" value="${crawler.filemgr.url}" />
+		<property name="phases">
+			<list>
+				<value type="java.lang.String">preIngest</value>
+			</list>
+		</property>
+	</bean>
+</beans>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/action-beans.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
+	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+	<bean class="org.apache.oodt.cas.crawl.util.CasPropertyOverrideConfigurer" />
+
+	<import resource="action-beans.xml" />
+	<import resource="precondition-beans.xml" />
+  <import resource="naming-beans.xml" />
+
+</beans>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml
------------------------------------------------------------------------------
    svn:executable = *

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/crawler-config.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<input>
+
+   <group name="TokenNameListGroup">
+     <scalar name="Delimeter">_</scalar>
+     <vector name="TokenMetKeys">
+    
+     <!-- 
+     [sourcename]_[tablename]_[productiondatetime].[ext]
+     
+     ex file name: datasource_tablename_20101129122700.txt
+     -->
+       <element>SourceName</element>
+       <element>TableName</element>
+       <element>ProductionDateTime</element>
+     </vector>
+  
+   </group>
+
+
+    <group name="SubstringOffsetGroup">
+     <!--  the indices for the substring met key selection -->
+     <!--  the first element is the starting index in the string -->
+     <!--  NOTE: this assumes 1-indexed strings (and is internally converted to
+           0-indexed).
+      -->
+     <!--  the second element is the length -->
+     
+     <!--  MissionName_Date_StartOrbitNumber_StopOrbitNumber.txt -->
+     
+      <vector name="MissionName">
+        <element>1</element>
+        <element>11</element>      
+      </vector>
+      
+      <vector name="Date">
+        <element>13</element>
+        <element>4</element>
+      </vector>
+      
+      <vector name="StartOrbitNumber">
+        <element>18</element>
+        <element>16</element>
+      </vector>
+      
+      <vector name="StopOrbitNumber">
+        <element>35</element>
+        <element>15</element>
+      </vector>
+      
+   </group>
+   
+   <group name="ProductionDateTimeGroup">
+     <scalar name="DateTimeFormat">yyyyMMddHHmmss</scalar>
+   </group>   
+    
+    <group name="CommonMetadata">
+       <!--  can now use environment variable replacement in any of the values for 
+             scalars or vectors, just use CAS bracket style [ENV VAR NAME]
+         
+             Also can use vector to indicate multiple values for a particular met field.
+        -->
+        <scalar name="DataVersion">1.0</scalar>
+        <scalar name="CollectionName">Products extracted by the OODT Filename Met Extractor</scalar>
+        <scalar name="DataProvider">OODT</scalar>
+    </group>
+</input>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/filename.extractor.config.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<cas:mimetypemap xmlns:cas="http://oodt.jpl.nassa.gov/1.0/cas" magic="false"
+    mimeRepo="mime-types.xml">
+
+  <!-- This will be the default extractor used if product doesn't match any
+    of the mime-types defined in mime-types.xml file (this element is optional) -->
+  <default>
+    <!-- you can put a default NamingConvention here too.
+    <namingConvention id="PDFNamingConv" />
+    -->
+    <!-- You can add multiple extractors here if there are more than one,
+      however extractor is optional, so below extractor can be removed -->
+    <extractor class="org.apache.oodt.cas.metadata.extractors.MetReaderExtractor" />
+  </default>
+
+  <mime type="all/products">
+    <extractor class="org.apache.oodt.cas.metadata.extractors.MetReaderExtractor" />
+  </mime>
+
+  <!-- Example type where PGE generates a metadata file for the product, but
+    the product also needs additional metadata extraction from it... in the
+    mime-types.xml file txt/product has super type all/products, which means
+    that for any product that is a txt/product then all/products extractors
+    will first be run on it (which, in this case, is just one extractor
+    which reads in the existing metadata file) followed by txt/product
+    extractors which is a filename based extractor. -->
+  <mime type="txt/product">
+    <extractor class="org.apache.oodt.cas.metadata.extractors.FilenameTokenMetExtractor">
+      <config file="filename.extractor.config.xml" />
+    </extractor>
+  </mime>
+
+  <!-- This is an example of a type where only one extractor is run on
+    product, this is because pdf/product only specifies one extractor and
+    in mime-types.xml file pdf/product has no super type. -->
+  <mime type="pdf/product">
+    <namingConvention id="PDFNamingConv" />
+    <extractor class="org.apache.oodt.cas.metadata.extractors.FilenameTokenMetExtractor">
+      <config file="filename.extractor.config.xml" />
+    </extractor>
+  </mime>
+
+  <!-- This is an example of a type where two extractors get run... this case
+    is an alternative way of handling txt/product... txt/product will run
+    the same 2 extractors which this type will run but for different reasons...
+    doc/product doesn't have any super type, it instead defines both extractors
+    directly to itself. -->
+  <mime type="doc/product">
+    <extractor class="org.apache.oodt.cas.metadata.extractors.MetReaderExtractor" />
+    <extractor class="org.apache.oodt.cas.metadata.extractors.FilenameTokenMetExtractor">
+      <config file="filename.extractor.config.xml" />
+    </extractor>
+  </mime>
+</cas:mimetypemap>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/mime-extractor-map.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<mime-info>
+	
+	<mime-type type="all/products" />
+
+  <mime-type type="txt/product">
+    <sub-class-of type="all/products" />
+    <glob pattern=".*\.txt" isregex="true" />
+  </mime-type>
+
+  <mime-type type="pdf/product">
+    <glob pattern=".*\.pdf" isregex="true" />
+  </mime-type>
+
+  <mime-type type="doc/product">
+    <glob pattern=".*\.doc" isregex="true" />
+  </mime-type>
+</mime-info>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/mime-types.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
+  xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+  <bean id="PDFNamingConv" class="org.apache.oodt.cas.metadata.filenaming.PathUtilsNamingConvention">
+    <property name="namingConv" value="[ProductType].[NominalDate].pdf" />
+  </bean>
+</beans>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/naming-beans.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml?rev=1311492&view=auto
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml (added)
+++ oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml Mon Apr  9 22:08:42 2012
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements.  See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership.  The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License.  You may obtain a copy of
+the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+	<bean class="org.apache.oodt.commons.spring.postprocessor.SetIdBeanPostProcessor" />
+
+	<!-- Precondition Comparators -->
+	<bean id="CheckThatDataFileSizeIsGreaterThanZero" lazy-init="true" class="org.apache.oodt.cas.metadata.preconditions.FileSizeComparator">
+		<property name="description" value="Check if the current data file size is greater than zero" />
+		<property name="compareItem">
+			<value type="java.lang.Long">0</value>
+		</property>
+		<property name="type" value="greater_than" />
+	</bean>
+
+	<bean id="AprioriUniquessCheckWithFilemgr" lazy-init="true" class="org.apache.oodt.cas.crawl.comparator.FilemgrUniquenessCheckComparator">
+		<property name="description" value="Checks where the current data file existing in the filemgr based on its FILENAME" />
+		<property name="compareItem">
+			<value type="java.lang.Boolean">false</value>
+		</property>
+		<property name="type" value="equal_to" />
+	</bean>
+</beans>

Propchange: oodt/trunk/pge/src/main/resources/examples/Crawler/precondition-beans.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: oodt/trunk/pge/src/main/resources/examples/PgeConfigFiles/pge-config.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/main/resources/examples/PgeConfigFiles/pge-config.xml?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/main/resources/examples/PgeConfigFiles/pge-config.xml (original)
+++ oodt/trunk/pge/src/main/resources/examples/PgeConfigFiles/pge-config.xml Mon Apr  9 22:08:42 2012
@@ -14,6 +14,8 @@ distributed under the License is distrib
 WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 License for the specific language governing permissions and limitations under
 the License.
+
+TODO(bfoster): Make this a working example.
 -->
 <pgeConfig>
     
@@ -39,22 +41,7 @@ the License.
     <output>
 
         <!-- one or more of these -->
-        <dir path="path-to-an-output-dir" createBeforeExe="true-or-false">
-            
-            <!-- one or more of these ** regExp or name can be used-->
-            <files regExp="regex-of-files-to-ingest-to-filemgr" metFileWriterClass="class-path-to-PcsMetFileWriter" args="zero-or-more-args-comma-segregated">
-                
-                <!-- zero or one of these -->                
-                <renamingConv namingExpr="">
-                    
-                    <!-- zero or more of these -->                    
-                    <metadata key="name-of-metadata-field" val="metadata-value"/>
-                    
-                </renamingConv>
-                
-            </files>
-            
-        </dir>
+        <dir path="path-to-an-output-dir" createBeforeExe="true-or-false" />
 
     </output>
     

Modified: oodt/trunk/pge/src/test/org/apache/oodt/cas/pge/TestPGETaskInstance.java
URL: http://svn.apache.org/viewvc/oodt/trunk/pge/src/test/org/apache/oodt/cas/pge/TestPGETaskInstance.java?rev=1311492&r1=1311491&r2=1311492&view=diff
==============================================================================
--- oodt/trunk/pge/src/test/org/apache/oodt/cas/pge/TestPGETaskInstance.java (original)
+++ oodt/trunk/pge/src/test/org/apache/oodt/cas/pge/TestPGETaskInstance.java Mon Apr  9 22:08:42 2012
@@ -17,29 +17,48 @@
 package org.apache.oodt.cas.pge;
 
 //OODT static imports
-import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.NAME;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.ACTION_IDS;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.ATTEMPT_INGEST_ALL;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CONFIG_FILE_PATH;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_CONFIG_FILE;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_CRAWL_FOR_DIRS;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.CRAWLER_RECUR;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.DUMP_METADATA;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.INGEST_CLIENT_TRANSFER_SERVICE_FACTORY;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.INGEST_FILE_MANAGER_URL;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.MIME_EXTRACTOR_REPO;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.NAME;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.PGE_CONFIG_BUILDER;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.PROPERTY_ADDERS;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.REQUIRED_METADATA;
 import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.WORKFLOW_MANAGER_URL;
+import static org.apache.oodt.cas.pge.metadata.PgeTaskStatus.CRAWLING;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
 
-//JDK imports
 import java.io.File;
 import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.UUID;
 import java.util.logging.Handler;
 import java.util.logging.Level;
+import java.util.logging.Logger;
 
-//Apache imports
-import org.apache.commons.io.FileUtils;
+import junit.framework.TestCase;
 
-//OODT imports
+import org.apache.commons.io.FileUtils;
+import org.apache.oodt.cas.crawl.AutoDetectProductCrawler;
+import org.apache.oodt.cas.crawl.ProductCrawler;
+import org.apache.oodt.cas.crawl.action.CrawlerAction;
+import org.apache.oodt.cas.crawl.action.MoveFile;
+import org.apache.oodt.cas.crawl.status.IngestStatus;
 import org.apache.oodt.cas.metadata.Metadata;
-import org.apache.oodt.cas.pge.PGETaskInstance;
 import org.apache.oodt.cas.pge.config.DynamicConfigFile;
 import org.apache.oodt.cas.pge.config.MockPgeConfigBuilder;
 import org.apache.oodt.cas.pge.config.OutputDir;
@@ -52,12 +71,9 @@ import org.apache.oodt.cas.workflow.meta
 import org.apache.oodt.cas.workflow.structs.WorkflowTaskConfiguration;
 import org.apache.oodt.cas.workflow.system.XmlRpcWorkflowManagerClient;
 
-//Google imports
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-
-//JUnit imports
-import junit.framework.TestCase;
+import com.google.common.collect.Sets;
 
 /**
  * Test class for {@link PGETaskInstance}.
@@ -330,6 +346,217 @@ public class TestPGETaskInstance extends
       assertEquals("</cas:metadata>", dumpedMet.get(10));
    }
 
+   // Checks that the crawler built by createProductCrawler() reflects the values set in pgeMetadata.
+   public void testCreateProductCrawler() throws Exception {
+      PGETaskInstance pgeTask = createTestInstance();
+      pgeTask.pgeMetadata.replaceMetadata(MIME_EXTRACTOR_REPO,
+            "src/main/resources/examples/Crawler/mime-extractor-map.xml");
+      pgeTask.pgeMetadata.replaceMetadata(
+            INGEST_CLIENT_TRANSFER_SERVICE_FACTORY,
+            "org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory");
+      pgeTask.pgeMetadata.replaceMetadata(INGEST_FILE_MANAGER_URL, "http://localhost:9000");
+      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_CONFIG_FILE,
+            "src/main/resources/examples/Crawler/crawler-config.xml");
+      pgeTask.pgeMetadata.replaceMetadata(ACTION_IDS,
+            Lists.newArrayList("DeleteDataFile", "MoveMetadataFileToFailureDir"));
+      pgeTask.pgeMetadata.replaceMetadata(REQUIRED_METADATA, Lists.newArrayList("Owners"));
+      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_CRAWL_FOR_DIRS, Boolean.toString(false));
+      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_RECUR, Boolean.toString(true));
+
+      ProductCrawler pc = pgeTask.createProductCrawler();
+      assertEquals(
+            "org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory",
+            pc.getClientTransferer());
+      assertEquals("http://localhost:9000", pc.getFilemgrUrl());
+      assertEquals(
+            Sets.newHashSet("DeleteDataFile", "MoveMetadataFileToFailureDir"),
+            Sets.newHashSet(pc.getActionIds())); // compared as sets, so action id order is irrelevant
+      assertNotNull((CrawlerAction) pc.getApplicationContext().getBean("DeleteDataFile")); // cast keeps the type check
+      MoveFile moveFileAction = (MoveFile) pc.getApplicationContext().getBean("MoveMetadataFileToFailureDir");
+      Properties properties = new Properties();
+      FileInputStream propsIn = new FileInputStream(
+            "src/main/resources/examples/Crawler/action-beans.properties");
+      try {
+         properties.load(propsIn);
+      } finally {
+         propsIn.close(); // Properties.load() leaves the stream open; release the file handle even on failure
+      }
+      assertEquals(properties.get("crawler.failure.dir"), moveFileAction.getToDir());
+      assertTrue(pc.getRequiredMetadata().contains("Owners"));
+      assertFalse(pc.isCrawlForDirs());
+      assertFalse(pc.isNoRecur()); // CRAWLER_RECUR=true is expected to surface as noRecur=false
+   }
+
+   public void testRunIngestCrawler() throws Exception { // drives runIngestCrawler(pc) through three cases, each with fresh mocks
+      // Case: UpdateStatus Success, VerifyIngest Success
+      PGETaskInstance pgeTask = createTestInstance();
+      pgeTask.pgeConfig.addOuputDirAndExpressions(new OutputDir("/tmp/dir1", true)); // a crawl of this dir is expected below
+      pgeTask.pgeConfig.addOuputDirAndExpressions(new OutputDir("/tmp/dir2", true)); // likewise for the second dir
+      pgeTask.pgeMetadata.replaceMetadata(ATTEMPT_INGEST_ALL, Boolean.toString(true));
+      pgeTask.workflowInstId = "WorkflowInstanceId";
+
+      pgeTask.wm = createMock(XmlRpcWorkflowManagerClient.class); // mock replaces the workflow manager client
+      expect(pgeTask.wm.updateWorkflowInstanceStatus(pgeTask.workflowInstId,
+            CRAWLING.getWorkflowStatusName())).andReturn(true); // status update to CRAWLING reports success
+      replay(pgeTask.wm);
+
+      AutoDetectProductCrawler pc = createMock(AutoDetectProductCrawler.class);
+      pc.crawl(new File("/tmp/dir1")); // before replay(): records an expected call on the mock
+      pc.crawl(new File("/tmp/dir2"));
+      expect(pc.getIngestStatus()).andReturn(Collections.<IngestStatus>emptyList()); // no ingest statuses reported
+      replay(pc);
+
+      pgeTask.runIngestCrawler(pc);
+
+      verify(pgeTask.wm); // fails if a recorded call was never made
+      verify(pc);
+
+      // Case: UpdateStatus Fail (no expectations are recorded on the crawler mock)
+      pgeTask.wm = createMock(XmlRpcWorkflowManagerClient.class);
+      expect(pgeTask.wm.updateWorkflowInstanceStatus(pgeTask.workflowInstId,
+            CRAWLING.getWorkflowStatusName())).andReturn(false);
+      replay(pgeTask.wm);
+
+      pc = createMock(AutoDetectProductCrawler.class);
+      replay(pc);
+
+      try {
+         pgeTask.runIngestCrawler(pc);
+         fail("Should have thrown"); // fail() raises an Error, so the catch below does not swallow it
+      } catch (Exception e) { /* expected: an exception when the status update returns false */ }
+
+      verify(pgeTask.wm);
+      verify(pc);
+
+      // Case: UpdateStatus Success, VerifyIngest Fail
+      pgeTask.wm = createMock(XmlRpcWorkflowManagerClient.class);
+      expect(pgeTask.wm.updateWorkflowInstanceStatus(pgeTask.workflowInstId,
+            CRAWLING.getWorkflowStatusName())).andReturn(true);
+      replay(pgeTask.wm);
+
+      pc = createMock(AutoDetectProductCrawler.class);
+      pc.crawl(new File("/tmp/dir1"));
+      pc.crawl(new File("/tmp/dir2"));
+      IngestStatus failedIngestStatus = new IngestStatus() { // hand-written status: Result.FAILURE for /tmp/dir1
+         @Override
+         public String getMessage() {
+            return "Ingest Failure";
+         }
+         @Override
+         public File getProduct() {
+            return new File("/tmp/dir1");
+         }
+         @Override
+         public Result getResult() {
+            return Result.FAILURE;
+         }
+      };
+      expect(pc.getIngestStatus()).andReturn(
+            Lists.newArrayList(failedIngestStatus)); // the crawler reports exactly one failed ingest
+      replay(pc);
+
+      try {
+         pgeTask.runIngestCrawler(pc);
+         fail("Should have thrown");
+      } catch (Exception e) { /* expected: an exception when an ingest status is FAILURE */ }
+
+      verify(pgeTask.wm);
+      verify(pc);
+   }
+
+   public void testVerifyIngests() throws Exception { // checks verifyIngests(pc) against FAILURE, PRECONDS_FAILED and SUCCESS statuses
+      PGETaskInstance pgeTask = createTestInstance();
+
+      // Test case: a FAILURE status is expected to make verifyIngests throw.
+      AutoDetectProductCrawler pc = createMock(AutoDetectProductCrawler.class);
+      IngestStatus failedIngestStatus = new IngestStatus() { // hand-written status: Result.FAILURE for /tmp/dir1
+         @Override
+         public String getMessage() {
+            return "Ingest Failure";
+         }
+         @Override
+         public File getProduct() {
+            return new File("/tmp/dir1");
+         }
+         @Override
+         public Result getResult() {
+            return Result.FAILURE;
+         }
+      };
+      expect(pc.getIngestStatus()).andReturn(
+            Lists.newArrayList(failedIngestStatus));
+      replay(pc);
+
+      try {
+         pgeTask.verifyIngests(pc);
+         fail("Should have thrown"); // fail() raises an Error, so the catch below does not swallow it
+      } catch (Exception e) { /* expected: an exception for the FAILURE status */ }
+
+      verify(pc);
+
+      // Test case: a precondition failure is logged as a warning, but verification succeeds overall.
+      pc = createMock(AutoDetectProductCrawler.class);
+      IngestStatus precondsFailIngestStatus = new IngestStatus() { // status with Result.PRECONDS_FAILED
+         @Override
+         public String getMessage() {
+            return "Preconditions failed";
+         }
+         @Override
+         public File getProduct() {
+            return new File("/tmp/dir1");
+         }
+         @Override
+         public Result getResult() {
+            return Result.PRECONDS_FAILED;
+         }
+      };
+      expect(pc.getIngestStatus()).andReturn(
+            Lists.newArrayList(precondsFailIngestStatus));
+      replay(pc);
+
+      pgeTask.logger = createMock(Logger.class); // mock logger so the exact log(Level, String) calls can be verified
+      pgeTask.logger.log(Level.INFO, "Verifying ingests successful...");
+      pgeTask.logger.log(Level.WARNING,
+            "Product was not ingested [file='/tmp/dir1',result='PRECONDS_FAILED',msg='Preconditions failed']");
+      pgeTask.logger.log(Level.INFO, "Ingests were successful");
+      replay(pgeTask.logger);
+
+      pgeTask.verifyIngests(pc); // must return normally in this case
+
+      verify(pc);
+      verify(pgeTask.logger);
+
+      // Test case: a SUCCESS status yields only the two INFO messages.
+      pc = createMock(AutoDetectProductCrawler.class);
+      IngestStatus successIngestStatus = new IngestStatus() { // status with Result.SUCCESS
+         @Override
+         public String getMessage() {
+            return "Ingest Success";
+         }
+         @Override
+         public File getProduct() {
+            return new File("/tmp/dir1");
+         }
+         @Override
+         public Result getResult() {
+            return Result.SUCCESS;
+         }
+      };
+      expect(pc.getIngestStatus()).andReturn(
+            Lists.newArrayList(successIngestStatus));
+      replay(pc);
+
+      pgeTask.logger = createMock(Logger.class); // fresh mock: no WARNING call is recorded this time
+      pgeTask.logger.log(Level.INFO, "Verifying ingests successful...");
+      pgeTask.logger.log(Level.INFO, "Ingests were successful");
+      replay(pgeTask.logger);
+
+      pgeTask.verifyIngests(pc);
+
+      verify(pc);
+      verify(pgeTask.logger);
+   }
+
    private PGETaskInstance createTestInstance() throws Exception {
       return createTestInstance(Long.toString(System.currentTimeMillis()));
    }