You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by bf...@apache.org on 2012/03/27 08:49:13 UTC
svn commit: r1305745 [1/2] - in /oodt/trunk/crawler: ./
src/main/java/org/apache/oodt/cas/crawl/
src/main/java/org/apache/oodt/cas/crawl/typedetection/ src/main/resources/
src/main/resources/examples/ src/test/org/apache/oodt/cas/crawl/
src/test/org/ap...
Author: bfoster
Date: Tue Mar 27 06:49:13 2012
New Revision: 1305745
URL: http://svn.apache.org/viewvc?rev=1305745&view=rev
Log:
- Introduce a CAS-Metadata based renaming interface (CAS-Crawler changes)
------------
OODT-426
Added:
oodt/trunk/crawler/src/main/resources/naming-beans.xml (with props)
oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java (with props)
oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java (with props)
oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/typedetection/
oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/typedetection/TestMimeExtractorConfigReader.java (with props)
Modified:
oodt/trunk/crawler/pom.xml
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/AutoDetectProductCrawler.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/MetExtractorProductCrawler.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/ProductCrawler.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/StdProductCrawler.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MetExtractorSpec.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigMetKeys.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigReader.java
oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorRepo.java
oodt/trunk/crawler/src/main/resources/cmd-line-options.xml
oodt/trunk/crawler/src/main/resources/crawler-config.xml
oodt/trunk/crawler/src/main/resources/examples/mime-extractor-map.xml
Modified: oodt/trunk/crawler/pom.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/pom.xml?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/pom.xml (original)
+++ oodt/trunk/crawler/pom.xml Tue Mar 27 06:49:13 2012
@@ -177,6 +177,12 @@ the License.
<artifactId>geronimo-activation_1.1_spec</artifactId>
<version>1.1</version>
</dependency>
+ <dependency>
+ <groupId>org.easymock</groupId>
+ <artifactId>easymock</artifactId>
+ <version>3.1</version>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/AutoDetectProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/AutoDetectProductCrawler.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/AutoDetectProductCrawler.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/AutoDetectProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -14,8 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
package org.apache.oodt.cas.crawl;
//OODT imports
@@ -25,6 +23,7 @@ import org.apache.oodt.cas.crawl.typedet
import org.apache.oodt.cas.filemgr.metadata.CoreMetKeys;
import org.apache.oodt.cas.metadata.MetExtractor;
import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.cas.metadata.filenaming.NamingConvention;
import org.apache.oodt.cas.metadata.preconditions.PreCondEvalUtils;
//JDK imports
@@ -39,10 +38,6 @@ import org.apache.tika.mime.MimeType; //
import org.springframework.beans.factory.annotation.Required;
/**
- * @author mattmann
- * @author bfoster
- * @version $Revision$
- *
* <p>
* A {@link ProductCrawler} that uses a suite of files to define its crawling
* and ingestion policy:
@@ -68,65 +63,76 @@ import org.springframework.beans.factory
* is then mapped to an extractor via the <code>mime-extractor-map.xml</code>
* file, described above. </li>
* </p>.
+ *
+ * @author mattmann (Chris Mattmann)
+ * @author bfoster (Brian Foster)
*/
public class AutoDetectProductCrawler extends ProductCrawler implements
- CoreMetKeys {
-
- private MimeExtractorRepo mimeExtractorRepo;
-
- protected Metadata getMetadataForProduct(File product) {
- try {
- List<MetExtractorSpec> specs = this.mimeExtractorRepo
- .getExtractorSpecsForFile(product);
- Metadata metadata = new Metadata();
- metadata.addMetadata(MIME_TYPES_HIERARCHY,
- this.mimeExtractorRepo.getMimeTypes(product));
- for (int i = 0; i < specs.size(); i++) {
- Metadata m = ((MetExtractorSpec) specs.get(i))
- .getMetExtractor().extractMetadata(product);
- if (m != null)
- metadata.addMetadata(m.getHashtable(), true);
- }
- return metadata;
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Failed to get metadata for product "
- + product + " : " + e.getMessage());
- return new Metadata();
- }
- }
-
- protected boolean passesPreconditions(File product) {
- try {
- List<MetExtractorSpec> specs = this.mimeExtractorRepo
- .getExtractorSpecsForFile(product);
- if (specs.size() > 0) {
- if (this.getApplicationContext() != null) {
- PreCondEvalUtils evalUtils = new PreCondEvalUtils(
- this.getApplicationContext());
- for (int i = 0; i < specs.size(); i++) {
- List<String> preCondComparatorIds = ((MetExtractorSpec) specs
- .get(i)).getPreCondComparatorIds();
- if (!evalUtils.eval(preCondComparatorIds, product))
- return false;
- }
- }
- return true;
- } else {
- LOG.log(Level.WARNING, "No extractor specs specified for "
- + product);
- return false;
- }
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Failed to evaluate preconditions : "
- + e.getMessage());
- return false;
- }
- }
+ CoreMetKeys {
- @Required
- public void setMimeExtractorRepo(String mimeExtractorRepo) throws Exception {
- this.mimeExtractorRepo = MimeExtractorConfigReader
- .read(mimeExtractorRepo);
- }
+ private MimeExtractorRepo mimeExtractorRepo;
+ @Override
+ protected Metadata getMetadataForProduct(File product) throws Exception {
+ List<MetExtractorSpec> specs = this.mimeExtractorRepo
+ .getExtractorSpecsForFile(product);
+ Metadata metadata = new Metadata();
+ metadata.addMetadata(MIME_TYPES_HIERARCHY,
+ mimeExtractorRepo.getMimeTypes(product));
+ for (int i = 0; i < specs.size(); i++) {
+ Metadata m = ((MetExtractorSpec) specs.get(i)).getMetExtractor()
+ .extractMetadata(product);
+ if (m != null) {
+ metadata.addMetadata(m.getHashtable(), true);
+ }
+ }
+ return metadata;
+ }
+
+ @Override
+ protected boolean passesPreconditions(File product) {
+ try {
+ List<MetExtractorSpec> specs = this.mimeExtractorRepo
+ .getExtractorSpecsForFile(product);
+ if (specs.size() > 0) {
+ if (this.getApplicationContext() != null) {
+ PreCondEvalUtils evalUtils = new PreCondEvalUtils(
+ this.getApplicationContext());
+ for (int i = 0; i < specs.size(); i++) {
+ List<String> preCondComparatorIds = ((MetExtractorSpec) specs
+ .get(i)).getPreCondComparatorIds();
+ if (!evalUtils.eval(preCondComparatorIds, product))
+ return false;
+ }
+ }
+ return true;
+ } else {
+ LOG.log(Level.WARNING, "No extractor specs specified for "
+ + product);
+ return false;
+ }
+ } catch (Exception e) {
+ LOG.log(Level.WARNING,
+ "Failed to evaluate preconditions : " + e.getMessage());
+ return false;
+ }
+ }
+
+ @Override
+ protected File renameProduct(File product, Metadata productMetadata)
+ throws Exception {
+ NamingConvention namingConvention = mimeExtractorRepo
+ .getNamingConvention(mimeExtractorRepo.getMimeType(product));
+ if (namingConvention != null) {
+ return namingConvention.rename(product, productMetadata);
+ } else {
+ return product;
+ }
+ }
+
+ @Required
+ public void setMimeExtractorRepo(String mimeExtractorRepo) throws Exception {
+ this.mimeExtractorRepo = MimeExtractorConfigReader
+ .read(mimeExtractorRepo);
+ }
}
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/MetExtractorProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/MetExtractorProductCrawler.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/MetExtractorProductCrawler.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/MetExtractorProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -14,20 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
package org.apache.oodt.cas.crawl;
//OODT imports
import org.apache.oodt.cas.metadata.MetExtractor;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.metadata.exceptions.MetExtractionException;
+import org.apache.oodt.cas.metadata.filenaming.NamingConvention;
import org.apache.oodt.cas.metadata.preconditions.PreConditionComparator;
//JDK imports
import java.io.File;
import java.util.List;
-import java.util.logging.Level;
//Spring imports
import org.springframework.beans.factory.annotation.Required;
@@ -41,7 +39,6 @@ import org.springframework.beans.factory
* A variant of the Standard Product Crawler where .met files are generated on
* the fly as product files are encountered.
* </p>
- *
*/
public class MetExtractorProductCrawler extends ProductCrawler {
@@ -51,16 +48,14 @@ public class MetExtractorProductCrawler
private List<String> preCondIds;
- protected Metadata getMetadataForProduct(File product) {
- try {
- return this.metExtractor.extractMetadata(product);
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Failed to extract metadata from file "
- + e.getMessage());
- return new Metadata();
- }
+ private String namingConventionId;
+
+ @Override
+ protected Metadata getMetadataForProduct(File product) throws Exception {
+ return metExtractor.extractMetadata(product);
}
+ @Override
protected boolean passesPreconditions(File product) {
if (this.getPreCondIds() != null) {
for (String preCondId : this.getPreCondIds()) {
@@ -72,6 +67,22 @@ public class MetExtractorProductCrawler
return product.exists() && product.length() > 0;
}
+ @Override
+ protected File renameProduct(File product, Metadata productMetadata)
+ throws Exception {
+ if (getNamingConventionId() != null) {
+ NamingConvention namingConvention = (NamingConvention)
+ getApplicationContext().getBean(getNamingConventionId());
+ if (namingConvention == null) {
+ throw new Exception("NamingConvention Id '" + getNamingConventionId()
+ + "' is not defined");
+ }
+ return namingConvention.rename(product, productMetadata);
+ } else {
+ return product;
+ }
+ }
+
@Required
public void setMetExtractor(String metExtractor)
throws MetExtractionException, InstantiationException,
@@ -99,4 +110,11 @@ public class MetExtractorProductCrawler
this.preCondIds = preCondIds;
}
+ public void setNamingConventionId(String namingConventionId) {
+ this.namingConventionId = namingConventionId;
+ }
+
+ public String getNamingConventionId() {
+ return namingConventionId;
+ }
}
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/ProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/ProductCrawler.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/ProductCrawler.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/ProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -26,6 +26,8 @@ import org.apache.oodt.cas.filemgr.inges
import org.apache.oodt.cas.filemgr.metadata.CoreMetKeys;
import org.apache.oodt.cas.metadata.Metadata;
+import com.google.common.annotations.VisibleForTesting;
+
//JDK imports
import java.io.File;
import java.io.FileFilter;
@@ -38,269 +40,327 @@ import java.util.logging.Level;
import java.util.logging.Logger;
/**
- * @author mattmann (Chris Mattmann)
- * @author bfoster (Brian Foster)
- * @version $Revision$
- *
- * <p>
* An abstract base class for Product Crawling. This class provides methods to
* communicate with the file manager and parse met files that show how to ingest
* a particular Product into the File Manager.
- * </p>
+ *
+ * @author mattmann (Chris Mattmann)
+ * @author bfoster (Brian Foster)
*/
public abstract class ProductCrawler extends ProductCrawlerBean {
- /* our log stream */
- protected static Logger LOG = Logger.getLogger(ProductCrawler.class
- .getName());
-
- // filter to only find directories when doing a listFiles
- protected static FileFilter DIR_FILTER = new FileFilter() {
- public boolean accept(File file) {
- return file.isDirectory();
- }
- };
-
- // filter to only find product files, not met files
- protected static FileFilter FILE_FILTER = new FileFilter() {
- public boolean accept(File file) {
- return file.isFile();
- }
- };
-
- private CrawlerActionRepo actionRepo;
- private List<IngestStatus> ingestStatus;
- private Ingester ingester;
-
- public void crawl() {
- crawl(new File(this.getProductPath()));
- }
-
- public void crawl(File dirRoot) {
- this.ingestStatus = new Vector<IngestStatus>();
-
- // Load actions
- if (this.getApplicationContext() != null) {
- (this.actionRepo = new CrawlerActionRepo())
- .loadActionsFromBeanFactory(this.getApplicationContext(), this
- .getActionIds());
- validateActions();
- }
-
- // create ingester
- this.ingester = new StdIngester(this.getClientTransferer());
-
- if (dirRoot == null || ((dirRoot != null && !dirRoot.exists())))
- throw new IllegalArgumentException("dir root is null or non existant!");
-
- // start crawling
- Stack<File> stack = new Stack<File>();
- stack.push(dirRoot.isDirectory() ? dirRoot : dirRoot.getParentFile());
- while (!stack.isEmpty()) {
- File dir = (File) stack.pop();
- LOG.log(Level.INFO, "Crawling " + dir);
-
- File[] productFiles = null;
- if (this.isCrawlForDirs()) {
- productFiles = dir.listFiles(DIR_FILTER);
- } else {
- productFiles = dir.listFiles(FILE_FILTER);
- }
-
- for (int j = 0; j < productFiles.length; j++) {
- try {
- this.handleFile(productFiles[j]);
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Failed to process file : "
- + e.getMessage());
- }
+ /* our log stream */
+ protected static Logger LOG = Logger.getLogger(ProductCrawler.class
+ .getName());
+
+ // filter to only find directories when doing a listFiles
+ protected static FileFilter DIR_FILTER = new FileFilter() {
+ public boolean accept(File file) {
+ return file.isDirectory();
+ }
+ };
+
+ // filter to only find product files, not met files
+ protected static FileFilter FILE_FILTER = new FileFilter() {
+ public boolean accept(File file) {
+ return file.isFile();
+ }
+ };
+
+ protected List<IngestStatus> ingestStatus = new Vector<IngestStatus>();
+ protected CrawlerActionRepo actionRepo;
+ protected Ingester ingester;
+
+ public void crawl() {
+ crawl(new File(getProductPath()));
+ }
+
+ public void crawl(File dirRoot) {
+ // Reset ingest status.
+ ingestStatus.clear();
+
+ // Load actions.
+ loadAndValidateActions();
+
+ // Create Ingester.
+ setupIngester();
+
+ // Verify valid crawl directory.
+ if (dirRoot == null || !dirRoot.exists()) {
+ throw new IllegalArgumentException("dir root is null or non existant!");
+ }
+
+ // Start crawling.
+ Stack<File> stack = new Stack<File>();
+ stack.push(dirRoot.isDirectory() ? dirRoot : dirRoot.getParentFile());
+ while (!stack.isEmpty()) {
+ File dir = (File) stack.pop();
+ LOG.log(Level.INFO, "Crawling " + dir);
+
+ File[] productFiles = null;
+ if (isCrawlForDirs()) {
+ productFiles = dir.listFiles(DIR_FILTER);
+ } else {
+ productFiles = dir.listFiles(FILE_FILTER);
+ }
+
+ for (int j = 0; j < productFiles.length; j++) {
+ ingestStatus.add(handleFile(productFiles[j]));
+ }
+
+ if (!isNoRecur()) {
+ File[] subdirs = dir.listFiles(DIR_FILTER);
+ if (subdirs != null) {
+ for (int j = 0; j < subdirs.length; j++) {
+ stack.push(subdirs[j]);
+ }
}
-
- if (!this.isNoRecur()) {
- File[] subdirs = dir.listFiles(DIR_FILTER);
- if (subdirs != null)
- for (int j = 0; j < subdirs.length; j++)
- stack.push(subdirs[j]);
- }
- }
- }
-
- public List<IngestStatus> getIngestStatus() {
- return Collections.unmodifiableList(this.ingestStatus);
- }
-
- private void validateActions() {
- StringBuffer actionErrors = new StringBuffer("");
- for (CrawlerAction action : actionRepo.getActions()) {
- try {
- action.validate();
- } catch (Exception e) {
- actionErrors.append(" " + action.getId() + ": " + e.getMessage() + "\n");
- }
- }
- if (actionErrors.length() > 0) {
- throw new RuntimeException("Actions failed validation:\n" + actionErrors);
- }
- }
-
- private synchronized boolean containsRequiredMetadata(
- Metadata productMetadata) {
- for (int i = 0; i < this.getRequiredMetadata().size(); i++) {
- if (productMetadata.getMetadata((String) this.getRequiredMetadata()
- .get(i)) == null) {
- LOG.log(Level.WARNING, "Missing required metadata field "
- + this.getRequiredMetadata().get(i));
- return false;
- }
- }
- return true;
- }
-
- private void addKnownMetadata(File product, Metadata productMetadata) {
- if (productMetadata.getMetadata(CoreMetKeys.PRODUCT_NAME) == null)
- productMetadata.addMetadata(CoreMetKeys.PRODUCT_NAME, product
- .getName());
- if (productMetadata.getMetadata(CoreMetKeys.FILENAME) == null)
- productMetadata.addMetadata(CoreMetKeys.FILENAME, product
- .getName());
- if (productMetadata.getMetadata(CoreMetKeys.FILE_LOCATION) == null)
- productMetadata.addMetadata(CoreMetKeys.FILE_LOCATION, product
- .getAbsoluteFile().getParentFile().getAbsolutePath());
- }
-
- private void handleFile(final File product) {
- LOG.log(Level.INFO, "Handling file " + product);
- final IngestStatus.Result ingestResult;
- final String message;
- if (this.passesPreconditions(product)) {
- Metadata productMetadata = new Metadata();
- productMetadata.addMetadata(this.getGlobalMetadata().getHashtable());
- productMetadata.addMetadata(this.getMetadataForProduct(product).getHashtable(), true);
- this.addKnownMetadata(product, productMetadata);
-
- boolean isRequiredMetadataPresent = this.containsRequiredMetadata(productMetadata);
- boolean isPreIngestActionsComplete = this.performPreIngestActions(product, productMetadata);
-
- if (this.isSkipIngest()) {
- ingestResult = IngestStatus.Result.SKIPPED;
- message = "Crawler ingest turned OFF";
- LOG.log(Level.INFO, "Skipping ingest of product: ["
- + product.getAbsolutePath() + "]");
- } else {
- if (isRequiredMetadataPresent
- && isPreIngestActionsComplete
- && this.ingest(product, productMetadata)) {
- ingestResult = IngestStatus.Result.SUCCESS;
- message = "Ingest was successful";
- LOG.log(Level.INFO, "Successful ingest of product: ["
- + product.getAbsolutePath() + "]");
- this
- .performPostIngestOnSuccessActions(product,
- productMetadata);
- } else {
- ingestResult = IngestStatus.Result.FAILURE;
- if (!isRequiredMetadataPresent)
- message = "Missing required metadata";
- else if (!isPreIngestActionsComplete)
- message = "PreIngest actions failed to complete";
- else
- message = "Failed to ingest product";
- LOG.log(Level.WARNING, "Failed to ingest product: ["
- + product.getAbsolutePath()
- + "]: performing postIngestFail actions");
- this.performPostIngestOnFailActions(product, productMetadata);
- }
- }
- } else {
- ingestResult = IngestStatus.Result.PRECONDS_FAILED;
- message = "Failed to pass preconditions";
- LOG.log(Level.WARNING,
- "Failed to pass preconditions for ingest of product: ["
- + product.getAbsolutePath() + "]");
- }
- this.ingestStatus.add(new IngestStatus() {
- public File getProduct() {
- return product;
- }
- public Result getResult() {
- return ingestResult;
- }
- public String getMessage() {
- return message;
- }
- });
- }
-
- private boolean ingest(File product, Metadata productMetdata) {
- try {
- LOG.log(Level.INFO, "ProductCrawler: Ready to ingest product: ["
- + product + "]: ProductType: ["
- + productMetdata.getMetadata(PRODUCT_TYPE) + "]");
- String productId = ingester.ingest(new URL(this.getFilemgrUrl()),
- product, productMetdata);
- LOG.log(Level.INFO, "Successfully ingested product: [" + product
- + "]: product id: " + productId);
- } catch (Exception e) {
- LOG.log(Level.WARNING,
- "ProductCrawler: Exception ingesting product: [" + product
- + "]: Message: " + e.getMessage()
- + ": attempting to continue crawling", e);
+ }
+ }
+ }
+
+ public IngestStatus handleFile(File product) {
+ LOG.log(Level.INFO, "Handling file " + product);
+
+ // Check preconditions.
+ if (!passesPreconditions(product)) {
+ LOG.log(Level.WARNING,
+ "Failed to pass preconditions for ingest of product: ["
+ + product.getAbsolutePath() + "]");
+ return createIngestStatus(product,
+ IngestStatus.Result.PRECONDS_FAILED,
+ "Failed to pass preconditions");
+ }
+
+ // Generate Metadata for product.
+ Metadata productMetadata = new Metadata();
+ productMetadata.addMetadata(getGlobalMetadata());
+ try {
+ productMetadata.addMetadata(getMetadataForProduct(product));
+ } catch (Exception e) {
+ LOG.log(Level.SEVERE,
+ "Failed to get metadata for product : " + e.getMessage(), e);
+ performPostIngestOnFailActions(product, productMetadata);
+ return createIngestStatus(product,
+ IngestStatus.Result.FAILURE,
+ "Failed to get metadata for product : " + e.getMessage());
+ }
+
+ // Rename the product.
+ try {
+ product = renameProduct(product, productMetadata);
+ } catch (Exception e) {
+ LOG.log(Level.SEVERE,
+ "Failed to rename product : " + e.getMessage(), e);
+ performPostIngestOnFailActions(product, productMetadata);
+ return createIngestStatus(product, IngestStatus.Result.FAILURE,
+ "Failed to rename product : " + e.getMessage());
+ }
+
+ // Set known metadata if not already specified.
+ addKnownMetadata(product, productMetadata);
+
+ // Check that metadata contains required metadata.
+ if (!containsRequiredMetadata(productMetadata)) {
+ LOG.log(Level.SEVERE, "Missing required metadata for product '"
+ + product + "'");
+ performPostIngestOnFailActions(product, productMetadata);
+ return createIngestStatus(product, IngestStatus.Result.FAILURE,
+ "Missing required metadata");
+ }
+
+ // Run preIngest actions.
+ if (!performPreIngestActions(product, productMetadata)) {
+ performPostIngestOnFailActions(product, productMetadata);
+ return createIngestStatus(product, IngestStatus.Result.FAILURE,
+ "PreIngest actions failed to complete");
+ }
+
+ // Check if ingest has been turned off.
+ if (isSkipIngest()) {
+ LOG.log(Level.INFO, "Skipping ingest of product: ["
+ + product.getAbsolutePath() + "]");
+ return createIngestStatus(product, IngestStatus.Result.SKIPPED,
+ "Crawler ingest turned OFF");
+ }
+
+ // Ingest product.
+ boolean ingestSuccess = ingest(product, productMetadata);
+
+ // On Successful Ingest.
+ if (ingestSuccess) {
+ LOG.log(Level.INFO, "Successful ingest of product: ["
+ + product.getAbsolutePath() + "]");
+ performPostIngestOnSuccessActions(product, productMetadata);
+ return createIngestStatus(product,
+ IngestStatus.Result.SUCCESS, "Ingest was successful");
+
+ // On Failed Ingest.
+ } else {
+ LOG.log(Level.WARNING, "Failed to ingest product: ["
+ + product.getAbsolutePath()
+ + "]: performing postIngestFail actions");
+ performPostIngestOnFailActions(product, productMetadata);
+ return createIngestStatus(product, IngestStatus.Result.FAILURE,
+ "Failed to ingest product");
+ }
+ }
+
+ public List<IngestStatus> getIngestStatus() {
+ return Collections.unmodifiableList(ingestStatus);
+ }
+
+ protected abstract boolean passesPreconditions(File product);
+
+ protected abstract Metadata getMetadataForProduct(File product)
+ throws Exception;
+
+ protected abstract File renameProduct(File product, Metadata productMetadata)
+ throws Exception;
+
+ @VisibleForTesting void setupIngester() {
+ ingester = new StdIngester(getClientTransferer());
+ }
+
+ @VisibleForTesting void loadAndValidateActions() {
+ if (actionRepo == null && getApplicationContext() != null) {
+ actionRepo = new CrawlerActionRepo();
+ actionRepo.loadActionsFromBeanFactory(
+ getApplicationContext(), getActionIds());
+ validateActions();
+ }
+ }
+
+ @VisibleForTesting void validateActions() {
+ StringBuffer actionErrors = new StringBuffer("");
+ for (CrawlerAction action : actionRepo.getActions()) {
+ try {
+ action.validate();
+ } catch (Exception e) {
+ actionErrors.append(" " + action.getId() + ": " + e.getMessage()
+ + "\n");
+ }
+ }
+ if (actionErrors.length() > 0) {
+ throw new RuntimeException("Actions failed validation:\n"
+ + actionErrors);
+ }
+ }
+
+ @VisibleForTesting synchronized boolean containsRequiredMetadata(
+ Metadata productMetadata) {
+ for (String reqMetKey : getRequiredMetadata()) {
+ if (!productMetadata.containsKey(reqMetKey)) {
+ LOG.log(Level.WARNING, "Missing required metadata field "
+ + reqMetKey);
return false;
- }
- return true;
- }
-
- protected abstract boolean passesPreconditions(File product);
-
- protected abstract Metadata getMetadataForProduct(File product);
-
- private boolean performPreIngestActions(File product,
- Metadata productMetadata) {
- if (this.actionRepo != null)
- return this.performProductCrawlerActions(this.actionRepo
- .getPreIngestActions(), product, productMetadata);
- else
- return true;
- }
-
- private boolean performPostIngestOnSuccessActions(File product,
- Metadata productMetadata) {
- if (this.actionRepo != null)
- return this.performProductCrawlerActions(this.actionRepo
- .getPostIngestOnSuccessActions(), product, productMetadata);
- else
- return true;
- }
-
- private boolean performPostIngestOnFailActions(File product,
- Metadata productMetadata) {
- if (this.actionRepo != null)
- return this.performProductCrawlerActions(this.actionRepo
- .getPostIngestOnFailActions(), product, productMetadata);
- else
- return true;
- }
-
- private boolean performProductCrawlerActions(List<CrawlerAction> actions,
- File product, Metadata productMetadata) {
- boolean allSucceeded = true;
- for (CrawlerAction action : actions) {
- try {
- LOG.log(Level.INFO, "Performing action (id = "
- + action.getId() + " : description = "
- + action.getDescription() + ")");
- if (!action.performAction(product,
- productMetadata))
- throw new Exception("Action (id = "
- + action.getId() + " : description = "
- + action.getDescription()
- + ") returned false");
- } catch (Exception e) {
- allSucceeded = false;
- LOG.log(Level.WARNING, "Failed to perform crawler action : "
- + e.getMessage());
+ }
+ }
+ return true;
+ }
+
+ @VisibleForTesting void addKnownMetadata(File product,
+ Metadata productMetadata) {
+ // Add ProductName if not specified.
+ if (productMetadata.getMetadata(CoreMetKeys.PRODUCT_NAME) == null) {
+ productMetadata.addMetadata(
+ CoreMetKeys.PRODUCT_NAME, product.getName());
+ }
+ // Add Filename if not specified.
+ if (productMetadata.getMetadata(CoreMetKeys.FILENAME) == null) {
+ productMetadata.addMetadata(CoreMetKeys.FILENAME, product.getName());
+ }
+ // Add FileLocation if not specified.
+ if (productMetadata.getMetadata(CoreMetKeys.FILE_LOCATION) == null) {
+ productMetadata.addMetadata(CoreMetKeys.FILE_LOCATION, product
+ .getAbsoluteFile().getParentFile().getAbsolutePath());
+ }
+ }
+
+ @VisibleForTesting IngestStatus createIngestStatus(final File product,
+ final IngestStatus.Result result, final String message) {
+ return new IngestStatus() {
+ public File getProduct() {
+ return product;
+ }
+ public Result getResult() {
+ return result;
+ }
+ public String getMessage() {
+ return message;
+ }
+ };
+ }
+
+ @VisibleForTesting boolean ingest(File product, Metadata productMetdata) {
+ try {
+ LOG.log(Level.INFO, "ProductCrawler: Ready to ingest product: ["
+ + product + "]: ProductType: ["
+ + productMetdata.getMetadata(PRODUCT_TYPE) + "]");
+ String productId = ingester.ingest(new URL(getFilemgrUrl()),
+ product, productMetdata);
+ LOG.log(Level.INFO, "Successfully ingested product: [" + product
+ + "]: product id: " + productId);
+ } catch (Exception e) {
+ LOG.log(Level.WARNING,
+ "ProductCrawler: Exception ingesting product: [" + product
+ + "]: Message: " + e.getMessage()
+ + ": attempting to continue crawling", e);
+ return false;
+ }
+ return true;
+ }
+
+ @VisibleForTesting boolean performPreIngestActions(File product,
+ Metadata productMetadata) {
+ if (actionRepo != null) {
+ return performProductCrawlerActions(
+ actionRepo.getPreIngestActions(), product, productMetadata);
+ } else {
+ return true;
+ }
+ }
+
+ @VisibleForTesting boolean performPostIngestOnSuccessActions(File product,
+ Metadata productMetadata) {
+ if (actionRepo != null) {
+ return performProductCrawlerActions(
+ actionRepo.getPostIngestOnSuccessActions(), product,
+ productMetadata);
+ } else {
+ return true;
+ }
+ }
+
+ @VisibleForTesting boolean performPostIngestOnFailActions(File product,
+ Metadata productMetadata) {
+ if (actionRepo != null) {
+ return performProductCrawlerActions(
+ actionRepo.getPostIngestOnFailActions(), product,
+ productMetadata);
+ } else {
+ return true;
+ }
+ }
+
+ @VisibleForTesting boolean performProductCrawlerActions(
+ List<CrawlerAction> actions, File product, Metadata productMetadata) {
+ boolean allSucceeded = true;
+ for (CrawlerAction action : actions) {
+ try {
+ LOG.log(Level.INFO, "Performing action (id = " + action.getId()
+ + " : description = " + action.getDescription() + ")");
+ if (!action.performAction(product, productMetadata)) {
+ throw new Exception("Action (id = " + action.getId()
+ + " : description = " + action.getDescription()
+ + ") returned false");
}
- }
- return allSucceeded;
- }
+ } catch (Exception e) {
+ allSucceeded = false;
+ LOG.log(Level.WARNING,
+ "Failed to perform crawler action : " + e.getMessage(), e);
+ }
+ }
+ return allSucceeded;
+ }
}
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/StdProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/StdProductCrawler.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/StdProductCrawler.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/StdProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -50,16 +50,10 @@ public class StdProductCrawler extends P
}
@Override
- protected Metadata getMetadataForProduct(File product) {
- try {
- MetReaderExtractor extractor = new MetReaderExtractor(
- this.metFileExtension);
- return extractor.extractMetadata(product);
- } catch (Exception e) {
- LOG.log(Level.WARNING, "Failed to get metadata for " + product + " : "
- + e.getMessage());
- return new Metadata();
- }
+ protected Metadata getMetadataForProduct(File product) throws Exception {
+ MetReaderExtractor extractor = new MetReaderExtractor(
+ this.metFileExtension);
+ return extractor.extractMetadata(product);
}
@Override
@@ -68,6 +62,11 @@ public class StdProductCrawler extends P
.exists();
}
+ @Override
+ protected File renameProduct(File product, Metadata productMetadata) {
+ return product;
+ }
+
public void setMetFileExtension(String metFileExtension) {
this.metFileExtension = metFileExtension;
}
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MetExtractorSpec.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MetExtractorSpec.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MetExtractorSpec.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MetExtractorSpec.java Tue Mar 27 06:49:13 2012
@@ -67,7 +67,7 @@ public class MetExtractorSpec {
this.preCondComparatorIds = preCondComparatorIds;
} catch (Exception e) {
throw new InstantiationException(
- "Failed to create MetExtractorSpec object : "
+ "Failed to create MetExtractorSpec object : "
+ e.getMessage());
}
}
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigMetKeys.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigMetKeys.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigMetKeys.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigMetKeys.java Tue Mar 27 06:49:13 2012
@@ -37,6 +37,8 @@ public interface MimeExtractorConfigMetK
public static final String EXTRACTOR_TAG = "extractor";
+ public static final String NAMING_CONVENTION_TAG = "namingConvention";
+
public static final String EXTRACTOR_CLASS_TAG = "extractorClass";
public static final String EXTRACTOR_CONFIG_TAG = "config";
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigReader.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigReader.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigReader.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorConfigReader.java Tue Mar 27 06:49:13 2012
@@ -14,29 +14,30 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
package org.apache.oodt.cas.crawl.typedetection;
//OODT imports
+import org.apache.oodt.cas.metadata.filenaming.NamingConvention;
import org.apache.oodt.cas.metadata.util.PathUtils;
import org.apache.oodt.commons.xml.XMLUtils;
//JDK imports
import java.io.FileInputStream;
import java.util.LinkedList;
+
+//W3C imports
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
+//Google imports
+import com.google.common.base.Strings;
+
/**
- * @author mattmann
- * @author bfoster
- * @version $Revision$
- *
- * <p>
* Static reader class for {@link MimeExtractor}s.
- * </p>.
+ *
+ * @author mattmann (Chris Mattmann)
+ * @author bfoster (Brian Foster)
*/
public final class MimeExtractorConfigReader implements
MimeExtractorConfigMetKeys {
@@ -73,15 +74,23 @@ public final class MimeExtractorConfigRe
for (int k = 0; k < preCondComparators.getLength(); k++)
preCondComparatorIds.add(((Element) preCondComparators
.item(k)).getAttribute(ID_ATTR));
+ // Using EXTRACTOR_CLASS_TAG as an attribute name seems wrong, so the
+ // extractor class is read from CLASS_ATTR first, falling back to the
+ // EXTRACTOR_CLASS_TAG attribute for backward compatibility.
+ String extractorClass = extractorElem.getAttribute(CLASS_ATTR);
+ if (Strings.isNullOrEmpty(extractorClass)) {
+ extractorClass = extractorElem.getAttribute(EXTRACTOR_CLASS_TAG);
+ }
defaultExtractorSpecs
- .add(new MetExtractorSpec(extractorElem
- .getAttribute(EXTRACTOR_CLASS_TAG),
+ .add(new MetExtractorSpec(extractorClass,
getFilePathFromElement(extractorElem,
EXTRACTOR_CONFIG_TAG),
preCondComparatorIds));
}
extractorRepo
.setDefaultMetExtractorSpecs(defaultExtractorSpecs);
+ extractorRepo.setDefaultNamingConvention(
+ getNamingConvention(defaultExtractorElem));
}
NodeList mimeElems = root.getElementsByTagName(MIME_TAG);
@@ -90,6 +99,10 @@ public final class MimeExtractorConfigRe
String mimeType = mimeElem.getAttribute(MIME_TYPE_ATTR);
LinkedList<MetExtractorSpec> specs = new LinkedList<MetExtractorSpec>();
+ // Load naming convention class.
+ extractorRepo.setNamingConvention(mimeType,
+ getNamingConvention(mimeElem));
+
NodeList extractorSpecElems = mimeElem
.getElementsByTagName(EXTRACTOR_TAG);
if (extractorSpecElems != null
@@ -132,6 +145,22 @@ public final class MimeExtractorConfigRe
}
}
+ private static NamingConvention getNamingConvention(Element parent)
+ throws Exception {
+ NodeList namingConventions = parent
+ .getElementsByTagName(NAMING_CONVENTION_TAG);
+ if (namingConventions != null && namingConventions.getLength() > 0) {
+ if (namingConventions.getLength() > 1) {
+ throw new Exception("Can only have 1 '"
+ + NAMING_CONVENTION_TAG + "' tag per mimetype");
+ }
+ Element namingConvention = (Element) namingConventions.item(0);
+ return (NamingConvention) Class.forName(
+ namingConvention.getAttribute(CLASS_ATTR)).newInstance();
+ }
+ return null;
+ }
+
private static String getFilePathFromElement(Element root, String elemName) {
String filePath = null;
Element elem = XMLUtils.getFirstElement(elemName, root);
Modified: oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorRepo.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorRepo.java?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorRepo.java (original)
+++ oodt/trunk/crawler/src/main/java/org/apache/oodt/cas/crawl/typedetection/MimeExtractorRepo.java Tue Mar 27 06:49:13 2012
@@ -14,8 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
package org.apache.oodt.cas.crawl.typedetection;
//JDK imports
@@ -23,28 +21,32 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.Vector;
//OODT imports
+import org.apache.oodt.cas.metadata.filenaming.NamingConvention;
import org.apache.oodt.cas.metadata.util.MimeTypeUtils;
+//Google imports
+import com.google.common.collect.Maps;
+
/**
- * @author mattmann
- * @author bfoster
- * @version $Revision$
+ * Repo of extractor spec and naming conventions mapped by mime-type.
+ *
+ * @author mattmann (Chris Mattmann)
+ * @author bfoster (Brian Foster)
*/
public class MimeExtractorRepo {
private List<MetExtractorSpec> defaultExtractorSpecs;
-
+ private NamingConvention defaultNamingConvention;
private MimeTypeUtils mimeRepo;
-
private boolean magic;
-
- private HashMap<String, List<MetExtractorSpec>> mimeTypeToMetExtractorSpecsMap;
+ private Map<String, List<MetExtractorSpec>> mimeTypeToMetExtractorSpecsMap;
+ private Map<String, NamingConvention> mimeTypeToNamingConventionMap;
/**
* Default Constructor
@@ -53,7 +55,7 @@ public class MimeExtractorRepo {
*
*/
public MimeExtractorRepo() throws FileNotFoundException {
- this(new LinkedList<MetExtractorSpec>(), null, false);
+ this(new LinkedList<MetExtractorSpec>(), null, null, false);
}
/**
@@ -72,11 +74,28 @@ public class MimeExtractorRepo {
* @throws FileNotFoundException
*/
public MimeExtractorRepo(List<MetExtractorSpec> defaultExtractorSpecs,
- String mimeRepoFile, boolean magic) throws FileNotFoundException {
- this.setDefaultMetExtractorSpecs(defaultExtractorSpecs);
- this.setMimeRepoFile(mimeRepoFile);
- this.setMagic(magic);
- this.mimeTypeToMetExtractorSpecsMap = new HashMap<String, List<MetExtractorSpec>>();
+ NamingConvention defaultNamingConvention, String mimeRepoFile,
+ boolean magic) throws FileNotFoundException {
+ setDefaultMetExtractorSpecs(defaultExtractorSpecs);
+ setDefaultNamingConvention(defaultNamingConvention);
+ setMimeRepoFile(mimeRepoFile);
+ setMagic(magic);
+ mimeTypeToMetExtractorSpecsMap = Maps.newHashMap();
+ mimeTypeToNamingConventionMap = Maps.newHashMap();
+ }
+
+ public synchronized void setNamingConvention(String mimeType,
+ NamingConvention namingConvention) {
+ mimeTypeToNamingConventionMap.put(mimeType, namingConvention);
+ }
+
+ public synchronized NamingConvention getNamingConvention(String mimeType) {
+ NamingConvention namingConvention = mimeTypeToNamingConventionMap.get(
+ mimeType);
+ if (namingConvention == null) {
+ return getDefaultNamingConvention();
+ }
+ return namingConvention;
}
public synchronized void addMetExtractorSpec(String mimeType,
@@ -108,7 +127,7 @@ public class MimeExtractorRepo {
extractorSpecs.addAll(specs);
mimeType = this.mimeRepo.getSuperTypeForMimeType(mimeType);
}
- return extractorSpecs != null ? extractorSpecs : this
+ return !extractorSpecs.isEmpty() ? extractorSpecs : this
.getDefaultMetExtractorSpecs();
}
@@ -138,6 +157,15 @@ public class MimeExtractorRepo {
this.defaultExtractorSpecs = defaultExtractorSpecs;
}
+ public void setDefaultNamingConvention(
+ NamingConvention defaultNamingConvention) {
+ this.defaultNamingConvention = defaultNamingConvention;
+ }
+
+ public NamingConvention getDefaultNamingConvention() {
+ return defaultNamingConvention;
+ }
+
/**
* @return the magic
*/
@@ -165,7 +193,11 @@ public class MimeExtractorRepo {
if (mimeRepoFile != null)
this.mimeRepo = new MimeTypeUtils(mimeRepoFile, this.magic);
}
-
+
+ public String getMimeType(File file) {
+ return mimeRepo.getMimeType(file);
+ }
+
/**
* Gets the mime-type hierarchy. Index 0 is this files mime-type,
* index 1 is index 0's mime-type's parent mime-type, and so on.
@@ -174,12 +206,11 @@ public class MimeExtractorRepo {
*/
public List<String> getMimeTypes(File file) {
List<String> mimeTypes = new Vector<String>();
- String mimeType = this.mimeRepo.getMimeType(file);
+ String mimeType = getMimeType(file);
mimeTypes.add(mimeType);
while ((mimeType = this.mimeRepo.getSuperTypeForMimeType(mimeType)) != null
&& !mimeType.equals("application/octet-stream"))
mimeTypes.add(mimeType);
return mimeTypes;
}
-
}
Modified: oodt/trunk/crawler/src/main/resources/cmd-line-options.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/resources/cmd-line-options.xml?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/resources/cmd-line-options.xml (original)
+++ oodt/trunk/crawler/src/main/resources/cmd-line-options.xml Tue Mar 27 06:49:13 2012
@@ -389,6 +389,29 @@
</property>
</bean>
+ <bean id="namingConventionId" class="org.apache.oodt.cas.cli.option.AdvancedCmdLineOption">
+ <property name="shortOption" value="ncid" />
+ <property name="longOption" value="namingConventionId" />
+ <property name="description" value="ID of the NamingConvention to use to rename products before ingest" />
+ <property name="hasArgs" value="true" />
+ <property name="argsDescription" value="NamingConvention ID" />
+ <property name="requirementRules">
+ <list>
+ <bean class="org.apache.oodt.cas.cli.option.require.ActionDependencyRule"
+ p:actionName="launchMetCrawler" p:relation="OPTIONAL" />
+ </list>
+ </property>
+ <property name="handler">
+ <bean class="org.apache.oodt.cas.crawl.cli.option.handler.CrawlerBeansPropHandler">
+ <property name="properties">
+ <list>
+ <value>MetExtractorProductCrawler.namingConventionId</value>
+ </list>
+ </property>
+ </bean>
+ </property>
+ </bean>
+
<bean id="noRecur" class="org.apache.oodt.cas.cli.option.AdvancedCmdLineOption">
<property name="shortOption" value="nr" />
<property name="longOption" value="noRecur" />
Modified: oodt/trunk/crawler/src/main/resources/crawler-config.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/resources/crawler-config.xml?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/resources/crawler-config.xml (original)
+++ oodt/trunk/crawler/src/main/resources/crawler-config.xml Tue Mar 27 06:49:13 2012
@@ -24,5 +24,6 @@ the License.
<import resource="crawler-beans.xml" />
<import resource="action-beans.xml" />
<import resource="precondition-beans.xml" />
+ <import resource="naming-beans.xml" />
</beans>
Modified: oodt/trunk/crawler/src/main/resources/examples/mime-extractor-map.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/resources/examples/mime-extractor-map.xml?rev=1305745&r1=1305744&r2=1305745&view=diff
==============================================================================
--- oodt/trunk/crawler/src/main/resources/examples/mime-extractor-map.xml (original)
+++ oodt/trunk/crawler/src/main/resources/examples/mime-extractor-map.xml Tue Mar 27 06:49:13 2012
@@ -19,7 +19,10 @@ the License.
<!-- extractor specs for mime-type from mimetypes.xml (one or more) -->
<mime type="some/mime-type">
-
+
+ <!-- naming convention for renaming file (zero or one) -->
+ <namingConvention class="naming.convention.class" />
+
<!-- extractor spec for given mime-type (one or more) -->
<extractor class="extractor.class">
Added: oodt/trunk/crawler/src/main/resources/naming-beans.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/main/resources/naming-beans.xml?rev=1305745&view=auto
==============================================================================
--- oodt/trunk/crawler/src/main/resources/naming-beans.xml (added)
+++ oodt/trunk/crawler/src/main/resources/naming-beans.xml Tue Mar 27 06:49:13 2012
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more contributor
+license agreements. See the NOTICE.txt file distributed with this work for
+additional information regarding copyright ownership. The ASF licenses this
+file to you under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License. You may obtain a copy of
+the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+License for the specific language governing permissions and limitations under
+the License.
+-->
+<beans xmlns="http://www.springframework.org/schema/beans"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
+ xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">
+
+ <bean id="ExampleNamingConv" class="org.apache.oodt.cas.metadata.filenaming.PathUtilsNamingConvention">
+ <property name="namingConv" value="[ProductType].[NominalDate].txt" />
+ </bean>
+</beans>
Propchange: oodt/trunk/crawler/src/main/resources/naming-beans.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java?rev=1305745&view=auto
==============================================================================
--- oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java (added)
+++ oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.oodt.cas.crawl;
+
+//JDK imports
+import java.io.File;
+
+//OODT imports
+import org.apache.oodt.cas.metadata.Metadata;
+
+/**
+ * {@link ProductCrawler} used for testing that the appropriate states are
+ * entered for each part of the crawler's workflow.
+ *
+ * @author bfoster (Brian Foster)
+ */
+public class StateAwareProductCrawler extends ProductCrawler{
+
+ private boolean passPreconditions = true;
+ private boolean passExtraction = true;
+ private boolean passRenaming = true;
+ private boolean passRequiredMetadata = true;
+ private boolean passPreIngestActions = true;
+ private boolean passIngest = true;
+
+ private boolean ranPreconditions = false;
+ private boolean ranExtraction = false;
+ private boolean ranRenaming = false;
+ private boolean ranRequiredMetadata = false;
+ private boolean ranPreIngestActions = false;
+ private boolean ranIngest = false;
+ private boolean ranPostIngestSuccessActions = false;
+ private boolean ranPostIngestFailActions = false;
+
+ public void markFailPreconditions() {
+ passPreconditions = false;
+ }
+
+ public void markFailExtraction() {
+ passExtraction = false;
+ }
+
+ public void markFailRenaming() {
+ passRenaming = false;
+ }
+
+ public void markFailRequiredMetadata() {
+ passRequiredMetadata = false;
+ }
+
+ public void markFailPreIngestActions() {
+ passPreIngestActions = false;
+ }
+
+ public void markSkipIngest() {
+ this.setSkipIngest(true);
+ }
+
+ public void markFailIngest() {
+ passIngest = false;
+ }
+
+ public boolean ranPreconditions() {
+ return ranPreconditions;
+ }
+
+ public boolean ranExtraction() {
+ return ranExtraction;
+ }
+
+ public boolean ranRenaming() {
+ return ranRenaming;
+ }
+
+ public boolean ranRequiredMetadata() {
+ return ranRequiredMetadata;
+ }
+
+ public boolean ranPreIngestActions() {
+ return ranPreIngestActions;
+ }
+
+ public boolean ranIngest() {
+ return ranIngest;
+ }
+
+ public boolean ranPostIngestSuccessActions() {
+ return ranPostIngestSuccessActions;
+ }
+
+ public boolean ranPostIngestFailActions() {
+ return ranPostIngestFailActions;
+ }
+
+ @Override
+ protected boolean passesPreconditions(File p) {
+ ranPreconditions = true;
+ return passPreconditions;
+ }
+
+ @Override
+ protected Metadata getMetadataForProduct(File p)
+ throws Exception {
+ ranExtraction = true;
+ if (passExtraction) {
+ return new Metadata();
+ } else {
+ throw new Exception("Failed Extraction");
+ }
+ }
+
+ @Override
+ protected File renameProduct(File p, Metadata m)
+ throws Exception {
+ ranRenaming = true;
+ if (passRenaming) {
+ return p;
+ } else {
+ throw new Exception("Failed Renaming");
+ }
+ }
+
+ @Override
+ boolean containsRequiredMetadata(Metadata m) {
+ ranRequiredMetadata = true;
+ return passRequiredMetadata;
+ }
+
+ @Override
+ boolean performPreIngestActions(File p, Metadata m) {
+ ranPreIngestActions = true;
+ return passPreIngestActions;
+ }
+
+ @Override
+ boolean ingest(File p, Metadata m) {
+ ranIngest = true;
+ return passIngest;
+ }
+
+ @Override
+ boolean performPostIngestOnSuccessActions(File p, Metadata m) {
+ ranPostIngestSuccessActions = true;
+ return true;
+ }
+
+ @Override
+ boolean performPostIngestOnFailActions(File p, Metadata m) {
+ ranPostIngestFailActions = true;
+ return true;
+ }
+}
Propchange: oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/StateAwareProductCrawler.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java
URL: http://svn.apache.org/viewvc/oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java?rev=1305745&view=auto
==============================================================================
--- oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java (added)
+++ oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java Tue Mar 27 06:49:13 2012
@@ -0,0 +1,625 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.oodt.cas.crawl;
+
+//EasyMock static imports
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.expectLastCall;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
+
+//JDK imports
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Collections;
+
+//OODT imports
+import org.apache.oodt.cas.crawl.action.CrawlerAction;
+import org.apache.oodt.cas.crawl.action.CrawlerActionRepo;
+import org.apache.oodt.cas.crawl.status.IngestStatus;
+import org.apache.oodt.cas.crawl.structs.exceptions.CrawlerActionException;
+import org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory;
+import org.apache.oodt.cas.filemgr.ingest.Ingester;
+import org.apache.oodt.cas.filemgr.metadata.CoreMetKeys;
+import org.apache.oodt.cas.filemgr.structs.exceptions.IngestException;
+import org.apache.oodt.cas.metadata.Metadata;
+
+//Spring imports
+import org.springframework.context.support.FileSystemXmlApplicationContext;
+
+//Google imports
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+//JUnit imports
+import junit.framework.TestCase;
+
+/**
+ * Test class for {@link ProductCrawler}.
+ *
+ * @author bfoster (Brian Foster)
+ */
+public class TestProductCrawler extends TestCase {
+
+ private static final String CRAWLER_CONFIG =
+ "src/main/resources/crawler-config.xml";
+
+ // Case1:
+ // - Preconditions: fail
+ public void testHandleFileCase1() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailPreconditions();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.PRECONDS_FAILED, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertFalse(pc.ranExtraction());
+ assertFalse(pc.ranRenaming());
+ assertFalse(pc.ranRequiredMetadata());
+ assertFalse(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertFalse(pc.ranPostIngestFailActions());
+ }
+
+ // Case2:
+ // - Preconditions: pass
+ // - Extraction: fail
+ public void testHandleFileCase2() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailExtraction();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.FAILURE, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertFalse(pc.ranRenaming());
+ assertFalse(pc.ranRequiredMetadata());
+ assertFalse(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertTrue(pc.ranPostIngestFailActions());
+ }
+
+ // Case3:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: fail
+ public void testHandleFileCase3() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailRenaming();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.FAILURE, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertFalse(pc.ranRequiredMetadata());
+ assertFalse(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertTrue(pc.ranPostIngestFailActions());
+ }
+
+ // Case4:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: pass
+ // - RequiredMetadata: fail
+ public void testHandleFileCase4() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailRequiredMetadata();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.FAILURE, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertTrue(pc.ranRequiredMetadata());
+ assertFalse(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertTrue(pc.ranPostIngestFailActions());
+ }
+
+ // Case5:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: pass
+ // - RequiredMetadata: pass
+ // - PreIngestActions: fail
+ public void testHandleFileCase5() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailPreIngestActions();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.FAILURE, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertTrue(pc.ranRequiredMetadata());
+ assertTrue(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertTrue(pc.ranPostIngestFailActions());
+ }
+
+ // Case6:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: pass
+ // - RequiredMetadata: pass
+ // - PreIngestActions: pass
+ // - SkipIngest: true
+ public void testHandleFileCase6() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markSkipIngest();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.SKIPPED, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertTrue(pc.ranRequiredMetadata());
+ assertTrue(pc.ranPreIngestActions());
+ assertFalse(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertFalse(pc.ranPostIngestFailActions());
+ }
+
+ // Case7:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: pass
+ // - RequiredMetadata: pass
+ // - PreIngestActions: pass
+ // - SkipIngest: false
+ // - Ingest: fail
+ public void testHandleFileCase7() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+ pc.markFailIngest();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.FAILURE, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertTrue(pc.ranRequiredMetadata());
+ assertTrue(pc.ranPreIngestActions());
+ assertTrue(pc.ranIngest());
+ assertFalse(pc.ranPostIngestSuccessActions());
+ assertTrue(pc.ranPostIngestFailActions());
+ }
+
+ // Case8:
+ // - Preconditions: pass
+ // - Extraction: pass
+ // - RenameProduct: pass
+ // - RequiredMetadata: pass
+ // - PreIngestActions: pass
+ // - SkipIngest: false
+ // - Ingest: pass
+ public void testHandleFileCase8() {
+ File p = new File("/tmp/data.dat");
+
+ // Setup Crawler.
+ StateAwareProductCrawler pc = new StateAwareProductCrawler();
+
+ // Run Crawler.
+ IngestStatus status = pc.handleFile(p);
+
+ // Verify IngestStatus.
+ assertEquals(IngestStatus.Result.SUCCESS, status.getResult());
+ assertEquals(p, status.getProduct());
+
+ // Verify correct methods were run.
+ assertTrue(pc.ranPreconditions());
+ assertTrue(pc.ranExtraction());
+ assertTrue(pc.ranRenaming());
+ assertTrue(pc.ranRequiredMetadata());
+ assertTrue(pc.ranPreIngestActions());
+ assertTrue(pc.ranIngest());
+ assertTrue(pc.ranPostIngestSuccessActions());
+ assertFalse(pc.ranPostIngestFailActions());
+ }
+
+ public void testSetupIngester() {
+ ProductCrawler pc = createDummyCrawler();
+ pc.setClientTransferer(LocalDataTransferFactory.class.getCanonicalName());
+ pc.setupIngester();
+ assertNotNull(pc.ingester);
+ }
+
+ public void testLoadAndValidateActions() {
+ ProductCrawler pc = createDummyCrawler();
+ pc.setApplicationContext(new FileSystemXmlApplicationContext(
+ CRAWLER_CONFIG));
+ pc.loadAndValidateActions();
+ assertEquals(0, pc.actionRepo.getActions().size());
+
+ pc = createDummyCrawler();
+ pc.setApplicationContext(new FileSystemXmlApplicationContext(
+ CRAWLER_CONFIG));
+ pc.setActionIds(Lists.newArrayList("Unique", "DeleteDataFile"));
+ pc.loadAndValidateActions();
+ assertEquals(Sets.newHashSet(
+ pc.getApplicationContext().getBean("Unique"),
+ pc.getApplicationContext().getBean("DeleteDataFile")),
+ pc.actionRepo.getActions());
+ }
+
+ public void testValidateActions() throws CrawlerActionException {
+ // Test case invalid action.
+ ProductCrawler pc = createDummyCrawler();
+ pc.actionRepo = createMock(CrawlerActionRepo.class);
+
+ CrawlerAction action = createMock(CrawlerAction.class);
+ action.validate();
+ expectLastCall().andThrow(new CrawlerActionException());
+ expect(action.getId()).andReturn("ActionId");
+ replay(action);
+
+ expect(pc.actionRepo.getActions()).andReturn(
+ Sets.newHashSet(action));
+ replay(pc.actionRepo);
+ try {
+ pc.validateActions();
+ fail("Should have thrown RuntimeException");
+ } catch (RuntimeException e) { /* expect throw */ }
+ verify(pc.actionRepo);
+ verify(action);
+
+ // Test case valid action.
+ pc = createDummyCrawler();
+ pc.actionRepo = createMock(CrawlerActionRepo.class);
+ action = createMock(CrawlerAction.class);
+ expect(pc.actionRepo.getActions()).andReturn(
+ Sets.newHashSet(action));
+ action.validate();
+ replay(pc.actionRepo);
+ replay(action);
+ pc.validateActions();
+ verify(pc.actionRepo);
+ verify(action);
+ }
+
+ public void testContainsRequiredMetadata() {
+ ProductCrawler pc = createDummyCrawler();
+ Metadata m = new Metadata();
+ m.replaceMetadata(CoreMetKeys.PRODUCT_TYPE, "GenericFile");
+ m.replaceMetadata(CoreMetKeys.FILENAME, "TestFile.txt");
+ m.replaceMetadata(CoreMetKeys.FILE_LOCATION, "/tmp/dir");
+ assertTrue(pc.containsRequiredMetadata(m));
+ assertFalse(pc.containsRequiredMetadata(new Metadata()));
+ }
+
+ /**
+  * Checks that {@code addKnownMetadata} leaves initially empty metadata
+  * with exactly three keys: product name and filename (both equal to the
+  * file's name) and file location (the parent directory's absolute path).
+  */
+ public void testAddKnowMetadata() {
+   File product = new File("/tmp/data.dat");
+   String expectedName = product.getName();
+   String expectedLocation = product.getParentFile().getAbsolutePath();
+
+   Metadata metadata = new Metadata();
+   createDummyCrawler().addKnownMetadata(product, metadata);
+
+   assertEquals(3, metadata.getAllKeys().size());
+   assertEquals(expectedName,
+       metadata.getMetadata(CoreMetKeys.PRODUCT_NAME));
+   assertEquals(expectedName,
+       metadata.getMetadata(CoreMetKeys.FILENAME));
+   assertEquals(expectedLocation,
+       metadata.getMetadata(CoreMetKeys.FILE_LOCATION));
+ }
+
+ /**
+  * Checks that the IngestStatus built by {@code createIngestStatus} hands
+  * back the product, result and message it was created with.
+  */
+ public void testCreateIngestStatus() {
+   String expectedMessage = "Ingest OK";
+   IngestStatus.Result expectedResult = IngestStatus.Result.SUCCESS;
+   File product = new File("/tmp/data.dat");
+
+   IngestStatus status = createDummyCrawler().createIngestStatus(
+       product, expectedResult, expectedMessage);
+
+   assertEquals(product, status.getProduct());
+   assertEquals(expectedResult, status.getResult());
+   assertEquals(expectedMessage, status.getMessage());
+ }
+
+ /**
+  * Checks {@code ingest} against a mocked Ingester: true is expected when
+  * the ingester returns a product id, false when it throws
+  * IngestException.
+  */
+ public void testIngest() throws MalformedURLException, IngestException {
+   String filemgrUrl = "http://localhost:9000";
+   URL expectedUrl = new URL(filemgrUrl);
+   File product = new File("/tmp/data.dat");
+   Metadata metadata = new Metadata();
+
+   // Successful ingest: the ingester hands back a product id.
+   Ingester workingIngester = createMock(Ingester.class);
+   expect(workingIngester.ingest(expectedUrl, product, metadata))
+       .andReturn("TestProductId");
+   replay(workingIngester);
+
+   ProductCrawler crawler = createDummyCrawler();
+   crawler.setFilemgrUrl(filemgrUrl);
+   crawler.ingester = workingIngester;
+   assertTrue(crawler.ingest(product, metadata));
+   verify(workingIngester);
+
+   // Failed ingest: the ingester throws.
+   Ingester brokenIngester = createMock(Ingester.class);
+   expect(brokenIngester.ingest(expectedUrl, product, metadata))
+       .andThrow(new IngestException());
+   replay(brokenIngester);
+
+   crawler = createDummyCrawler();
+   crawler.setFilemgrUrl(filemgrUrl);
+   crawler.ingester = brokenIngester;
+   assertFalse(crawler.ingest(product, metadata));
+   verify(brokenIngester);
+ }
+
+ /**
+  * Checks {@code performPreIngestActions}: true on a fresh crawler with no
+  * action repository assigned, true when the repository's single
+  * pre-ingest action succeeds, and false when that action fails.
+  */
+ public void testPerformPreIngestActions() throws CrawlerActionException {
+   File product = new File("/tmp/data.dat");
+   Metadata metadata = new Metadata();
+   ProductCrawler crawler = createDummyCrawler();
+
+   // No repository assigned yet (actionRepo == null).
+   assertTrue(crawler.performPreIngestActions(product, metadata));
+
+   // Repository holding one action whose performAction returns true.
+   CrawlerAction passingAction = createMock(CrawlerAction.class);
+   expect(passingAction.getId()).andReturn("ActionId");
+   expect(passingAction.getDescription()).andReturn("Action Description");
+   expect(passingAction.performAction(product, metadata)).andReturn(true);
+   replay(passingAction);
+
+   CrawlerActionRepo passingRepo = createMock(CrawlerActionRepo.class);
+   expect(passingRepo.getPreIngestActions())
+       .andReturn(Lists.newArrayList(passingAction));
+   replay(passingRepo);
+   crawler.actionRepo = passingRepo;
+
+   assertTrue(crawler.performPreIngestActions(product, metadata));
+   verify(passingAction);
+   verify(passingRepo);
+
+   // Repository holding one action whose performAction returns false;
+   // id and description are expected to be read a second time.
+   CrawlerAction failingAction = createMock(CrawlerAction.class);
+   expect(failingAction.getId()).andReturn("ActionId");
+   expect(failingAction.getDescription()).andReturn("Action Description");
+   expect(failingAction.performAction(product, metadata)).andReturn(false);
+   expect(failingAction.getId()).andReturn("ActionId");
+   expect(failingAction.getDescription()).andReturn("Action Description");
+   replay(failingAction);
+
+   CrawlerActionRepo failingRepo = createMock(CrawlerActionRepo.class);
+   expect(failingRepo.getPreIngestActions())
+       .andReturn(Lists.newArrayList(failingAction));
+   replay(failingRepo);
+   crawler.actionRepo = failingRepo;
+
+   assertFalse(crawler.performPreIngestActions(product, metadata));
+   verify(failingAction);
+   verify(failingRepo);
+ }
+
+ /**
+  * Checks {@code performPostIngestOnSuccessActions}: true on a fresh
+  * crawler with no action repository assigned, true when the repository's
+  * single post-ingest-on-success action succeeds, and false when that
+  * action fails.
+  */
+ public void testPerformPostIngestOnSuccessActions() throws CrawlerActionException {
+   File dataFile = new File("/tmp/data.dat");
+   Metadata met = new Metadata();
+   ProductCrawler crawler = createDummyCrawler();
+
+   // No repository assigned yet (actionRepo == null).
+   assertTrue(crawler.performPostIngestOnSuccessActions(dataFile, met));
+
+   // One registered action, performAction returns true.
+   CrawlerAction okAction = createMock(CrawlerAction.class);
+   expect(okAction.getId()).andReturn("ActionId");
+   expect(okAction.getDescription()).andReturn("Action Description");
+   expect(okAction.performAction(dataFile, met)).andReturn(true);
+   replay(okAction);
+
+   CrawlerActionRepo okRepo = createMock(CrawlerActionRepo.class);
+   expect(okRepo.getPostIngestOnSuccessActions())
+       .andReturn(Lists.newArrayList(okAction));
+   replay(okRepo);
+   crawler.actionRepo = okRepo;
+
+   assertTrue(crawler.performPostIngestOnSuccessActions(dataFile, met));
+   verify(okAction);
+   verify(okRepo);
+
+   // One registered action, performAction returns false; id and
+   // description are expected to be read a second time.
+   CrawlerAction badAction = createMock(CrawlerAction.class);
+   expect(badAction.getId()).andReturn("ActionId");
+   expect(badAction.getDescription()).andReturn("Action Description");
+   expect(badAction.performAction(dataFile, met)).andReturn(false);
+   expect(badAction.getId()).andReturn("ActionId");
+   expect(badAction.getDescription()).andReturn("Action Description");
+   replay(badAction);
+
+   CrawlerActionRepo badRepo = createMock(CrawlerActionRepo.class);
+   expect(badRepo.getPostIngestOnSuccessActions())
+       .andReturn(Lists.newArrayList(badAction));
+   replay(badRepo);
+   crawler.actionRepo = badRepo;
+
+   assertFalse(crawler.performPostIngestOnSuccessActions(dataFile, met));
+   verify(badAction);
+   verify(badRepo);
+ }
+
+ /**
+  * Checks {@code performPostIngestOnFailActions}: true on a fresh crawler
+  * with no action repository assigned, true when the repository's single
+  * post-ingest-on-fail action succeeds, and false when that action fails.
+  */
+ public void testPerformPostIngestOnFailActions() throws CrawlerActionException {
+   File input = new File("/tmp/data.dat");
+   Metadata inputMet = new Metadata();
+   ProductCrawler crawler = createDummyCrawler();
+
+   // No repository assigned yet (actionRepo == null).
+   assertTrue(crawler.performPostIngestOnFailActions(input, inputMet));
+
+   // Single action reporting success.
+   CrawlerAction succeeding = createMock(CrawlerAction.class);
+   expect(succeeding.getId()).andReturn("ActionId");
+   expect(succeeding.getDescription()).andReturn("Action Description");
+   expect(succeeding.performAction(input, inputMet)).andReturn(true);
+   replay(succeeding);
+
+   CrawlerActionRepo repoWithSuccess = createMock(CrawlerActionRepo.class);
+   expect(repoWithSuccess.getPostIngestOnFailActions())
+       .andReturn(Lists.newArrayList(succeeding));
+   replay(repoWithSuccess);
+   crawler.actionRepo = repoWithSuccess;
+
+   assertTrue(crawler.performPostIngestOnFailActions(input, inputMet));
+   verify(succeeding);
+   verify(repoWithSuccess);
+
+   // Single action reporting failure; id and description are expected to
+   // be read a second time.
+   CrawlerAction failing = createMock(CrawlerAction.class);
+   expect(failing.getId()).andReturn("ActionId");
+   expect(failing.getDescription()).andReturn("Action Description");
+   expect(failing.performAction(input, inputMet)).andReturn(false);
+   expect(failing.getId()).andReturn("ActionId");
+   expect(failing.getDescription()).andReturn("Action Description");
+   replay(failing);
+
+   CrawlerActionRepo repoWithFailure = createMock(CrawlerActionRepo.class);
+   expect(repoWithFailure.getPostIngestOnFailActions())
+       .andReturn(Lists.newArrayList(failing));
+   replay(repoWithFailure);
+   crawler.actionRepo = repoWithFailure;
+
+   assertFalse(crawler.performPostIngestOnFailActions(input, inputMet));
+   verify(failing);
+   verify(repoWithFailure);
+ }
+
+ /**
+  * Checks {@code performProductCrawlerActions} with four action lists:
+  * empty (true), one passing action (true), one failing action (false),
+  * and a passing action followed by a failing one (false).
+  */
+ public void testPerformProductCrawlerActions() throws CrawlerActionException {
+   File product = new File("/tmp/data.dat");
+   Metadata metadata = new Metadata();
+   ProductCrawler crawler = createDummyCrawler();
+
+   // Empty action list.
+   assertTrue(crawler.performProductCrawlerActions(
+       Collections.<CrawlerAction>emptyList(), product, metadata));
+
+   // Single passing action.
+   CrawlerAction lonePass = createMock(CrawlerAction.class);
+   expect(lonePass.getId()).andReturn("ActionId");
+   expect(lonePass.getDescription()).andReturn("Action Description");
+   expect(lonePass.performAction(product, metadata)).andReturn(true);
+   replay(lonePass);
+   assertTrue(crawler.performProductCrawlerActions(
+       Lists.newArrayList(lonePass), product, metadata));
+   verify(lonePass);
+
+   // Single failing action; id and description are expected to be read a
+   // second time.
+   CrawlerAction loneFail = createMock(CrawlerAction.class);
+   expect(loneFail.getId()).andReturn("ActionId");
+   expect(loneFail.getDescription()).andReturn("Action Description");
+   expect(loneFail.performAction(product, metadata)).andReturn(false);
+   expect(loneFail.getId()).andReturn("ActionId");
+   expect(loneFail.getDescription()).andReturn("Action Description");
+   replay(loneFail);
+   assertFalse(crawler.performProductCrawlerActions(
+       Lists.newArrayList(loneFail), product, metadata));
+   verify(loneFail);
+
+   // A passing action followed by a failing action.
+   CrawlerAction firstPasses = createMock(CrawlerAction.class);
+   expect(firstPasses.getId()).andReturn("ActionId");
+   expect(firstPasses.getDescription()).andReturn("Action Description");
+   expect(firstPasses.performAction(product, metadata)).andReturn(true);
+   replay(firstPasses);
+
+   CrawlerAction secondFails = createMock(CrawlerAction.class);
+   expect(secondFails.getId()).andReturn("ActionId");
+   expect(secondFails.getDescription()).andReturn("Action Description");
+   expect(secondFails.performAction(product, metadata)).andReturn(false);
+   expect(secondFails.getId()).andReturn("ActionId");
+   expect(secondFails.getDescription()).andReturn("Action Description");
+   replay(secondFails);
+
+   assertFalse(crawler.performProductCrawlerActions(
+       Lists.newArrayList(firstPasses, secondFails), product, metadata));
+   verify(firstPasses);
+   verify(secondFails);
+ }
+
+ private static ProductCrawler createDummyCrawler() {
+ return createDummyCrawler(true, new Metadata(), null);
+ }
+
+ /**
+  * Builds an anonymous ProductCrawler whose overridden hooks return the
+  * fixed values given here.
+  *
+  * @param passesPreconditions value returned by passesPreconditions
+  * @param productMetadata metadata returned by getMetadataForProduct
+  * @param renamedFile file returned by renameProduct; when null, the
+  *          product passed to renameProduct is returned instead
+  * @return the stub crawler
+  */
+ private static ProductCrawler createDummyCrawler(
+     final boolean passesPreconditions, final Metadata productMetadata,
+     final File renamedFile) {
+   ProductCrawler stub = new ProductCrawler() {
+     @Override
+     protected boolean passesPreconditions(File candidate) {
+       return passesPreconditions;
+     }
+
+     @Override
+     protected Metadata getMetadataForProduct(File candidate) {
+       return productMetadata;
+     }
+
+     @Override
+     protected File renameProduct(File candidate, Metadata candidateMetadata)
+         throws Exception {
+       // Fall back to the incoming file when no replacement was supplied.
+       if (renamedFile != null) {
+         return renamedFile;
+       }
+       return candidate;
+     }
+   };
+   return stub;
+ }
+}
Propchange: oodt/trunk/crawler/src/test/org/apache/oodt/cas/crawl/TestProductCrawler.java
------------------------------------------------------------------------------
svn:mime-type = text/plain