You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by lu...@apache.org on 2012/11/07 16:13:51 UTC
svn commit: r1406666 - in /oodt/trunk/filemgr/src/main:
java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
resources/filemgr.properties
Author: luca
Date: Wed Nov 7 15:13:51 2012
New Revision: 1406666
URL: http://svn.apache.org/viewvc?rev=1406666&view=rev
Log:
Allowing for "lenient" LuceneCatalog implementation (OODT-529)
Also inserting better error message when product by id is not found (OODT-534)
Modified:
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
oodt/trunk/filemgr/src/main/resources/filemgr.properties
Modified: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java?rev=1406666&r1=1406665&r2=1406666&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java (original)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalog.java Wed Nov 7 15:13:51 2012
@@ -20,6 +20,8 @@ package org.apache.oodt.cas.filemgr.cata
//JDK imports
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -27,6 +29,23 @@ import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
+//Lucene imports
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.RangeQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+
//OODT imports
import org.apache.oodt.cas.filemgr.structs.BooleanQueryCriteria;
import org.apache.oodt.cas.filemgr.structs.Element;
@@ -40,34 +59,18 @@ import org.apache.oodt.cas.filemgr.struc
import org.apache.oodt.cas.filemgr.structs.TermQueryCriteria;
import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
import org.apache.oodt.cas.filemgr.structs.exceptions.ValidationLayerException;
-import org.apache.oodt.commons.pagination.PaginationUtils;
import org.apache.oodt.cas.filemgr.validation.ValidationLayer;
import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.commons.pagination.PaginationUtils;
//JUG imports
import org.safehaus.uuid.UUID;
import org.safehaus.uuid.UUIDGenerator;
-//Lucene imports
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.RangeQuery;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.WildcardQuery;
-
/**
* @author mattmann
* @author bfoster
+ * @author luca
* @version $Revision$
*
* <p>
@@ -180,7 +183,7 @@ public class LuceneCatalog implements Ca
public synchronized void removeMetadata(Metadata m, Product product)
throws CatalogException {
CompleteProduct p = CATALOG_CACHE.get(product.getProductId());
-
+
if (p == null) {
// not in local cache, get doc and rewrite index
String prodId = product.getProductId();
@@ -198,21 +201,28 @@ public class LuceneCatalog implements Ca
}
Metadata currMet = p.getMetadata();
- List<Element> metadataTypes = null;
+ List<String> metadataTypes = new ArrayList<String>();
- try {
- metadataTypes = valLayer.getElements(product.getProductType());
- } catch (ValidationLayerException e) {
- e.printStackTrace();
- throw new CatalogException(
- "ValidationLayerException when trying to obtain element list for product type: "
- + product.getProductType().getName()
- + ": Message: " + e.getMessage());
+ if (valLayer!=null) {
+ try {
+ // remove metadata elements specified by validation layer
+ for (Element element : valLayer.getElements(product.getProductType())) {
+ metadataTypes.add(element.getElementName());
+ }
+ } catch (ValidationLayerException e) {
+ e.printStackTrace();
+ throw new CatalogException(
+ "ValidationLayerException when trying to obtain element list for product type: "
+ + product.getProductType().getName()
+ + ": Message: " + e.getMessage());
+ }
+ } else {
+ // remove all metadata
+ metadataTypes = currMet.getAllKeys();
}
- for (Iterator<Element> i = metadataTypes.iterator(); i.hasNext();) {
- Element element = i.next();
- currMet.removeMetadata(element.getElementName());
+ for (String name : metadataTypes) {
+ currMet.removeMetadata(name);
}
p.setMetadata(currMet);
@@ -380,9 +390,10 @@ public class LuceneCatalog implements Ca
Hits hits = searcher.search(query);
// should be exactly 1 hit
- if (hits.length() != 1) {
- throw new CatalogException("Product: [" + productId
- + "] is not unique in the catalog!");
+ if (hits.length() == 0) {
+ throw new CatalogException("Product: [" + productId + "] NOT found in the catalog!");
+ } else if (hits.length() > 1) {
+ throw new CatalogException("Product: [" + productId+ "] is not unique in the catalog!");
}
Document productDoc = hits.doc(0);
@@ -1017,31 +1028,43 @@ public class LuceneCatalog implements Ca
product.setProductType(type);
if (getMetadata) {
- List<Element> elements = null;
+ List<String> names = new ArrayList<String>();
- try {
- elements = valLayer.getElements(type);
- } catch (ValidationLayerException e) {
- LOG.log(Level.WARNING,
- "Unable to obtain metadata for product: ["
- + product.getProductName() + "]: Message: "
- + e.getMessage());
- }
-
- if (elements != null) {
- for (Iterator<Element> i = elements.iterator(); i.hasNext();) {
- Element element = (Element) i.next();
-
- String[] elemValues = doc.getValues(element
- .getElementName());
-
- if (elemValues != null && elemValues.length > 0) {
- for (int j = 0; j < elemValues.length; j++) {
- metadata.addMetadata(element.getElementName(),
- elemValues[j]);
- }
- }
- }
+ if (valLayer!=null) {
+ // only add metadata elements specified by validation layer
+ try {
+ for (Element element : valLayer.getElements(type)) {
+ names.add(element.getElementName());
+ }
+ } catch (ValidationLayerException e) {
+ LOG.log(Level.WARNING,
+ "Unable to obtain metadata for product: ["
+ + product.getProductName() + "]: Message: "
+ + e.getMessage());
+ }
+ } else {
+ // add all metadata elements found in document
+ Enumeration<Field> fields = doc.fields();
+ while (fields.hasMoreElements()) {
+ Field field = fields.nextElement();
+ if (!names.contains(field.name())) {
+ names.add(field.name());
+ }
+ }
+
+ }
+
+ // loop over field names to add to metadata
+ for (String name : names) {
+ if (metadata.getAllMetadata(name)==null || metadata.getAllMetadata(name).size()==0) {
+ String[] elemValues = doc.getValues(name);
+
+ if (elemValues != null && elemValues.length > 0) {
+ for (int j = 0; j < elemValues.length; j++) {
+ metadata.addMetadata(name, elemValues[j]);
+ }
+ }
+ }
}
completeProduct.setMetadata(metadata);
@@ -1114,13 +1137,30 @@ public class LuceneCatalog implements Ca
doc.add(new Field("product_type_versioner", product.getProductType()
.getVersioner() != null ? product.getProductType()
.getVersioner() : "", Field.Store.YES, Field.Index.NO));
+
+ // write metadata fields to the Lucene document
+ List<String> keys = new ArrayList<String>();
+ // validation layer: add only specifically configured keys
+ if (valLayer!=null) {
+ List<Element> elements = quietGetElements(product.getProductType());
+ for (Iterator<Element> i = elements.iterator(); i.hasNext();) {
+ Element element = i.next();
+ String key = element.getElementName();
+ keys.add(key);
+ }
+ // no validation layer: add all keys that are NOT already in doc
+ // (otherwise some keys such as the product_* keys are duplicated)
+ } else {
+ for (String key : metadata.getAllKeys()) {
+ if (doc.getField(key)==null) {
+ keys.add(key);
+ }
+ }
+ }
- List<Element> elements = quietGetElements(product.getProductType());
- for (Iterator<Element> i = elements.iterator(); i.hasNext();) {
- Element element = i.next();
- String key = element.getElementName();
- List<String> values = metadata.getAllMetadata(key);
+ for (String key : keys) {
+ List<String> values = metadata.getAllMetadata(key);
if (values == null) {
LOG
Modified: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java?rev=1406666&r1=1406665&r2=1406666&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java (original)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/catalog/LuceneCatalogFactory.java Wed Nov 7 15:13:51 2012
@@ -59,6 +59,10 @@ public class LuceneCatalogFactory implem
/* the merge factor */
private int mergeFactor = -1;
+ /* Whether or not to enforce strict definition of metadata fields:
+ * 'lenientFields=false' (the default) means that all metadata fields must be explicitly defined in the XML configuration file */
+ private boolean lenientFields = false;
+
/* our log stream */
private static final Logger LOG = Logger.getLogger(LuceneCatalogFactory.class.getName());
@@ -78,11 +82,15 @@ public class LuceneCatalogFactory implem
//do env var replacement
indexFilePath = PathUtils.replaceEnvVariables(indexFilePath);
- String validationLayerFactoryClass = System
+ // instantiate validation layer, unless catalog is explicitly configured for lenient fields
+ lenientFields = Boolean.parseBoolean( System.getProperty("org.apache.oodt.cas.filemgr.catalog.lucene.lenientFields", "false") );
+ if (!lenientFields) {
+ String validationLayerFactoryClass = System
.getProperty("filemgr.validationLayer.factory",
"org.apache.oodt.cas.filemgr.validation.XMLValidationLayerFactory");
- validationLayer = GenericFileManagerObjectFactory
+ validationLayer = GenericFileManagerObjectFactory
.getValidationLayerFromFactory(validationLayerFactoryClass);
+ }
pageSize = Integer.getInteger("org.apache.oodt.cas.filemgr.catalog.lucene.pageSize", 20).intValue();
Modified: oodt/trunk/filemgr/src/main/resources/filemgr.properties
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/resources/filemgr.properties?rev=1406666&r1=1406665&r2=1406666&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/resources/filemgr.properties (original)
+++ oodt/trunk/filemgr/src/main/resources/filemgr.properties Wed Nov 7 15:13:51 2012
@@ -76,6 +76,11 @@ org.apache.oodt.cas.filemgr.repositorymg
# XML validation layer configuration
org.apache.oodt.cas.filemgr.validation.dirs=file:///dir1,file:///dir2
+# set the following property to 'true' to allow dynamic metadata fields,
+# effectively bypassing the validation layer.
+# by default the property is false
+#org.apache.oodt.cas.filemgr.catalog.lucene.lenientFields=true
+
# data source validation layer configuration
org.apache.oodt.cas.filemgr.validation.datasource.jdbc.url=some_jdbc_url
org.apache.oodt.cas.filemgr.validation.datasource.jdbc.user=user