You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by ma...@apache.org on 2013/03/15 04:52:24 UTC

svn commit: r1456758 - in /oodt/trunk: CHANGES.txt filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java filemgr/src/main/resources/examples/core/product-types.xml

Author: mattmann
Date: Fri Mar 15 03:52:23 2013
New Revision: 1456758

URL: http://svn.apache.org/r1456758
Log:
- fix for OODT-575 Metadata extractor for parsing filename based on regex contributed by Nga Chung.

Added:
    oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java
Modified:
    oodt/trunk/CHANGES.txt
    oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml

Modified: oodt/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1456758&r1=1456757&r2=1456758&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Fri Mar 15 03:52:23 2013
@@ -2,11 +2,15 @@ Apache OODT Change Log
 ======================
 Release 0.6 - Current Development
 --------------------------------------------
+
+* OODT-575 Metadata extractor for parsing filename based on regex 
+  (Nga Chung via mattmann)
+
 * OODT-573 Refactored the return statement in the getTopNProducts method in the
-LuceneCatalog class (rlaidlaw)
+  LuceneCatalog class (rlaidlaw)
 
 * OODT-571 Updated assignments in setWorkflowInst and setWaitforConditionSatisfy
-methods in IterativeWorkflowProcessorThread class (rlaidlaw)
+  methods in IterativeWorkflowProcessorThread class (rlaidlaw)
 
 * OODT-574 RADiX POM Parent References (Arni Sumarlidason via mattmann)
 

Added: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java?rev=1456758&view=auto
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java (added)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java Fri Mar 15 03:52:23 2013
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.cas.filemgr.metadata.extractors.examples;
+
+//JDK imports
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+//OODT imports
+import org.apache.oodt.cas.filemgr.metadata.extractors.AbstractFilemgrMetExtractor;
+import org.apache.oodt.cas.filemgr.structs.Product;
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.cas.metadata.exceptions.MetExtractionException;
+
+/**
+ * <p>
+ * Extracts {@link Metadata} from a {@link Product} filename that matches 
+ * a provided regular expression.
+ * </p>
+ *
+ * <p>
+ * Configuration properties read in {@link #doConfigure()}:
+ * </p>
+ * <ul>
+ * <li><code>filenamePattern</code> - Java regular expression that the whole
+ * filename must match; its capturing groups supply the metadata values.</li>
+ * <li><code>metadataKeys</code> - comma-separated metadata key names; the
+ * i-th key receives the value of the i-th capturing group.</li>
+ * </ul>
+ *
+ * @author nchung
+ * @version $Revision$
+ */
+public class FilenameRegexMetExtractor extends AbstractFilemgrMetExtractor {
+
+   private String filenamePattern;
+   private List<String> metadataKeys;
+
+   /**
+    * Reads the <code>filenamePattern</code> and <code>metadataKeys</code>
+    * properties from the configuration, if one was supplied. A missing
+    * property is left unset here and reported by
+    * {@link #doExtract(Product, Metadata)}, since this method cannot throw
+    * a checked exception.
+    */
+   public void doConfigure() {
+      if (this.configuration == null) {
+         return;
+      }
+
+      this.filenamePattern = this.configuration
+            .getProperty("filenamePattern");
+
+      String keysProperty = this.configuration.getProperty("metadataKeys");
+      if (keysProperty == null) {
+         // getProperty returns null for an absent key; do not split it.
+         this.metadataKeys = null;
+         return;
+      }
+
+      String[] keys = keysProperty.split(",");
+      // Tolerate whitespace around the commas, e.g. "Filename, ProductId".
+      for (int i = 0; i < keys.length; i++) {
+         keys[i] = keys[i].trim();
+      }
+      this.metadataKeys = Arrays.asList(keys);
+   }
+
+   /**
+    * Matches the product's filename against the configured pattern and adds
+    * one metadata entry per configured key, taking the value from the
+    * capturing group at the same position.
+    *
+    * @param product
+    *           the product whose file name is parsed.
+    * @param met
+    *           existing metadata, merged into the returned metadata.
+    * @return a new {@link Metadata} holding the merged input metadata plus
+    *         the values extracted from the filename.
+    * @throws MetExtractionException
+    *            if the extractor is not fully configured, if the filename
+    *            does not match the pattern, or if the pattern has fewer
+    *            capturing groups than there are metadata keys.
+    */
+   public Metadata doExtract(Product product, Metadata met)
+         throws MetExtractionException {
+      if (this.filenamePattern == null || this.metadataKeys == null) {
+         throw new MetExtractionException(
+               "Extractor is not configured: both the filenamePattern and "
+                     + "metadataKeys properties are required");
+      }
+
+      Metadata extractMet = new Metadata();
+      merge(met, extractMet);
+
+      Pattern pattern = Pattern.compile(this.filenamePattern);
+      Matcher matcher = pattern.matcher(getProductFile(product).getName());
+      if (!matcher.matches()) {
+         throw new MetExtractionException("Filename does not conform to the pattern "
+               + this.filenamePattern);
+      }
+
+      // Without this check matcher.group(i + 1) would fail with an unchecked
+      // IndexOutOfBoundsException when keys outnumber capturing groups.
+      if (this.metadataKeys.size() > matcher.groupCount()) {
+         throw new MetExtractionException("Pattern " + this.filenamePattern
+               + " has " + matcher.groupCount() + " capturing group(s) but "
+               + this.metadataKeys.size() + " metadata key(s) are configured");
+      }
+
+      for (int i = 0; i < this.metadataKeys.size(); i++) {
+         String key = this.metadataKeys.get(i);
+         // Group 0 is the whole match, so capturing groups start at 1.
+         String value = matcher.group(i + 1);
+         extractMet.addMetadata(key, value);
+      }
+      return extractMet;
+   }
+}

Modified: oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml?rev=1456758&r1=1456757&r2=1456758&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml (original)
+++ oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml Fri Mar 15 03:52:23 2013
@@ -61,6 +61,27 @@
            <property name="replace" value="true"/>
         </configuration>
       </extractor>
+      
+      <!-- 
+        The below enables the FilenameRegexMetExtractor.
+        It allows a user to specify a filename pattern with Java Regex that identifies groups 
+        within a particular file name. Those groups are then mapped to Metadata key names, and 
+        extracted as metadata from a file's name.
+        
+        In the example below, there are two groups, Filename, and ProductId, for files
+        of the form:
+        
+        Filename_ProductId.txt
+        
+        <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.FilenameRegexMetExtractor">
+          <configuration>
+            <property name="filenamePattern" value="(\\w*)_(\\d*)\\.txt"/>
+            <property name="metadataKeys" value="Filename,ProductId"/>
+          </configuration>
+        
+        </extractor>
+      
+       -->
     </metExtractors>
   </type>
 </cas:producttypes>