You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by ma...@apache.org on 2013/03/15 04:52:24 UTC
svn commit: r1456758 - in /oodt/trunk: CHANGES.txt
filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java
filemgr/src/main/resources/examples/core/product-types.xml
Author: mattmann
Date: Fri Mar 15 03:52:23 2013
New Revision: 1456758
URL: http://svn.apache.org/r1456758
Log:
- fix for OODT-575 Metadata extractor for parsing filename based on regex contributed by Nga Chung.
Added:
oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java
Modified:
oodt/trunk/CHANGES.txt
oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml
Modified: oodt/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/oodt/trunk/CHANGES.txt?rev=1456758&r1=1456757&r2=1456758&view=diff
==============================================================================
--- oodt/trunk/CHANGES.txt (original)
+++ oodt/trunk/CHANGES.txt Fri Mar 15 03:52:23 2013
@@ -2,11 +2,15 @@ Apache OODT Change Log
======================
Release 0.6 - Current Development
--------------------------------------------
+
+* OODT-575 Metadata extractor for parsing filename based on regex
+ (Nga Chung via mattmann)
+
* OODT-573 Refactored the return statement in the getTopNProducts method in the
-LuceneCatalog class (rlaidlaw)
+ LuceneCatalog class (rlaidlaw)
* OODT-571 Updated assignments in setWorkflowInst and setWaitforConditionSatisfy
-methods in IterativeWorkflowProcessorThread class (rlaidlaw)
+ methods in IterativeWorkflowProcessorThread class (rlaidlaw)
* OODT-574 RADiX POM Parent References (Arni Sumarlidason via mattmann)
Added: oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java?rev=1456758&view=auto
==============================================================================
--- oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java (added)
+++ oodt/trunk/filemgr/src/main/java/org/apache/oodt/cas/filemgr/metadata/extractors/examples/FilenameRegexMetExtractor.java Fri Mar 15 03:52:23 2013
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oodt.cas.filemgr.metadata.extractors.examples;
+
+//JDK imports
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+//OODT imports
+import org.apache.oodt.cas.filemgr.metadata.extractors.AbstractFilemgrMetExtractor;
+import org.apache.oodt.cas.filemgr.structs.Product;
+import org.apache.oodt.cas.metadata.Metadata;
+import org.apache.oodt.cas.metadata.exceptions.MetExtractionException;
+
+/**
+ * @author nchung
+ * @version $Revision$
+ *
+ * <p>
+ * Extracts {@link Metadata} from a {@link Product} filename that matches
+ * a provided regular expression.
+ * </p>.
+ */
+public class FilenameRegexMetExtractor extends AbstractFilemgrMetExtractor {
+
+ private String filenamePattern;
+ private List<String> metadataKeys;
+
+ public void doConfigure() {
+ if (this.configuration != null) {
+ this.filenamePattern = this.configuration
+ .getProperty("filenamePattern");
+ this.metadataKeys = Arrays.asList(this.configuration.getProperty(
+ "metadataKeys").split(","));
+ }
+ }
+
+ public Metadata doExtract(Product product, Metadata met)
+ throws MetExtractionException {
+ Metadata extractMet = new Metadata();
+ merge(met, extractMet);
+
+ Pattern pattern = Pattern.compile(this.filenamePattern);
+ Matcher matcher = pattern.matcher(getProductFile(product).getName());
+ if (matcher.matches()) {
+ for (int i = 0; i < this.metadataKeys.size(); i++) {
+ String key = this.metadataKeys.get(i);
+ String value = matcher.group(i + 1);
+ extractMet.addMetadata(key, value);
+ }
+ } else {
+ throw new MetExtractionException("Filename does not conform to the pattern "
+ + this.filenamePattern);
+ }
+ return extractMet;
+ }
+}
Modified: oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml
URL: http://svn.apache.org/viewvc/oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml?rev=1456758&r1=1456757&r2=1456758&view=diff
==============================================================================
--- oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml (original)
+++ oodt/trunk/filemgr/src/main/resources/examples/core/product-types.xml Fri Mar 15 03:52:23 2013
@@ -61,6 +61,27 @@
<property name="replace" value="true"/>
</configuration>
</extractor>
+
+ <!--
+ The below enables the FilenameRegexMetExtractor.
+ It allows a user to specify a filename pattern with Java Regex that identifies groups
+ within a particular file name. Those groups are then mapped to Metadata key names, and
+ extracted as metadata from a file's name.
+
+ In the example below, there are two groups, Filename, and ProductId, for files
+ of the form:
+
+ Filename_ProductId.txt
+
+ <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.examples.FilenameRegexMetExtractor">
+ <configuration>
+ <property name="filenamePattern" value="(\w*)_(\d*)\.txt"/>
+ <property name="metadataKeys" value="Filename,ProductId"/>
+ </configuration>
+
+ </extractor>
+
+ -->
</metExtractors>
</type>
</cas:producttypes>