You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/06/19 00:54:13 UTC

svn commit: r1603663 - in /manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika: TikaConfig.java TikaExtractor.java

Author: kwright
Date: Wed Jun 18 22:54:13 2014
New Revision: 1603663

URL: http://svn.apache.org/r1603663
Log:
Add specification support class

Added:
    manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java   (with props)
Modified:
    manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java

Added: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java?rev=1603663&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java (added)
+++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java Wed Jun 18 22:54:13 2014
@@ -0,0 +1,36 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.agents.transformation.tika;
+
+/** Specification node and attribute names read by the Tika extractor (see TikaExtractor.SpecPacker).
+ */
+public class TikaConfig {
+
+  // Configuration parameters
+  // None
+
+  // Specification nodes and values
+  public static final String NODE_FIELDMAP = "fieldmap"; // node type: one source-to-target mapping
+  public static final String NODE_KEEPMETADATA = "keepAllMetadata"; // node type: keep-all-metadata flag
+  public static final String ATTRIBUTE_SOURCE = "source"; // fieldmap attribute: mapping source
+  public static final String ATTRIBUTE_TARGET = "target"; // fieldmap attribute: mapping target
+  public static final String ATTRIBUTE_VALUE = "value"; // keepAllMetadata attribute: parsed as a boolean
+  
+}

Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java?rev=1603663&r1=1603662&r2=1603663&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java Wed Jun 18 22:54:13 2014
@@ -363,7 +363,96 @@ public class TikaExtractor extends org.a
     }
 
   }
-  
+
+  protected static class SpecPacker {
+    // Holds a source-to-target map plus a keep-all-metadata flag, and converts them to/from a packed string.
+    private final Map<String,String> sourceTargets = new HashMap<String,String>();
+    private final boolean keepAllMetadata;
+    /** Build from a Specification; the flag stays true unless a keepAllMetadata node sets it (last one wins). */
+    public SpecPacker(Specification os) {
+      boolean keepAllMetadata = true;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        // Only the two node types below are read; a node of any other type is skipped.
+        if(sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value); // parseBoolean(null) is false, so a null value turns the flag off
+        } else if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
+          // A missing target is stored as the empty string.
+          if (target == null) {
+            target = "";
+          }
+          sourceTargets.put(source, target); // NOTE(review): source is not null-checked here — confirm it is always present
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+    }
+    /** Rebuild from a packed string, reading a mapping list first and then one flag character. */
+    public SpecPacker(String packedString) {
+      
+      int index = 0;
+      
+      // Mappings; unpackList/unpackFixedList are defined outside this class — presumably unpackList returns the index past the list (confirm)
+      final List<String> packedMappings = new ArrayList<String>();
+      index = unpackList(packedMappings,packedString,index,'+');
+      String[] fixedList = new String[2];
+      for (String packedMapping : packedMappings) {
+        unpackFixedList(fixedList,packedMapping,0,':');
+        sourceTargets.put(fixedList[0], fixedList[1]);
+      }
+      
+      // Keep all metadata: '+' means true, any other character means false; defaults to true when no character is left
+      if (packedString.length() > index)
+        keepAllMetadata = (packedString.charAt(index++) == '+');
+      else
+        keepAllMetadata = true;
+      
+    }
+    /** Pack the mappings (in sorted source order) followed by '+' or '-' for the keepAllMetadata flag. */
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      int i;
+      
+      // Mappings: sources are sorted first, so the output order does not depend on HashMap iteration order
+      final String[] sortArray = new String[sourceTargets.size()];
+      i = 0;
+      for (String source : sourceTargets.keySet()) {
+        sortArray[i++] = source;
+      }
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> packedMappings = new ArrayList<String>();
+      String[] fixedList = new String[2];
+      for (String source : sortArray) {
+        String target = sourceTargets.get(source);
+        StringBuilder localBuffer = new StringBuilder();
+        fixedList[0] = source;
+        fixedList[1] = target;
+        packFixedList(localBuffer,fixedList,':');
+        packedMappings.add(localBuffer.toString());
+      }
+      packList(sb,packedMappings,'+');
+
+      // Keep all metadata: appended as a single '+' (true) or '-' (false)
+      if (keepAllMetadata)
+        sb.append('+');
+      else
+        sb.append('-');
+      
+      return sb.toString();
+    }
+    /** Returns what the map holds for source; null when source has no mapping. */
+    public String getMapping(String source) {
+      return sourceTargets.get(source);
+    }
+    /** Returns the keep-all-metadata flag that was fixed at construction. */
+    public boolean keepAllMetadata() {
+      return keepAllMetadata;
+    }
+  }
+
 }