You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/06/19 00:54:13 UTC
svn commit: r1603663 - in
/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika:
TikaConfig.java TikaExtractor.java
Author: kwright
Date: Wed Jun 18 22:54:13 2014
New Revision: 1603663
URL: http://svn.apache.org/r1603663
Log:
Add specification support class
Added:
manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java (with props)
Modified:
manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
Added: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java?rev=1603663&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java (added)
+++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java Wed Jun 18 22:54:13 2014
@@ -0,0 +1,36 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.agents.transformation.tika;
+
+/** Specification node and attribute names read by the Tika extractor's SpecPacker.
+ */
+public class TikaConfig {
+
+ // Configuration parameters
+ // None
+
+ // Specification nodes and values
+ public static final String NODE_FIELDMAP = "fieldmap"; // node type carrying one mapping via its "source" and "target" attributes
+ public static final String NODE_KEEPMETADATA = "keepAllMetadata"; // node type whose "value" attribute is parsed as a boolean
+ public static final String ATTRIBUTE_SOURCE = "source"; // attribute of a fieldmap node: the map key
+ public static final String ATTRIBUTE_TARGET = "target"; // attribute of a fieldmap node: the mapped value
+ public static final String ATTRIBUTE_VALUE = "value"; // attribute of a keepAllMetadata node
+
+}
Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaConfig.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java?rev=1603663&r1=1603662&r2=1603663&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java Wed Jun 18 22:54:13 2014
@@ -363,7 +363,96 @@ public class TikaExtractor extends org.a
}
}
-
+
+ protected static class SpecPacker {
+ // Holds source->target field mappings plus a keep-all-metadata flag; built from a Specification or from a packed string.
+ private final Map<String,String> sourceTargets = new HashMap<String,String>(); // source name -> target name
+ private final boolean keepAllMetadata;
+ /** Builds the packer by scanning the child nodes of the given specification. */
+ public SpecPacker(Specification os) {
+ boolean keepAllMetadata = true; // default when no keepAllMetadata node is present; this local shadows the field until assigned below
+ for (int i = 0; i < os.getChildCount(); i++) {
+ SpecificationNode sn = os.getChild(i);
+ // Only the two node types below are interpreted; any other node type is skipped.
+ if(sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
+ String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+ keepAllMetadata = Boolean.parseBoolean(value); // a later keepAllMetadata node overwrites an earlier one
+ } else if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
+ String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
+ String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
+ // A null target is replaced by the empty string before being stored.
+ if (target == null) {
+ target = "";
+ }
+ sourceTargets.put(source, target); // NOTE(review): source is not null-checked; a null key could make Arrays.sort in toPackedString() throw - confirm callers always set it
+ }
+ }
+ this.keepAllMetadata = keepAllMetadata;
+ }
+ /** Rebuilds the packer from a packed string; mirrors the layout written by toPackedString(). */
+ public SpecPacker(String packedString) {
+
+ int index = 0;
+
+ // Mappings: unpackList is called with '+', then each entry is unpacked with ':' into a [source, target] pair
+ final List<String> packedMappings = new ArrayList<String>();
+ index = unpackList(packedMappings,packedString,index,'+'); // returned index is where the flag character is looked for below
+ String[] fixedList = new String[2]; // reused buffer: [0] = source, [1] = target
+ for (String packedMapping : packedMappings) {
+ unpackFixedList(fixedList,packedMapping,0,':');
+ sourceTargets.put(fixedList[0], fixedList[1]);
+ }
+
+ // Keep all metadata: '+' at the current index means true, any other character false; true when the string ends here
+ if (packedString.length() > index)
+ keepAllMetadata = (packedString.charAt(index++) == '+');
+ else
+ keepAllMetadata = true;
+
+ }
+ /** Serializes the mappings, ordered by source name, followed by '+' or '-' for the keep-all-metadata flag. */
+ public String toPackedString() {
+ StringBuilder sb = new StringBuilder();
+ int i;
+
+ // Mappings: sort the source names so the packed form does not depend on HashMap iteration order
+ final String[] sortArray = new String[sourceTargets.size()];
+ i = 0;
+ for (String source : sourceTargets.keySet()) {
+ sortArray[i++] = source;
+ }
+ java.util.Arrays.sort(sortArray);
+
+ List<String> packedMappings = new ArrayList<String>();
+ String[] fixedList = new String[2]; // reused buffer: [0] = source, [1] = target
+ for (String source : sortArray) {
+ String target = sourceTargets.get(source);
+ StringBuilder localBuffer = new StringBuilder();
+ fixedList[0] = source;
+ fixedList[1] = target;
+ packFixedList(localBuffer,fixedList,':'); // each pair is packed into its own buffer with ':'
+ packedMappings.add(localBuffer.toString());
+ }
+ packList(sb,packedMappings,'+'); // the packed pairs are then packed as one list with '+'
+
+ // Keep all metadata: '+' for true, '-' for false
+ if (keepAllMetadata)
+ sb.append('+');
+ else
+ sb.append('-');
+
+ return sb.toString();
+ }
+ /** Returns the target stored for the given source, or null when the map holds no entry for it. */
+ public String getMapping(String source) {
+ return sourceTargets.get(source);
+ }
+ /** Returns the keep-all-metadata flag. */
+ public boolean keepAllMetadata() {
+ return keepAllMetadata;
+ }
+ }
+
}