You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2013/06/23 10:25:12 UTC

[3/4] git commit: CAMEL-6004: TokenizeXML added support for self closing tags. Thanks to Aki Yoshida for the patch.

CAMEL-6004: TokenizeXML added support for self closing tags. Thanks to Aki Yoshida for the patch.

Conflicts:
	camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java


Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/6a641ec6
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/6a641ec6
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/6a641ec6

Branch: refs/heads/camel-2.11.x
Commit: 6a641ec65777b9e63ba905312880b08372ed0b2b
Parents: 5467ef1
Author: Claus Ibsen <da...@apache.org>
Authored: Sun Jun 23 10:19:25 2013 +0200
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jun 23 10:23:04 2013 +0200

----------------------------------------------------------------------
 .../apache/camel/builder/ExpressionBuilder.java |   8 +-
 .../support/TokenPairExpressionIterator.java    |   2 +-
 .../support/TokenXMLExpressionIterator.java     | 270 +++++++++++++++++++
 .../support/TokenXMLPairExpressionIterator.java |   3 +
 .../tokenizer/TokenizeLanguageTest.java         |  98 +++++++
 5 files changed, 375 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
index 0d135bd..b5d29ef 100644
--- a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
+++ b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
@@ -45,7 +45,7 @@ import org.apache.camel.model.language.MethodCallExpression;
 import org.apache.camel.spi.Language;
 import org.apache.camel.support.ExpressionAdapter;
 import org.apache.camel.support.TokenPairExpressionIterator;
-import org.apache.camel.support.TokenXMLPairExpressionIterator;
+import org.apache.camel.support.TokenXMLExpressionIterator;
 import org.apache.camel.util.ExchangeHelper;
 import org.apache.camel.util.FileUtil;
 import org.apache.camel.util.GroupIterator;
@@ -1153,7 +1153,7 @@ public final class ExpressionBuilder {
     }
 
     /**
-     * Returns an {@link TokenXMLPairExpressionIterator} expression
+     * Returns a {@link TokenXMLExpressionIterator} expression
      */
     public static Expression tokenizeXMLExpression(String tagName, String inheritNamespaceTagName) {
         ObjectHelper.notEmpty(tagName, "tagName");
@@ -1166,8 +1166,6 @@ public final class ExpressionBuilder {
             tagName = tagName + ">";
         }
 
-        String endToken = "</" + tagName.substring(1);
-
         if (inheritNamespaceTagName != null) {
             if (!inheritNamespaceTagName.startsWith("<")) {
                 inheritNamespaceTagName = "<" + inheritNamespaceTagName;
@@ -1177,7 +1175,7 @@ public final class ExpressionBuilder {
             }
         }
 
-        return new TokenXMLPairExpressionIterator(tagName, endToken, inheritNamespaceTagName);
+        return new TokenXMLExpressionIterator(tagName, inheritNamespaceTagName);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
index 489f5e6..b6b9133 100644
--- a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
@@ -34,7 +34,7 @@ import org.apache.camel.util.ObjectHelper;
  * The message body must be able to convert to {@link InputStream} type which is used as stream
  * to access the message body.
  * <p/>
- * For splitting XML files use {@link TokenXMLPairExpressionIterator} instead.
+ * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
  */
 public class TokenPairExpressionIterator extends ExpressionAdapter {
 

http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
new file mode 100644
index 0000000..b8d4374
--- /dev/null
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.support;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.MessageFormat;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.InvalidPayloadException;
+import org.apache.camel.util.IOHelper;
+import org.apache.camel.util.ObjectHelper;
+
+/**
+ * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
+ * using an {@link java.util.Iterator}, which grabs the content between an XML start and end token,
+ * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
+ * <p/>
+ * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
+ * to access the message body.
+ * <p/>
+ * Can be used to split big XML files.
+ * <p/>
+ * This implementation supports inheriting namespaces from a parent/root tag.
+ */
+public class TokenXMLExpressionIterator extends ExpressionAdapter {
+    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
+    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
+    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^/]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!</{0}).)*</{0}\\s*>";
+    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
+    
+    protected final String tagToken;
+    protected final String inheritNamespaceToken;
+
+    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
+        ObjectHelper.notEmpty(tagToken, "tagToken");
+        this.tagToken = tagToken;
+        // namespace token is optional
+        this.inheritNamespaceToken = inheritNamespaceToken;
+
+        // must be XML tokens
+        if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) {
+            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken);
+        }
+        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
+            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
+        }
+    }
+
+    protected Iterator<?> createIterator(InputStream in, String charset) {
+        XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset);
+        iterator.init();
+        return iterator;
+    }
+
+    @Override
+    public boolean matches(Exchange exchange) {
+        // as a predicate we must close the stream, as we do not return an iterator that can be used
+        // afterwards to iterate the input stream
+        Object value = doEvaluate(exchange, true);
+        return ObjectHelper.evaluateValuePredicate(value);
+    }
+
+    @Override
+    public Object evaluate(Exchange exchange) {
+        // as we return an iterator to access the input stream, we should not close it
+        return doEvaluate(exchange, false);
+    }
+
+    /**
+     * Strategy to evaluate the exchange
+     *
+     * @param exchange   the exchange
+     * @param closeStream whether to close the stream before returning from this method.
+     * @return the evaluated value
+     */
+    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
+        InputStream in = null;
+        try {
+            in = exchange.getIn().getMandatoryBody(InputStream.class);
+            // we may read from a file, and want to support custom charset defined on the exchange
+            String charset = IOHelper.getCharsetName(exchange);
+            return createIterator(in, charset);
+        } catch (InvalidPayloadException e) {
+            exchange.setException(e);
+            // must close input stream
+            IOHelper.close(in);
+            return null;
+        } finally {
+            if (closeStream) {
+                IOHelper.close(in);
+            }
+        }
+    }
+    
+    /**
+     * Iterator to walk the input stream
+     */
+    static class XMLTokenIterator implements Iterator<Object>, Closeable {
+        final String tagToken;
+        final InputStream in;
+        final String charset;
+        Scanner scanner;
+        Object image;
+
+        private final Pattern tagTokenPattern;
+        private final String inheritNamespaceToken;
+        private Pattern inheritNamespaceTokenPattern;
+        private String rootTokenNamespaces;
+
+        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
+            this.tagToken = tagToken;
+            this.in = in;
+            this.charset = charset;
+
+            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use regexp patterns
+            this.tagTokenPattern = 
+                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 
+                                                     SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 
+                                                     Pattern.MULTILINE | Pattern.DOTALL);
+            
+            this.inheritNamespaceToken = inheritNamespaceToken;
+            if (inheritNamespaceToken != null) {
+                // the inherit namespace token may itself have a namespace prefix
+                // the namespaces on the parent tag can span multiple lines, so we need to instruct the dot to match line breaks too
+                this.inheritNamespaceTokenPattern = 
+                    Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
+                                                         SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
+                                                         Pattern.MULTILINE | Pattern.DOTALL);
+            }
+        }
+
+        void init() {
+            // use a scanner with the default delimiter
+            this.scanner = new Scanner(in, charset);
+            this.image = scanner.hasNext() ? (String) next(true) : null;
+        }
+
+        String getNext(boolean first) {
+            // initialize inherited namespaces on first
+            if (first && inheritNamespaceToken != null) {
+                rootTokenNamespaces =  getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
+            }
+
+            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
+            if (next == null) {
+                return null;
+            }
+
+            // build the answer according to whether namespaces should be inherited or not
+            // REVISIT should skip the prefixes that are declared within the child itself.
+            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
+                String head = ObjectHelper.before(next, ">");
+                boolean empty = false;
+                if (head.endsWith("/")) {
+                    head = head.substring(0, head.length() - 1);
+                    empty = true;
+                }
+                StringBuilder sb = new StringBuilder();
+                // append root namespaces to local start token
+                // grab the text
+                String tail = ObjectHelper.after(next, ">");
+                // build result with inherited namespaces
+                next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
+            }
+            
+            return next;
+        }
+
+        private String getNamespacesFromNamespaceToken(String text) {
+            if (text == null) {
+                return null;
+            }
+
+            // find namespaces (there can be attributes mixed in, so we should only grab the namespaces)
+            Map<String, String> namespaces = new LinkedHashMap<String, String>();
+            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
+            while (matcher.find()) {
+                String prefix = matcher.group(1);
+                String url = matcher.group(2);
+                if (ObjectHelper.isEmpty(prefix)) {
+                    prefix = "_DEFAULT_";
+                } else {
+                    // skip leading :
+                    prefix = prefix.substring(1);
+                }
+                namespaces.put(prefix, url);
+            }
+
+            // did we find any namespaces
+            if (namespaces.isEmpty()) {
+                return null;
+            }
+
+            // build namespace String
+            StringBuilder sb = new StringBuilder();
+            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+                String key = entry.getKey();
+                // note the value is already quoted
+                String value = entry.getValue();
+                if ("_DEFAULT_".equals(key)) {
+                    sb.append(" xmlns=").append(value);
+                } else {
+                    sb.append(" xmlns:").append(key).append("=").append(value);
+                }
+            }
+
+            return sb.toString();
+        }
+        
+        @Override
+        public boolean hasNext() {
+            return image != null;
+        }
+
+        @Override
+        public Object next() {
+            return next(false);
+        }
+
+        Object next(boolean first) {
+            Object answer = image;
+            // calculate next
+            if (scanner.hasNext()) {
+                image = getNext(first);
+            } else {
+                image = null;
+            }
+
+            if (answer == null) {
+                // first time the image may be null
+                answer = image;
+            }
+            return answer;
+        }
+
+        @Override
+        public void remove() {
+            // noop
+        }
+
+        @Override
+        public void close() throws IOException {
+            scanner.close();
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
index 9ae1477..c5fbea4 100644
--- a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
@@ -36,7 +36,10 @@ import org.apache.camel.util.ObjectHelper;
  * Can be used to split big XML files.
  * <p/>
  * This implementation supports inheriting namespaces from a parent/root tag.
+ *
+ * @deprecated use {@link TokenXMLExpressionIterator} instead.
  */
+@Deprecated
 public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
 
     private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");

http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
----------------------------------------------------------------------
diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
new file mode 100644
index 0000000..bfc3bfa
--- /dev/null
+++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.language.tokenizer;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.builder.RouteBuilder;
+
+public class TokenizeLanguageTest extends ContextTestSupport {
+
+    public void testSendClosedTagMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'></child>", "<child some_attr='b' anotherAttr='b'></child>");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child><child some_attr='b' anotherAttr='b'></child></parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    public void testSendClosedTagWithLineBreaksMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'>\n</child>", "<child some_attr='b' anotherAttr='b'>\n</child>");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?>\n"
+                + "<parent>\n"
+                + "<child some_attr='a' anotherAttr='a'>\n"
+                + "</child>\n"
+                + "<child some_attr='b' anotherAttr='b'>\n"
+                + "</child>\n"
+                + "</parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    public void testSendSelfClosingTagMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a' />", "<child some_attr='b' anotherAttr='b' />");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /><child some_attr='b' anotherAttr='b' /></parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    public void testSendMixedClosingTagMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived(
+            "<child some_attr='a' anotherAttr='a'>ha</child>", "<child some_attr='b' anotherAttr='b' />", "<child some_attr='c'></child>");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child><child some_attr='b' anotherAttr='b' /><child some_attr='c'></child></parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    public void testSendNamespacedChildMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived(
+            "<c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child>", "<c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' />");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    public void testSendNamespacedParentMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived(
+            "<c:child some_attr='a' anotherAttr='a' xmlns:c='urn:c' xmlns:d=\"urn:d\"></c:child>", "<c:child some_attr='b' anotherAttr='b' xmlns:c='urn:c' xmlns:d=\"urn:d\"/>");
+        
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start")
+                    .split().tokenizeXML("child", "parent")
+                        .to("mock:result")
+                    .end();
+            }
+        };
+    }
+}