You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ay...@apache.org on 2014/04/23 10:18:20 UTC

git commit: CAMEL-7388: xmlTokenizer to optionally wrap the token with the enclosing elements

Repository: camel
Updated Branches:
  refs/heads/master 374a2c41a -> 01d587525


CAMEL-7388: xmlTokenizer to optionally wrap the token with the enclosing elements


Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/01d58752
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/01d58752
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/01d58752

Branch: refs/heads/master
Commit: 01d587525f937b3135791f1c1d100909b245382a
Parents: 374a2c4
Author: Akitoshi Yoshida <ay...@apache.org>
Authored: Wed Apr 23 10:16:13 2014 +0200
Committer: Akitoshi Yoshida <ay...@apache.org>
Committed: Wed Apr 23 10:17:37 2014 +0200

----------------------------------------------------------------------
 .../support/TokenXMLExpressionIterator.java     | 122 ++++++++++++++++--
 .../tokenizer/TokenizeLanguageTest.java         |  10 ++
 .../tokenizer/TokenizeWrapLanguageTest.java     | 128 +++++++++++++++++++
 3 files changed, 249 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/camel/blob/01d58752/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
index ba21a71..f6ac6be 100644
--- a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
@@ -16,14 +16,20 @@
  */
 package org.apache.camel.support;
 
+import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
+import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
 import java.text.MessageFormat;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Scanner;
+import java.util.regex.MatchResult;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -49,7 +55,8 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
     private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
     private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
     private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
-    
+    private static final String OPTION_WRAP_TOKEN = "<*>";
+
     protected final String tagToken;
     protected final String inheritNamespaceToken;
 
@@ -126,12 +133,14 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
 
         private final Pattern tagTokenPattern;
         private final String inheritNamespaceToken;
+        private final boolean wrapToken;
         private Pattern inheritNamespaceTokenPattern;
         private String rootTokenNamespaces;
+        private String wrapHead;
+        private String wrapTail;
 
         XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
             this.tagToken = tagToken;
-            this.in = in;
             this.charset = charset;
           
             // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
@@ -141,13 +150,20 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
                                                      Pattern.MULTILINE | Pattern.DOTALL);
             
             this.inheritNamespaceToken = inheritNamespaceToken;
-            if (inheritNamespaceToken != null) {
-                // the inherit namespace token may itself have a namespace prefix
-                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
-                this.inheritNamespaceTokenPattern = 
-                    Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
-                                                         SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
-                                                         Pattern.MULTILINE | Pattern.DOTALL);
+            if (inheritNamespaceToken != null && OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
+                this.wrapToken = true;
+                this.in = new RecordableInputStream(in, charset);
+            } else {
+                this.wrapToken = false;
+                this.in = in;
+                if (inheritNamespaceToken != null) {
+                    // the inherit namespace token may itself have a namespace prefix
+                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
+                    this.inheritNamespaceTokenPattern = 
+                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
+                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
+                                                             Pattern.MULTILINE | Pattern.DOTALL);
+                }
             }
         }
 
@@ -159,7 +175,7 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
 
         String getNext(boolean first) {
             // initialize inherited namespaces on first
-            if (first && inheritNamespaceToken != null) {
+            if (first && inheritNamespaceToken != null && !wrapToken) {
                 rootTokenNamespaces =  getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
             }
 
@@ -167,10 +183,15 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
             if (next == null) {
                 return null;
             }
+            if (first && wrapToken) {
+                MatchResult mres = scanner.match();
+                wrapHead = ((RecordableInputStream)in).getText(mres.start());
+                wrapTail = buildXMLTail(wrapHead);
+            }
 
             // build answer accordingly to whether namespaces should be inherited or not
-            // REVISIT should skip the prefixes that are declared within the child itself.
             if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
+                // REVISIT should skip the prefixes that are declared within the child itself.
                 String head = ObjectHelper.before(next, ">");
                 boolean empty = false;
                 if (head.endsWith("/")) {
@@ -183,6 +204,10 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
                 String tail = ObjectHelper.after(next, ">");
                 // build result with inherited namespaces
                 next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
+            } else if (wrapToken) {
+                // wrap the token
+                StringBuilder sb = new StringBuilder();
+                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
             }
             
             return next;
@@ -267,4 +292,79 @@ public class TokenXMLExpressionIterator extends ExpressionAdapter {
 
     }
 
+    /**
+     * Builds the sequence of closing tags for the elements that are still open at the end of the given xml head.
+     * <p/>
+     * The input is assumed to be the leading portion of a well-formed xml document. Processing instructions,
+     * comments, CDATA sections, other <tt>&lt;!...&gt;</tt> declarations and self-closing elements never
+     * contribute to the result. Truncated markup at the very end of the input is ignored instead of failing.
+     *
+     * @param xmlhead the xml text to inspect
+     * @return the closing tags of the elements left open, innermost element first
+     */
+    private static String buildXMLTail(String xmlhead) {
+        // used as a stack holding the names of the elements that are currently open
+        List<String> tags = new ArrayList<String>();
+        final int len = xmlhead.length();
+        int p = 0;
+        while (p < len) {
+            p = xmlhead.indexOf('<', p);
+            if (p < 0 || p + 1 >= len) {
+                // no further markup, or a dangling '<' as the very last character
+                break;
+            }
+            final char nc = xmlhead.charAt(p + 1);
+            if (nc == '?') {
+                // xml declaration or processing instruction
+                p = indexAfter(xmlhead, "?>", p + 2);
+            } else if (nc == '!') {
+                // comments and CDATA sections may contain '<' and '>', so skip them as a whole
+                if (xmlhead.startsWith("<!--", p)) {
+                    p = indexAfter(xmlhead, "-->", p + 4);
+                } else if (xmlhead.startsWith("<![CDATA[", p)) {
+                    p = indexAfter(xmlhead, "]]>", p + 9);
+                } else {
+                    // DOCTYPE or another markup declaration
+                    p = indexAfter(xmlhead, ">", p + 2);
+                }
+            } else if (nc == '/') {
+                // an end tag closes the innermost open element; tolerate a surplus end tag
+                if (!tags.isEmpty()) {
+                    tags.remove(tags.size() - 1);
+                }
+                p = indexAfter(xmlhead, ">", p + 2);
+            } else {
+                final int ep = indexOfTagEnd(xmlhead, p + 1);
+                if (ep < 0) {
+                    // truncated start tag
+                    break;
+                }
+                if (xmlhead.charAt(ep - 1) != '/') {
+                    // the element name ends at the first whitespace (space, tab or line break) or at '>'
+                    int ne = p + 1;
+                    while (ne < ep && !Character.isWhitespace(xmlhead.charAt(ne))) {
+                        ne++;
+                    }
+                    tags.add(xmlhead.substring(p + 1, ne));
+                }
+                p = ep + 1;
+            }
+        }
+        StringBuilder sb = new StringBuilder();
+        for (int i = tags.size() - 1; i >= 0; i--) {
+            sb.append("</").append(tags.get(i)).append(">");
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Returns the index just behind the next occurrence of the terminator,
+     * or the length of the text when the terminator does not occur.
+     */
+    private static int indexAfter(String text, String terminator, int from) {
+        final int idx = text.indexOf(terminator, from);
+        return idx < 0 ? text.length() : idx + terminator.length();
+    }
+
+    /**
+     * Returns the index of the '>' that ends the start tag, ignoring any '>' inside
+     * a quoted attribute value, or -1 when the tag is not terminated.
+     */
+    private static int indexOfTagEnd(String text, int from) {
+        char quote = 0;
+        for (int i = from; i < text.length(); i++) {
+            final char c = text.charAt(i);
+            if (quote != 0) {
+                if (c == quote) {
+                    quote = 0;
+                }
+            } else if (c == '"' || c == '\'') {
+                quote = c;
+            } else if (c == '>') {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * An input stream that keeps a copy of every byte read through it until the
+     * first text extraction ({@link #getText(int)}) occurs.
+     */
+    private static class RecordableInputStream extends FilterInputStream {
+        private final ByteArrayOutputStream buf;
+        private final String charset;
+        private boolean recording;
+
+        /**
+         * @param in the stream to read from and to record
+         * @param charset the name of the charset used to decode the recorded bytes,
+         *                or <tt>null</tt> to use the platform default charset
+         */
+        protected RecordableInputStream(InputStream in, String charset) {
+            super(in);
+            this.buf = new ByteArrayOutputStream();
+            this.charset = charset;
+            this.recording = true;
+        }
+
+        @Override
+        public int read() throws IOException {
+            int c = super.read();
+            // only -1 signals the end of the stream; 0 is a regular byte value (e.g. in UTF-16 data)
+            if (c >= 0 && recording) {
+                buf.write(c);
+            }
+            return c;
+        }
+
+        @Override
+        public int read(byte[] b, int off, int len) throws IOException {
+            int n = super.read(b, off, len);
+            if (n > 0 && recording) {
+                buf.write(b, off, n);
+            }
+            return n;
+        }
+
+        /**
+         * Stops the recording and returns the leading part of the recorded text.
+         *
+         * @param pos the number of characters (not bytes) to return, counted from the start of the recorded text
+         * @return the first <tt>pos</tt> characters of the recorded text
+         * @throws IllegalStateException if the charset is not supported
+         */
+        public String getText(int pos) {
+            recording = false;
+            final byte[] recorded = buf.toByteArray();
+            final String text;
+            if (charset == null) {
+                text = new String(recorded);
+            } else {
+                try {
+                    text = new String(recorded, charset);
+                } catch (UnsupportedEncodingException e) {
+                    // this should have been caught while scanning; fail with the cause instead of returning null
+                    throw new IllegalStateException("Unsupported charset: " + charset, e);
+                }
+            }
+            // decode first and cut afterwards, as the position is a character offset
+            // which differs from the byte offset when multi-byte characters are present
+            return text.substring(0, pos);
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/camel/blob/01d58752/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
----------------------------------------------------------------------
diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
index 939fd10..1238c64 100644
--- a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
+++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
@@ -96,6 +96,16 @@ public class TokenizeLanguageTest extends ContextTestSupport {
         assertMockEndpointsSatisfied();
     }
 
+    public void testSendMoreParentsMessageToTokenize() throws Exception {
+        getMockEndpoint("mock:result").expectedBodiesReceived(
+            "<c:child some_attr='a' anotherAttr='a' xmlns:c='urn:c' xmlns:d=\"urn:d\"></c:child>", "<c:child some_attr='b' anotherAttr='b' xmlns:c='urn:c' xmlns:d=\"urn:d\"/>");
+
+        template.sendBody("direct:start",
+            "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>");
+
+        assertMockEndpointsSatisfied();
+    }
+
     @Override
     protected RouteBuilder createRouteBuilder() {
         return new RouteBuilder() {

http://git-wip-us.apache.org/repos/asf/camel/blob/01d58752/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java
----------------------------------------------------------------------
diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java
new file mode 100644
index 0000000..61ed6ea
--- /dev/null
+++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeWrapLanguageTest.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.language.tokenizer;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.builder.RouteBuilder;
+
+/**
+ * Tests the xml tokenizer route <tt>tokenizeXML("child", "*")</tt>, where each child token
+ * is expected to arrive wrapped in the markup that encloses it in the incoming document.
+ */
+public class TokenizeWrapLanguageTest extends ContextTestSupport {
+
+    /** Children written with explicit end tags. */
+    public void testSendClosedTagMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child><child some_attr='b' anotherAttr='b'></child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b'></child></parent>");
+    }
+
+    /** Children written with explicit end tags in a document containing line breaks. */
+    public void testSendClosedTagWithLineBreaksMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?>\n"
+                + "<parent>\n"
+                + "<child some_attr='a' anotherAttr='a'>\n"
+                + "</child>\n"
+                + "<child some_attr='b' anotherAttr='b'>\n"
+                + "</child>\n"
+                + "</parent>",
+            "<?xml version='1.0' encoding='UTF-8'?>\n<parent>\n<child some_attr='a' anotherAttr='a'>\n</child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?>\n<parent>\n<child some_attr='b' anotherAttr='b'>\n</child></parent>");
+    }
+
+    /** Self-closing children. */
+    public void testSendSelfClosingTagMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /><child some_attr='b' anotherAttr='b' /></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b' /></parent>");
+    }
+
+    /** A mix of children with end tags and self-closing children. */
+    public void testSendMixedClosingTagMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child><child some_attr='b' anotherAttr='b' /><child some_attr='c'></child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='b' anotherAttr='b' /></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='c'></child></parent>");
+    }
+
+    /** Children that contain grandchildren written in both closing styles. */
+    public void testSendMixedClosingTagInsideMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<parent><child name='child1'><grandchild name='grandchild1'/> <grandchild name='grandchild2'/></child>"
+                + "<child name='child2'><grandchild name='grandchild1'></grandchild><grandchild name='grandchild2'></grandchild></child></parent>",
+            "<parent><child name='child1'><grandchild name='grandchild1'/> <grandchild name='grandchild2'/></child></parent>",
+            "<parent><child name='child2'><grandchild name='grandchild1'></grandchild><grandchild name='grandchild2'></grandchild></child></parent>");
+    }
+
+    /** Children that declare their own namespace prefix. */
+    public void testSendNamespacedChildMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child></parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>");
+    }
+
+    /** Children whose namespace prefix is declared on the parent element. */
+    public void testSendNamespacedParentMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child></c:parent>",
+            "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='b' anotherAttr='b'/></c:parent>");
+    }
+
+    /** Children nested below several ancestors, with sibling elements preceding the parent. */
+    public void testSendMoreParentsMessageToTokenize() throws Exception {
+        assertWrappedTokens(
+            "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>",
+            "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child></c:parent></greatparent></g:greatgreatparent>",
+            "<?xml version='1.0' encoding='UTF-8'?><g:greatgreatparent xmlns:g='urn:g'><greatparent><uncle/><aunt>emma</aunt><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='b' anotherAttr='b'/></c:parent></greatparent></g:greatgreatparent>");
+    }
+
+    /**
+     * Registers the expected bodies on <tt>mock:result</tt>, sends the given body to
+     * <tt>direct:start</tt> and asserts that the mock endpoints are satisfied.
+     *
+     * @param body the xml document to send
+     * @param expectedTokens the bodies expected at <tt>mock:result</tt>, in order
+     */
+    private void assertWrappedTokens(String body, String... expectedTokens) throws Exception {
+        // the cast passes the array itself as the varargs array
+        getMockEndpoint("mock:result").expectedBodiesReceived((Object[]) expectedTokens);
+
+        template.sendBody("direct:start", body);
+
+        assertMockEndpointsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        final RouteBuilder routes = new RouteBuilder() {
+            @Override
+            public void configure() {
+                from("direct:start")
+                    .split().tokenizeXML("child", "*")
+                        .to("mock:result")
+                    .end();
+            }
+        };
+        return routes;
+    }
+}