You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2013/06/23 10:25:12 UTC
[3/4] git commit: CAMEL-6004: TokenizeXML added support for self
closing tags. Thanks to Aki Yoshida for the patch.
CAMEL-6004: TokenizeXML added support for self closing tags. Thanks to Aki Yoshida for the patch.
Conflicts:
camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/6a641ec6
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/6a641ec6
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/6a641ec6
Branch: refs/heads/camel-2.11.x
Commit: 6a641ec65777b9e63ba905312880b08372ed0b2b
Parents: 5467ef1
Author: Claus Ibsen <da...@apache.org>
Authored: Sun Jun 23 10:19:25 2013 +0200
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jun 23 10:23:04 2013 +0200
----------------------------------------------------------------------
.../apache/camel/builder/ExpressionBuilder.java | 8 +-
.../support/TokenPairExpressionIterator.java | 2 +-
.../support/TokenXMLExpressionIterator.java | 270 +++++++++++++++++++
.../support/TokenXMLPairExpressionIterator.java | 3 +
.../tokenizer/TokenizeLanguageTest.java | 98 +++++++
5 files changed, 375 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
index 0d135bd..b5d29ef 100644
--- a/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
+++ b/camel-core/src/main/java/org/apache/camel/builder/ExpressionBuilder.java
@@ -45,7 +45,7 @@ import org.apache.camel.model.language.MethodCallExpression;
import org.apache.camel.spi.Language;
import org.apache.camel.support.ExpressionAdapter;
import org.apache.camel.support.TokenPairExpressionIterator;
-import org.apache.camel.support.TokenXMLPairExpressionIterator;
+import org.apache.camel.support.TokenXMLExpressionIterator;
import org.apache.camel.util.ExchangeHelper;
import org.apache.camel.util.FileUtil;
import org.apache.camel.util.GroupIterator;
@@ -1153,7 +1153,7 @@ public final class ExpressionBuilder {
}
/**
- * Returns an {@link TokenXMLPairExpressionIterator} expression
+ * Returns an {@link TokenXMLExpressionIterator} expression
*/
public static Expression tokenizeXMLExpression(String tagName, String inheritNamespaceTagName) {
ObjectHelper.notEmpty(tagName, "tagName");
@@ -1166,8 +1166,6 @@ public final class ExpressionBuilder {
tagName = tagName + ">";
}
- String endToken = "</" + tagName.substring(1);
-
if (inheritNamespaceTagName != null) {
if (!inheritNamespaceTagName.startsWith("<")) {
inheritNamespaceTagName = "<" + inheritNamespaceTagName;
@@ -1177,7 +1175,7 @@ public final class ExpressionBuilder {
}
}
- return new TokenXMLPairExpressionIterator(tagName, endToken, inheritNamespaceTagName);
+ return new TokenXMLExpressionIterator(tagName, inheritNamespaceTagName);
}
/**
http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
index 489f5e6..b6b9133 100644
--- a/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenPairExpressionIterator.java
@@ -34,7 +34,7 @@ import org.apache.camel.util.ObjectHelper;
* The message body must be able to convert to {@link InputStream} type which is used as stream
* to access the message body.
* <p/>
- * For splitting XML files use {@link TokenXMLPairExpressionIterator} instead.
+ * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
*/
public class TokenPairExpressionIterator extends ExpressionAdapter {
http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
new file mode 100644
index 0000000..b8d4374
--- /dev/null
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLExpressionIterator.java
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.support;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.MessageFormat;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.InvalidPayloadException;
+import org.apache.camel.util.IOHelper;
+import org.apache.camel.util.ObjectHelper;
+
+/**
+ * {@link org.apache.camel.Expression} that walks the XML body of a {@link org.apache.camel.Message}
+ * using an {@link java.util.Iterator}. Each element returned by the iterator is the text of one
+ * occurrence of the configured tag: either a start tag up to the next end tag of the same name,
+ * or a single self-closing tag.
+ * <p/>
+ * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
+ * to access the message body.
+ * <p/>
+ * Can be used to split big XML files.
+ * <p/>
+ * This implementation supports inheriting namespaces from a parent/root tag: the namespace
+ * declarations found on that tag are appended to the start tag of every returned token.
+ */
+public class TokenXMLExpressionIterator extends ExpressionAdapter {
+    // matches xmlns="..." / xmlns:prefix='...' declarations; group 1 is ":prefix" (or empty),
+    // group 2 is the namespace value including its surrounding quotes
+    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
+    // optional namespace prefix (up to 15 characters followed by a colon) in front of a tag name
+    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
+    // {0} is replaced with the (optionally prefixed) tag name.
+    // 1st alternative: a self-closing tag. The attribute part must not run past the closing '>'
+    //   (it may contain '/', e.g. in namespace URLs), otherwise a start tag followed by some other
+    //   self-closing element would be mistaken for one self-closing token.
+    // 2nd alternative: a start tag, then any content up to the first complete end tag of the same
+    //   name. The look-ahead requires the whole end tag so that an end tag which merely starts with
+    //   the same name (e.g. </children> when splitting on child) does not terminate the scan.
+    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!</{0}\\s*>).)*</{0}\\s*>";
+    // {0} is replaced with the (optionally prefixed) parent tag name; matches its start tag only
+    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
+
+    protected final String tagToken;
+    protected final String inheritNamespaceToken;
+
+    /**
+     * Creates the expression.
+     *
+     * @param tagToken              the tag to split on, written as an XML tag (must start with
+     *                              <tt>&lt;</tt> and end with <tt>&gt;</tt>), must not be empty
+     * @param inheritNamespaceToken optional parent/root tag (same format) whose namespace
+     *                              declarations are copied onto each token, or <tt>null</tt>
+     * @throws IllegalArgumentException if a given token does not start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
+     */
+    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
+        ObjectHelper.notEmpty(tagToken, "tagToken");
+        this.tagToken = tagToken;
+        // namespace token is optional
+        this.inheritNamespaceToken = inheritNamespaceToken;
+
+        // must be XML tokens
+        if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) {
+            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken);
+        }
+        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
+            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
+        }
+    }
+
+    /**
+     * Creates and initializes the iterator that scans the given stream.
+     *
+     * @param in      the stream to read the XML from
+     * @param charset the charset name handed to the {@link Scanner} reading the stream
+     * @return the iterator, already positioned on the first token (if any)
+     */
+    protected Iterator<?> createIterator(InputStream in, String charset) {
+        XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset);
+        iterator.init();
+        return iterator;
+    }
+
+    @Override
+    public boolean matches(Exchange exchange) {
+        // as a predicate we must close the stream, as we do not return an iterator that can be used
+        // afterwards to iterate the input stream
+        Object value = doEvaluate(exchange, true);
+        return ObjectHelper.evaluateValuePredicate(value);
+    }
+
+    @Override
+    public Object evaluate(Exchange exchange) {
+        // as we return an iterator to access the input stream, we should not close it
+        return doEvaluate(exchange, false);
+    }
+
+    /**
+     * Strategy to evaluate the exchange
+     *
+     * @param exchange   the exchange
+     * @param closeStream whether to close the stream before returning from this method.
+     * @return the iterator over the tokens, or <tt>null</tt> if the body could not be obtained as
+     *         an {@link InputStream} (in which case the exception is set on the exchange)
+     */
+    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
+        InputStream in = null;
+        try {
+            in = exchange.getIn().getMandatoryBody(InputStream.class);
+            // we may read from a file, and want to support custom charset defined on the exchange
+            String charset = IOHelper.getCharsetName(exchange);
+            return createIterator(in, charset);
+        } catch (InvalidPayloadException e) {
+            exchange.setException(e);
+            // must close input stream
+            IOHelper.close(in);
+            return null;
+        } finally {
+            if (closeStream) {
+                IOHelper.close(in);
+            }
+        }
+    }
+
+    /**
+     * Iterator to walk the input stream
+     * <p/>
+     * The iterator always holds the upcoming token in {@link #image}; {@link #init()} must be
+     * called once before the iterator is used.
+     */
+    static class XMLTokenIterator implements Iterator<Object>, Closeable {
+        final String tagToken;
+        final InputStream in;
+        final String charset;
+        Scanner scanner;
+        // the token that the next call to next() will return, or null when there are no more
+        Object image;
+
+        private final Pattern tagTokenPattern;
+        private final String inheritNamespaceToken;
+        private Pattern inheritNamespaceTokenPattern;
+        // namespace declarations collected from the parent tag, ready to be appended to a start tag
+        private String rootTokenNamespaces;
+
+        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
+            this.tagToken = tagToken;
+            this.in = in;
+            this.charset = charset;
+
+            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
+            this.tagTokenPattern =
+                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE,
+                    SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)),
+                    Pattern.MULTILINE | Pattern.DOTALL);
+
+            this.inheritNamespaceToken = inheritNamespaceToken;
+            if (inheritNamespaceToken != null) {
+                // the inherit namespace token may itself have a namespace prefix
+                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
+                this.inheritNamespaceTokenPattern =
+                    Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
+                        SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)),
+                        Pattern.MULTILINE | Pattern.DOTALL);
+            }
+        }
+
+        /**
+         * Opens the scanner on the stream and pre-loads the first token into {@link #image}.
+         */
+        void init() {
+            // use a scanner with the default delimiter
+            this.scanner = new Scanner(in, charset);
+            this.image = scanner.hasNext() ? (String) next(true) : null;
+        }
+
+        /**
+         * Scans forward for the next token.
+         *
+         * @param first <tt>true</tt> on the very first call, in which case the parent tag (if
+         *              configured) is located first and its namespace declarations remembered
+         * @return the text of the next token, with inherited namespaces added to its start tag
+         *         when applicable, or <tt>null</tt> if no further token was found
+         */
+        String getNext(boolean first) {
+            // initialize inherited namespaces on first
+            if (first && inheritNamespaceToken != null) {
+                rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
+            }
+
+            // a horizon of 0 lets the scanner search the remaining input without a bound
+            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
+            if (next == null) {
+                return null;
+            }
+
+            // build answer accordingly to whether namespaces should be inherited or not
+            // REVISIT should skip the prefixes that are declared within the child itself.
+            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
+                // presumably the text before the first ">" i.e. the start tag without its
+                // closing bracket - confirm against ObjectHelper.before
+                String head = ObjectHelper.before(next, ">");
+                boolean empty = false;
+                if (head.endsWith("/")) {
+                    // self-closing tag: drop the slash for now and put it back after the namespaces
+                    head = head.substring(0, head.length() - 1);
+                    empty = true;
+                }
+                StringBuilder sb = new StringBuilder();
+                // append root namespaces to local start token
+                // grab the text
+                String tail = ObjectHelper.after(next, ">");
+                // build result with inherited namespaces
+                next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
+            }
+
+            return next;
+        }
+
+        /**
+         * Extracts the namespace declarations from the text of the parent start tag.
+         *
+         * @param text the matched parent start tag, may be <tt>null</tt>
+         * @return the declarations as a string such as <tt> xmlns="a" xmlns:b="c"</tt> (each one
+         *         preceded by a space, in order of first appearance), or <tt>null</tt> if the text
+         *         is <tt>null</tt> or declares no namespaces
+         */
+        private String getNamespacesFromNamespaceToken(String text) {
+            if (text == null) {
+                return null;
+            }
+
+            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
+            Map<String, String> namespaces = new LinkedHashMap<String, String>();
+            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
+            while (matcher.find()) {
+                String prefix = matcher.group(1);
+                String url = matcher.group(2);
+                if (ObjectHelper.isEmpty(prefix)) {
+                    // marker key used for the default (un-prefixed) namespace
+                    prefix = "_DEFAULT_";
+                } else {
+                    // skip leading :
+                    prefix = prefix.substring(1);
+                }
+                namespaces.put(prefix, url);
+            }
+
+            // did we find any namespaces
+            if (namespaces.isEmpty()) {
+                return null;
+            }
+
+            // build namespace String
+            StringBuilder sb = new StringBuilder();
+            for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+                String key = entry.getKey();
+                // note the value is already quoted
+                String value = entry.getValue();
+                if ("_DEFAULT_".equals(key)) {
+                    sb.append(" xmlns=").append(value);
+                } else {
+                    sb.append(" xmlns:").append(key).append("=").append(value);
+                }
+            }
+
+            return sb.toString();
+        }
+
+        @Override
+        public boolean hasNext() {
+            return image != null;
+        }
+
+        @Override
+        public Object next() {
+            return next(false);
+        }
+
+        /**
+         * Returns the pre-loaded token and loads the following one into {@link #image}.
+         *
+         * @param first <tt>true</tt> only for the initial call made from {@link #init()}
+         * @return the current token; on the initial call (nothing pre-loaded yet) the token that
+         *         was just loaded, which also stays in {@link #image}
+         */
+        Object next(boolean first) {
+            Object answer = image;
+            // calculate next
+            if (scanner.hasNext()) {
+                image = getNext(first);
+            } else {
+                image = null;
+            }
+
+            if (answer == null) {
+                // first time the image may be null
+                answer = image;
+            }
+            return answer;
+        }
+
+        @Override
+        public void remove() {
+            // noop
+        }
+
+        @Override
+        public void close() throws IOException {
+            scanner.close();
+        }
+
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
----------------------------------------------------------------------
diff --git a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
index 9ae1477..c5fbea4 100644
--- a/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
+++ b/camel-core/src/main/java/org/apache/camel/support/TokenXMLPairExpressionIterator.java
@@ -36,7 +36,10 @@ import org.apache.camel.util.ObjectHelper;
* Can be used to split big XML files.
* <p/>
* This implementation supports inheriting namespaces from a parent/root tag.
+ *
+ * @deprecated use {@link TokenXMLExpressionIterator} instead.
*/
+@Deprecated
public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
http://git-wip-us.apache.org/repos/asf/camel/blob/6a641ec6/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
----------------------------------------------------------------------
diff --git a/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
new file mode 100644
index 0000000..bfc3bfa
--- /dev/null
+++ b/camel-core/src/test/java/org/apache/camel/language/tokenizer/TokenizeLanguageTest.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.language.tokenizer;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.builder.RouteBuilder;
+
+public class TokenizeLanguageTest extends ContextTestSupport {
+
+ public void testSendClosedTagMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'></child>", "<child some_attr='b' anotherAttr='b'></child>");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'></child><child some_attr='b' anotherAttr='b'></child></parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ public void testSendClosedTagWithLineBreaksMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a'>\n</child>", "<child some_attr='b' anotherAttr='b'>\n</child>");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?>\n"
+ + "<parent>\n"
+ + "<child some_attr='a' anotherAttr='a'>\n"
+ + "</child>\n"
+ + "<child some_attr='b' anotherAttr='b'>\n"
+ + "</child>\n"
+ + "</parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ public void testSendSelfClosingTagMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived("<child some_attr='a' anotherAttr='a' />", "<child some_attr='b' anotherAttr='b' />");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a' /><child some_attr='b' anotherAttr='b' /></parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ public void testSendMixedClosingTagMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived(
+ "<child some_attr='a' anotherAttr='a'>ha</child>", "<child some_attr='b' anotherAttr='b' />", "<child some_attr='c'></child>");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?><parent><child some_attr='a' anotherAttr='a'>ha</child><child some_attr='b' anotherAttr='b' /><child some_attr='c'></child></parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ public void testSendNamespacedChildMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived(
+ "<c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child>", "<c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' />");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?><parent><c:child xmlns:c='urn:c' some_attr='a' anotherAttr='a'></c:child><c:child xmlns:c='urn:c' some_attr='b' anotherAttr='b' /></parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ public void testSendNamespacedParentMessageToTokenize() throws Exception {
+ getMockEndpoint("mock:result").expectedBodiesReceived(
+ "<c:child some_attr='a' anotherAttr='a' xmlns:c='urn:c' xmlns:d=\"urn:d\"></c:child>", "<c:child some_attr='b' anotherAttr='b' xmlns:c='urn:c' xmlns:d=\"urn:d\"/>");
+
+ template.sendBody("direct:start",
+ "<?xml version='1.0' encoding='UTF-8'?><c:parent xmlns:c='urn:c' xmlns:d=\"urn:d\"><c:child some_attr='a' anotherAttr='a'></c:child><c:child some_attr='b' anotherAttr='b'/></c:parent>");
+
+ assertMockEndpointsSatisfied();
+ }
+
+ @Override
+ protected RouteBuilder createRouteBuilder() {
+ return new RouteBuilder() {
+ public void configure() {
+ from("direct:start")
+ .split().tokenizeXML("child", "parent")
+ .to("mock:result")
+ .end();
+ }
+ };
+ }
+}