You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2020/03/03 15:11:50 UTC
[camel] branch master updated: CAMEL-14648: camel-core optimize -
CamelURIParser for parsing almost all common Camel endpoint URIs in a
faster and lighter way than using java.net.URL.
This is an automated email from the ASF dual-hosted git repository.
davsclaus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/master by this push:
new c59bd35 CAMEL-14648: camel-core optimize - CamelURIParser for parsing almost all common Camel endpoint URIs in a faster and lighter way than using java.net.URL.
c59bd35 is described below
commit c59bd359466ce4440bfed1de98ce2aefd1c522c3
Author: Claus Ibsen <cl...@gmail.com>
AuthorDate: Tue Mar 3 13:54:35 2020 +0100
CAMEL-14648: camel-core optimize - CamelURIParser for parsing almost all common Camel endpoint URIs in a faster and lighter way than using java.net.URL.
---
.../java/org/apache/camel/util/CamelURIParser.java | 108 ++++++++++++++++++
.../java/org/apache/camel/util/URISupport.java | 64 +++++++++++
.../camel/util/UnsafeUriCharactersEncoder.java | 46 +++++++-
.../org/apache/camel/util/CamelURIParserTest.java | 123 +++++++++++++++++++++
.../java/org/apache/camel/util/URISupportTest.java | 11 ++
.../apache/camel/itest/jmh/NormalizeUriTest.java | 45 +++++++-
6 files changed, 389 insertions(+), 8 deletions(-)
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java b/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java
new file mode 100644
index 0000000..0cf88e0
--- /dev/null
+++ b/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.util;
+
+/**
+ * A Camel specific URI parser that parses endpoint URIs in a quasi syntax that Camel uses.
+ *
+ * The {@link java.net.URI} is much slower and parses endpoint URIs into additional parts which
+ * Camel does not use or need.
+ */
+public final class CamelURIParser {
+
+    private CamelURIParser() {
+        // utility class
+    }
+
+    /**
+     * Parses the URI into its scheme, path and query parts.
+     *
+     * If this parser cannot parse the uri then <tt>null</tt> is returned. And instead the following code can be used:
+     * <pre>
+     * URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
+     * </pre>
+     *
+     * The uri cannot be parsed when it contains a non-ASCII character or a character that is unsafe
+     * for the fast parser, when it has no scheme, or when nothing follows the colon after the scheme.
+     * Two slashes directly after the colon are not part of the path, and a <tt>?</tt> marker with
+     * nothing after it yields a <tt>null</tt> query.
+     *
+     * @param uri the uri
+     *
+     * @return <tt>null</tt> if not possible to parse, or an array[3] with scheme,path,query
+     */
+    public static String[] parseUri(String uri) {
+        // index of the colon that ends the scheme (0 = not found yet)
+        int schemeEnd = 0;
+        int pathStart = 0;
+        // index of the ? marker that ends the path (0 = not found)
+        int pathEnd = 0;
+        int queryStart = 0;
+
+        int len = uri.length();
+        for (int i = 0; i < len; i++) {
+            char ch = uri.charAt(i);
+            // must be an ascii char (0..127) and a safe char
+            if (ch > 127 || !UnsafeUriCharactersEncoder.isSafeFastParser(ch)) {
+                return null;
+            }
+            if (schemeEnd == 0) {
+                if (ch == ':') {
+                    if (i == 0) {
+                        // a leading colon means the scheme is empty which is invalid
+                        return null;
+                    }
+                    schemeEnd = i;
+                    // skip colon
+                    pathStart = i + 1;
+                }
+            } else if (pathEnd == 0 && ch == '?') {
+                pathEnd = i;
+                // skip ? marker
+                queryStart = i + 1;
+            }
+        }
+
+        // invalid if there is no scheme, or if there is no path after the colon
+        if (schemeEnd == 0 || pathStart >= len) {
+            return null;
+        }
+        String scheme = uri.substring(0, schemeEnd);
+
+        // skip two leading slashes
+        if (uri.startsWith("//", pathStart)) {
+            pathStart += 2;
+        }
+        String path = pathEnd != 0 ? uri.substring(pathStart, pathEnd) : uri.substring(pathStart);
+
+        // a ? marker as the very last character is regarded as no query
+        String query = null;
+        if (queryStart != 0 && queryStart < len) {
+            query = uri.substring(queryStart);
+        }
+
+        return new String[]{scheme, path, query};
+    }
+}
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java b/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
index c194706..2df3bfc 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
@@ -521,7 +521,22 @@ public final class URISupport {
* @see #RAW_TOKEN_END
*/
public static String normalizeUri(String uri) throws URISyntaxException, UnsupportedEncodingException {
+ // try to parse using the simpler and faster Camel URI parser
+ String[] parts = CamelURIParser.parseUri(uri);
+ if (parts != null) {
+ // use the faster and more simple normalizer
+ return doFastNormalizeUri(parts);
+ } else {
+ // use the legacy normalizer as the uri is complex and may have unsafe URL characters
+ return doComplexNormalizeUri(uri);
+ }
+ }
+ /**
+ * The complex (and Camel 2.x compatible) URI normalizer which is used when the URI is more complex,
+ * such as having percent encoded values, other unsafe URL characters, or an authority with user/password, etc.
+ */
+ private static String doComplexNormalizeUri(String uri) throws URISyntaxException {
URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
String scheme = u.getScheme();
String path = u.getSchemeSpecificPart();
@@ -591,6 +606,55 @@ public final class URISupport {
}
}
+    /**
+     * The fast parser for normalizing Camel endpoint URIs when the URI is not complex and
+     * can be parsed in a much more efficient way.
+     *
+     * The query parameters are reordered a..z by key, so the same endpoint configured with
+     * its parameters in a different order normalizes to the same uri.
+     *
+     * @param parts the scheme, path and query (in that order) where the query may be <tt>null</tt>
+     * @return the normalized uri
+     * @throws URISyntaxException declared for the query helpers used for parsing and rebuilding the query
+     */
+    private static String doFastNormalizeUri(String[] parts) throws URISyntaxException {
+        String scheme = parts[0];
+        String path = parts[1];
+        String query = parts[2];
+
+        // no query, or no & separator, then there is nothing to reorder so just return
+        if (query == null || query.indexOf('&') == -1) {
+            return buildUri(scheme, path, query);
+        }
+
+        Map<String, Object> parameters = URISupport.parseQuery(query, false, false);
+        if (parameters == null || parameters.size() < 2) {
+            // fewer than two distinct keys cannot be out of order
+            return buildUri(scheme, path, query);
+        }
+
+        // reorder parameters a..z
+        // optimize and only build new query if the keys need to be resorted
+        // NOTE(review): assumes parseQuery returns the keys in the order they appear in the query - confirm
+        boolean sort = false;
+        String prev = null;
+        for (String key : parameters.keySet()) {
+            // compare each key with its predecessor (and not only with the first key),
+            // otherwise a late out-of-order key such as in b,d,c is not detected
+            if (prev != null && key.compareTo(prev) < 0) {
+                sort = true;
+                break;
+            }
+            prev = key;
+        }
+        if (sort) {
+            List<String> keys = new ArrayList<>(parameters.keySet());
+            keys.sort(null);
+            // rebuild query with sorted parameters
+            query = URISupport.createQueryString(keys, parameters);
+        }
+
+        return buildUri(scheme, path, query);
+    }
+
private static String buildUri(String scheme, String path, String query) {
// must include :// to do a correct URI all components can work with
int len = scheme.length() + 3 + path.length();
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java b/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
index e681ee9..4b3eb85 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
@@ -25,13 +25,34 @@ import java.util.List;
* A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
*/
public final class UnsafeUriCharactersEncoder {
+ private static BitSet unsafeCharactersFastParser;
private static BitSet unsafeCharactersRfc1738;
private static BitSet unsafeCharactersHttp;
private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
static {
- unsafeCharactersRfc1738 = new BitSet(256);
+ unsafeCharactersFastParser = new BitSet(14);
+ unsafeCharactersFastParser.set(' ');
+ unsafeCharactersFastParser.set('"');
+ unsafeCharactersFastParser.set('<');
+ unsafeCharactersFastParser.set('>');
+ unsafeCharactersFastParser.set('%');
+ unsafeCharactersFastParser.set('{');
+ unsafeCharactersFastParser.set('}');
+ unsafeCharactersFastParser.set('|');
+ unsafeCharactersFastParser.set('\\');
+ unsafeCharactersFastParser.set('^');
+ unsafeCharactersFastParser.set('~');
+ unsafeCharactersFastParser.set('[');
+ unsafeCharactersFastParser.set(']');
+ unsafeCharactersFastParser.set('`');
+ // we allow # as a safe when using the fast parser as its used for
+ // looking up beans in the registry (foo=#myBar)
+ }
+
+ static {
+ unsafeCharactersRfc1738 = new BitSet(15);
unsafeCharactersRfc1738.set(' ');
unsafeCharactersRfc1738.set('"');
unsafeCharactersRfc1738.set('<');
@@ -48,9 +69,9 @@ public final class UnsafeUriCharactersEncoder {
unsafeCharactersRfc1738.set(']');
unsafeCharactersRfc1738.set('`');
}
-
+
static {
- unsafeCharactersHttp = new BitSet(256);
+ unsafeCharactersHttp = new BitSet(13);
unsafeCharactersHttp.set(' ');
unsafeCharactersHttp.set('"');
unsafeCharactersHttp.set('<');
@@ -70,6 +91,25 @@ public final class UnsafeUriCharactersEncoder {
// util class
}
+    /**
+     * Whether the character is safe for the fast parser, eg it is not one of its unsafe characters.
+     *
+     * This overload performs no ASCII range check, only the unsafe characters are consulted.
+     *
+     * @param ch the character
+     * @return <tt>true</tt> if the character is not marked as unsafe, <tt>false</tt> otherwise
+     */
+    public static boolean isSafeFastParser(char ch) {
+        return !unsafeCharactersFastParser.get(ch);
+    }
+
+    /**
+     * Whether all characters of the string are ASCII characters which are safe for the fast parser.
+     *
+     * @param s the string
+     * @return <tt>false</tt> if the string has a non-ASCII or an unsafe character, <tt>true</tt> otherwise
+     *         (also for an empty string)
+     */
+    public static boolean isSafeFastParser(String s) {
+        int len = s.length();
+        for (int i = 0; i < len; i++) {
+            char ch = s.charAt(i);
+            // must be an ascii char (0..127) and not an unsafe char
+            if (ch > 127 || unsafeCharactersFastParser.get(ch)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
public static String encode(String s) {
return encode(s, unsafeCharactersRfc1738);
}
diff --git a/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java b/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java
new file mode 100644
index 0000000..922d4d5
--- /dev/null
+++ b/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.util;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+public class CamelURIParserTest {
+
+    /**
+     * Parses the uri and verifies the scheme, path and query (in that order) of the result.
+     */
+    private static void assertParsed(String uri, String scheme, String path, String query) {
+        String[] parts = CamelURIParser.parseUri(uri);
+        assertEquals(scheme, parts[0]);
+        assertEquals(path, parts[1]);
+        assertEquals(query, parts[2]);
+    }
+
+    @Test
+    public void testParseUri() throws Exception {
+        assertParsed("smtp://localhost?username=davsclaus&password=secret",
+                "smtp", "localhost", "username=davsclaus&password=secret");
+    }
+
+    @Test
+    public void testParseNoSlashUri() throws Exception {
+        assertParsed("direct:start", "direct", "start", null);
+    }
+
+    @Test
+    public void testParseUriSlashAndQuery() throws Exception {
+        // a single slash is part of the path, two leading slashes are not
+        assertParsed("file:/absolute?recursive=true", "file", "/absolute", "recursive=true");
+        assertParsed("file:///absolute?recursive=true", "file", "/absolute", "recursive=true");
+        assertParsed("file://relative?recursive=true", "file", "relative", "recursive=true");
+        assertParsed("file:relative?recursive=true", "file", "relative", "recursive=true");
+    }
+
+    @Test
+    public void testParseUriSlash() throws Exception {
+        assertParsed("file:/absolute", "file", "/absolute", null);
+        assertParsed("file:///absolute", "file", "/absolute", null);
+        assertParsed("file://relative", "file", "relative", null);
+        assertParsed("file:relative", "file", "relative", null);
+    }
+
+    @Test
+    public void testParseInvalid() throws Exception {
+        // no scheme, or nothing after the scheme
+        String[] invalid = {"doesnotexists", "doesnotexists:", "doesnotexists/foo", "doesnotexists?"};
+        for (String uri : invalid) {
+            assertNull(CamelURIParser.parseUri(uri));
+        }
+    }
+
+    @Test
+    public void testParseNoPathButSlash() throws Exception {
+        assertParsed("file:/", "file", "/", null);
+        assertParsed("file:///", "file", "/", null);
+    }
+
+    @Test
+    public void testParseEmptyQuery() throws Exception {
+        assertParsed("file:relative", "file", "relative", null);
+        // a trailing ? marker without parameters gives no query
+        assertParsed("file:relative?", "file", "relative", null);
+    }
+
+}
diff --git a/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java b/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
index 60cad52..1837d0f 100644
--- a/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
+++ b/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
@@ -240,6 +240,17 @@ public class URISupportTest {
}
@Test
+ public void testNormalizeEndpointUriSort() throws Exception {
+ String out1 = URISupport.normalizeUri("smtp://localhost?to=foo&from=me"); // keys given out of order
+ assertEquals("smtp://localhost?from=me&to=foo", out1); // expected to be reordered a..z by key
+
+ String out2 = URISupport.normalizeUri("smtp://localhost?from=me&to=foo"); // keys already in order
+ assertEquals("smtp://localhost?from=me&to=foo", out2); // expected to be left unchanged
+
+ assertEquals(out1, out2); // both variants normalize to the same uri
+ }
+
+ @Test
public void testSanitizeAccessToken() throws Exception {
String out1 = URISupport.sanitizeUri("google-sheets-stream://spreadsheets?accessToken=MY_TOKEN&clientId=foo&clientSecret=MY_SECRET");
assertEquals("google-sheets-stream://spreadsheets?accessToken=xxxxxx&clientId=foo&clientSecret=xxxxxx", out1);
diff --git a/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java b/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
index 8f9a6e2..f0ec54d 100644
--- a/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
+++ b/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
@@ -72,15 +72,52 @@ public class NormalizeUriTest {
}
@Benchmark
- public void benchmark(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+ public void benchmarkMixed(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+ // fast
+ bh.consume(URISupport.normalizeUri("log:foo?level=INFO&logMask=false&exchangeFormatter=#myFormatter"));
+ // slow
+ bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
+ // fast
+ bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
+ // slow
+ bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
+ // fast
+ bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
+ // slow
+ bh.consume(URISupport.normalizeUri("ftp://us%40r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
+ // fast
+ bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
+ // slow
+ bh.consume(URISupport.normalizeUri("ftp://us@r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
+ }
+
+ @Benchmark
+ public void benchmarkFast(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
bh.consume(URISupport.normalizeUri("log:foo"));
bh.consume(URISupport.normalizeUri("log:foo?level=INFO&logMask=false&exchangeFormatter=#myFormatter"));
+ bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
bh.consume(URISupport.normalizeUri("smtp://localhost?password=secret&username=davsclaus"));
bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
bh.consume(URISupport.normalizeUri("irc:someserver/#camel?user=davsclaus"));
bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
- bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
bh.consume(URISupport.normalizeUri("smtp://localhost?to=foo&to=bar&from=me&from=you"));
+ }
+
+ @Benchmark
+ public void benchmarkFastSorted(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+ bh.consume(URISupport.normalizeUri("log:foo"));
+ bh.consume(URISupport.normalizeUri("log:foo?exchangeFormatter=#myFormatter&level=INFO&logMask=false"));
+ bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
+ bh.consume(URISupport.normalizeUri("smtp://localhost?username=davsclaus&password=secret"));
+ bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
+ bh.consume(URISupport.normalizeUri("irc:someserver/#camel?user=davsclaus"));
+ bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
+ bh.consume(URISupport.normalizeUri("smtp://localhost?&from=me&from=you&to=foo&to=bar"));
+ }
+
+ @Benchmark
+ public void benchmarkSlow(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+ bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
bh.consume(URISupport.normalizeUri("ftp://us%40r:t%st@localhost:21000/tmp3/camel?foo=us@r"));
bh.consume(URISupport.normalizeUri("ftp://us%40r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
bh.consume(URISupport.normalizeUri("ftp://us@r:t%st@localhost:21000/tmp3/camel?foo=us@r"));
@@ -88,12 +125,10 @@ public class NormalizeUriTest {
bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=secret&serviceName=someCoolChat"));
bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW(++?w0rd)&serviceName=some chat"));
bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW(foo %% bar)&serviceName=some chat"));
- bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW{++?w0rd}&serviceName=some chat"));
- bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW{foo %% bar}&serviceName=some chat"));
}
@Benchmark
- public void sorted(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+ public void sorting(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
bh.consume(URISupport.normalizeUri("log:foo?zzz=123&xxx=222&hhh=444&aaa=tru&d=yes&cc=no&Camel=awesome&foo.hey=bar&foo.bar=blah"));
}