You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2020/03/03 15:11:50 UTC

[camel] branch master updated: CAMEL-14648: camel-core optimize - CamelURIParser for parsing almost all common Camel endpoint URIs in a faster and lighter way than using java.net.URL.

This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/master by this push:
     new c59bd35  CAMEL-14648: camel-core optimize - CamelURIParser for parsing almost all common Camel endpoint URIs in a faster and lighter way than using java.net.URL.
c59bd35 is described below

commit c59bd359466ce4440bfed1de98ce2aefd1c522c3
Author: Claus Ibsen <cl...@gmail.com>
AuthorDate: Tue Mar 3 13:54:35 2020 +0100

    CAMEL-14648: camel-core optimize - CamelURIParser for parsing almost all common Camel endpoint URIs in a faster and lighter way than using java.net.URL.
---
 .../java/org/apache/camel/util/CamelURIParser.java | 108 ++++++++++++++++++
 .../java/org/apache/camel/util/URISupport.java     |  64 +++++++++++
 .../camel/util/UnsafeUriCharactersEncoder.java     |  46 +++++++-
 .../org/apache/camel/util/CamelURIParserTest.java  | 123 +++++++++++++++++++++
 .../java/org/apache/camel/util/URISupportTest.java |  11 ++
 .../apache/camel/itest/jmh/NormalizeUriTest.java   |  45 +++++++-
 6 files changed, 389 insertions(+), 8 deletions(-)

diff --git a/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java b/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java
new file mode 100644
index 0000000..0cf88e0
--- /dev/null
+++ b/core/camel-util/src/main/java/org/apache/camel/util/CamelURIParser.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.util;
+
+/**
+ * A Camel specific URI parser that parses endpoint URIs in a quasi syntax that Camel uses.
+ *
+ * The {@link java.net.URI} is much slower and parses endpoint URIs into additional parts which
+ * Camel does not use or need.
+ *
+ * This parser does not handle every URI. What it cannot handle is reported by returning
+ * <tt>null</tt>, so the caller can fallback to use {@link java.net.URI}.
+ */
+public final class CamelURIParser {
+
+    /**
+     * Parses the URI into its scheme, path and query parts.
+     *
+     * If this parser cannot parse the uri then <tt>null</tt> is returned. And instead the following code can be used:
+     * <pre>
+     *     URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
+     * </pre>
+     * The uri cannot be parsed when:
+     * <ul>
+     *   <li>it has a char that is not 7-bit ASCII, or is not safe for the fast parser</li>
+     *   <li>it has no scheme, or the scheme is empty</li>
+     *   <li>there is nothing after the colon that ends the scheme</li>
+     * </ul>
+     * Two slashes leading the path are not part of the returned path, and an empty query
+     * is returned as <tt>null</tt>.
+     *
+     * @param uri the uri
+     *
+     * @return <tt>null</tt> if not possible to parse, or an array[3] with scheme,path,query
+     */
+    public static String[] parseUri(String uri) {
+        // -1 means not found (0 is a valid index so it cannot be the marker)
+        int schemeEnd = -1;
+        int pathEnd = -1;
+
+        int len = uri.length();
+        for (int i = 0; i < len; i++) {
+            char ch = uri.charAt(i);
+            if (ch > 127) {
+                // must be an ascii char (0..127)
+                return null;
+            }
+            // must be a safe char
+            if (!UnsafeUriCharactersEncoder.isSafeFastParser(ch)) {
+                return null;
+            }
+            if (schemeEnd == -1) {
+                // the scheme ends at the first colon
+                if (ch == ':') {
+                    schemeEnd = i;
+                }
+            } else if (pathEnd == -1 && ch == '?') {
+                // the path ends at the first ? marker after the scheme
+                pathEnd = i;
+            }
+        }
+
+        // invalid if there is no scheme, or the scheme is empty (uri starts with colon)
+        if (schemeEnd <= 0) {
+            return null;
+        }
+        // skip colon
+        int pathStart = schemeEnd + 1;
+        // invalid if there is no path anyway
+        if (pathStart >= len) {
+            return null;
+        }
+        String scheme = uri.substring(0, schemeEnd);
+
+        // skip two leading slashes
+        if (uri.startsWith("//", pathStart)) {
+            pathStart += 2;
+        }
+        String path;
+        if (pathEnd != -1) {
+            path = uri.substring(pathStart, pathEnd);
+        } else {
+            path = uri.substring(pathStart);
+        }
+
+        // the query is what follows the ? marker (null when there is none or it is empty)
+        String query = null;
+        if (pathEnd != -1 && pathEnd + 1 < len) {
+            query = uri.substring(pathEnd + 1);
+        }
+
+        return new String[]{scheme, path, query};
+    }
+}
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java b/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
index c194706..2df3bfc 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/URISupport.java
@@ -521,7 +521,22 @@ public final class URISupport {
      * @see #RAW_TOKEN_END
      */
     public static String normalizeUri(String uri) throws URISyntaxException, UnsupportedEncodingException {
+        // first try the simpler and faster Camel URI parser,
+        // which gives null when the uri is too complex for it
+        String[] fastParts = CamelURIParser.parseUri(uri);
+        if (fastParts == null) {
+            // use the legacy normalizer as the uri is complex and may have unsafe URL characters
+            return doComplexNormalizeUri(uri);
+        }
+        // the uri was parsed so the faster and more simple normalizer can be used
+        return doFastNormalizeUri(fastParts);
+    }
 
+    /**
+     * The complex (and Camel 2.x) compatible URI normalizer when the URI is more complex
+     * such as having percent encoded values, or other unsafe URL characters, or have authority user/password, etc.
+     */
+    private static String doComplexNormalizeUri(String uri) throws URISyntaxException {
         URI u = new URI(UnsafeUriCharactersEncoder.encode(uri, true));
         String scheme = u.getScheme();
         String path = u.getSchemeSpecificPart();
@@ -591,6 +606,55 @@ public final class URISupport {
         }
     }
 
+    /**
+     * The fast parser for normalizing Camel endpoint URIs when the URI is not complex and
+     * can be parsed in a much more efficient way.
+     *
+     * @param parts the scheme, path and query (may be <tt>null</tt>) from {@link CamelURIParser#parseUri(String)}
+     * @return the normalized uri, with the query parameters reordered a..z by key when they are not already
+     */
+    private static String doFastNormalizeUri(String[] parts) throws URISyntaxException {
+        String scheme = parts[0];
+        String path = parts[1];
+        String query = parts[2];
+
+        // in case there are parameters we should reorder them
+        if (query == null) {
+            // no parameters then just return
+            return buildUri(scheme, path, null);
+        }
+        Map<String, Object> parameters = null;
+        if (query.indexOf('&') != -1) {
+            // only parse if there is parameters
+            parameters = URISupport.parseQuery(query, false, false);
+        }
+        if (parameters == null || parameters.size() == 1) {
+            // a single parameter has nothing to reorder
+            return buildUri(scheme, path, query);
+        }
+
+        // reorder parameters a..z
+        // optimize and only build new query if the keys was resorted
+        boolean sort = false;
+        String prev = null;
+        for (String key : parameters.keySet()) {
+            if (prev != null && key.compareTo(prev) < 0) {
+                sort = true;
+                break;
+            }
+            // must compare with the previous key (not the first) to detect any key out of order
+            prev = key;
+        }
+        if (sort) {
+            List<String> keys = new ArrayList<>(parameters.keySet());
+            keys.sort(null);
+            // rebuild query with sorted parameters
+            query = URISupport.createQueryString(keys, parameters);
+        }
+
+        return buildUri(scheme, path, query);
+    }
+
     private static String buildUri(String scheme, String path, String query) {
         // must include :// to do a correct URI all components can work with
         int len = scheme.length() + 3 + path.length();
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java b/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
index e681ee9..4b3eb85 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/UnsafeUriCharactersEncoder.java
@@ -25,13 +25,34 @@ import java.util.List;
  * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
  */
 public final class UnsafeUriCharactersEncoder {
+    private static BitSet unsafeCharactersFastParser;
     private static BitSet unsafeCharactersRfc1738;
     private static BitSet unsafeCharactersHttp;
     private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
                                               'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
 
     static {
-        unsafeCharactersRfc1738 = new BitSet(256);
+        // BitSet is sized in number of bits (not chars), so use 128 to cover all the ASCII chars set below
+        unsafeCharactersFastParser = new BitSet(128);
+        unsafeCharactersFastParser.set(' ');
+        unsafeCharactersFastParser.set('"');
+        unsafeCharactersFastParser.set('<');
+        unsafeCharactersFastParser.set('>');
+        unsafeCharactersFastParser.set('%');
+        unsafeCharactersFastParser.set('{');
+        unsafeCharactersFastParser.set('}');
+        unsafeCharactersFastParser.set('|');
+        unsafeCharactersFastParser.set('\\');
+        unsafeCharactersFastParser.set('^');
+        unsafeCharactersFastParser.set('~');
+        unsafeCharactersFastParser.set('[');
+        unsafeCharactersFastParser.set(']');
+        unsafeCharactersFastParser.set('`');
+        // # is kept as safe for the fast parser as it is used for looking up beans in the registry (foo=#myBar)
+    }
+
+    static {
+        unsafeCharactersRfc1738 = new BitSet(15);
         unsafeCharactersRfc1738.set(' ');
         unsafeCharactersRfc1738.set('"');
         unsafeCharactersRfc1738.set('<');
@@ -48,9 +69,9 @@ public final class UnsafeUriCharactersEncoder {
         unsafeCharactersRfc1738.set(']');
         unsafeCharactersRfc1738.set('`');
     }
-    
+
     static {
-        unsafeCharactersHttp = new BitSet(256);
+        unsafeCharactersHttp = new BitSet(13);
         unsafeCharactersHttp.set(' ');
         unsafeCharactersHttp.set('"');
         unsafeCharactersHttp.set('<');
@@ -70,6 +91,25 @@ public final class UnsafeUriCharactersEncoder {
         // util class
     }
 
+    /** Whether the char is not one of the unsafe chars for the fast parser (does not check the char is ASCII). */
+    public static boolean isSafeFastParser(char ch) {
+        return !unsafeCharactersFastParser.get(ch);
+    }
+
+    /**
+     * Whether every char of the string is 7-bit ASCII and safe for the fast parser.
+     */
+    public static boolean isSafeFastParser(String s) {
+        for (int i = 0, len = s.length(); i < len; i++) {
+            char ch = s.charAt(i);
+            // must be an ascii char (0..127), and must not be one of the unsafe chars
+            if (ch > 127 || unsafeCharactersFastParser.get(ch)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     public static String encode(String s) {
         return encode(s, unsafeCharactersRfc1738);
     }
diff --git a/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java b/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java
new file mode 100644
index 0000000..922d4d5
--- /dev/null
+++ b/core/camel-util/src/test/java/org/apache/camel/util/CamelURIParserTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.util;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+public class CamelURIParserTest {
+
+    @Test
+    public void testParseUri() throws Exception {
+        String[] parts = CamelURIParser.parseUri("smtp://localhost?username=davsclaus&password=secret");
+        assertEquals("smtp", parts[0]);
+        assertEquals("localhost", parts[1]);
+        assertEquals("username=davsclaus&password=secret", parts[2]);
+    }
+
+    @Test
+    public void testParseNoSlashUri() throws Exception {
+        String[] parts = CamelURIParser.parseUri("direct:start");
+        assertEquals("direct", parts[0]);
+        assertEquals("start", parts[1]);
+        assertNull(parts[2]);
+    }
+
+    @Test
+    public void testParseUriSlashAndQuery() throws Exception {
+        String[] oneSlash = CamelURIParser.parseUri("file:/absolute?recursive=true");
+        assertEquals("file", oneSlash[0]);
+        assertEquals("/absolute", oneSlash[1]);
+        assertEquals("recursive=true", oneSlash[2]);
+        // only the two first slashes are skipped so the path is still absolute
+        String[] threeSlashes = CamelURIParser.parseUri("file:///absolute?recursive=true");
+        assertEquals("file", threeSlashes[0]);
+        assertEquals("/absolute", threeSlashes[1]);
+        assertEquals("recursive=true", threeSlashes[2]);
+        // both slashes are skipped so the path is relative
+        String[] twoSlashes = CamelURIParser.parseUri("file://relative?recursive=true");
+        assertEquals("file", twoSlashes[0]);
+        assertEquals("relative", twoSlashes[1]);
+        assertEquals("recursive=true", twoSlashes[2]);
+        // no slashes at all
+        String[] noSlash = CamelURIParser.parseUri("file:relative?recursive=true");
+        assertEquals("file", noSlash[0]);
+        assertEquals("relative", noSlash[1]);
+        assertEquals("recursive=true", noSlash[2]);
+    }
+
+    @Test
+    public void testParseUriSlash() throws Exception {
+        String[] oneSlash = CamelURIParser.parseUri("file:/absolute");
+        assertEquals("file", oneSlash[0]);
+        assertEquals("/absolute", oneSlash[1]);
+        assertNull(oneSlash[2]);
+        // only the two first slashes are skipped so the path is still absolute
+        String[] threeSlashes = CamelURIParser.parseUri("file:///absolute");
+        assertEquals("file", threeSlashes[0]);
+        assertEquals("/absolute", threeSlashes[1]);
+        assertNull(threeSlashes[2]);
+        // both slashes are skipped so the path is relative
+        String[] twoSlashes = CamelURIParser.parseUri("file://relative");
+        assertEquals("file", twoSlashes[0]);
+        assertEquals("relative", twoSlashes[1]);
+        assertNull(twoSlashes[2]);
+        // no slashes at all
+        String[] noSlash = CamelURIParser.parseUri("file:relative");
+        assertEquals("file", noSlash[0]);
+        assertEquals("relative", noSlash[1]);
+        assertNull(noSlash[2]);
+    }
+
+    @Test
+    public void testParseInvalid() throws Exception {
+        // without a colon there is no scheme
+        assertNull(CamelURIParser.parseUri("doesnotexists"));
+        assertNull(CamelURIParser.parseUri("doesnotexists/foo"));
+        assertNull(CamelURIParser.parseUri("doesnotexists?"));
+        // with a scheme but nothing after the colon
+        assertNull(CamelURIParser.parseUri("doesnotexists:"));
+    }
+
+    @Test
+    public void testParseNoPathButSlash() throws Exception {
+        String[] oneSlash = CamelURIParser.parseUri("file:/");
+        assertEquals("file", oneSlash[0]);
+        assertEquals("/", oneSlash[1]);
+        assertNull(oneSlash[2]);
+        // the third slash is what remains as the path
+        String[] threeSlashes = CamelURIParser.parseUri("file:///");
+        assertEquals("file", threeSlashes[0]);
+        assertEquals("/", threeSlashes[1]);
+        assertNull(threeSlashes[2]);
+    }
+
+    @Test
+    public void testParseEmptyQuery() throws Exception {
+        String[] noMarker = CamelURIParser.parseUri("file:relative");
+        assertEquals("file", noMarker[0]);
+        assertEquals("relative", noMarker[1]);
+        assertNull(noMarker[2]);
+        // a ? marker with nothing after it is the same as no query
+        String[] onlyMarker = CamelURIParser.parseUri("file:relative?");
+        assertEquals("file", onlyMarker[0]);
+        assertEquals("relative", onlyMarker[1]);
+        assertNull(onlyMarker[2]);
+    }
+}
diff --git a/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java b/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
index 60cad52..1837d0f 100644
--- a/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
+++ b/core/camel-util/src/test/java/org/apache/camel/util/URISupportTest.java
@@ -240,6 +240,17 @@ public class URISupportTest {
     }
 
     @Test
+    public void testNormalizeEndpointUriSort() throws Exception {
+        // parameters given out of order must be reordered a..z
+        String reordered = URISupport.normalizeUri("smtp://localhost?to=foo&from=me");
+        assertEquals("smtp://localhost?from=me&to=foo", reordered);
+        // parameters already in order must be kept as-is
+        String untouched = URISupport.normalizeUri("smtp://localhost?from=me&to=foo");
+        assertEquals("smtp://localhost?from=me&to=foo", untouched);
+        assertEquals(reordered, untouched);
+    }
+
+    @Test
     public void testSanitizeAccessToken() throws Exception {
         String out1 = URISupport.sanitizeUri("google-sheets-stream://spreadsheets?accessToken=MY_TOKEN&clientId=foo&clientSecret=MY_SECRET");
         assertEquals("google-sheets-stream://spreadsheets?accessToken=xxxxxx&clientId=foo&clientSecret=xxxxxx", out1);
diff --git a/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java b/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
index 8f9a6e2..f0ec54d 100644
--- a/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
+++ b/tests/camel-jmh/src/test/java/org/apache/camel/itest/jmh/NormalizeUriTest.java
@@ -72,15 +72,52 @@ public class NormalizeUriTest {
     }
 
     @Benchmark
-    public void benchmark(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+    public void benchmarkMixed(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+        // fast: only safe ascii chars (# is allowed) so CamelURIParser can parse the uri
+        bh.consume(URISupport.normalizeUri("log:foo?level=INFO&logMask=false&exchangeFormatter=#myFormatter"));
+        // slow: % is unsafe for the fast parser so the legacy normalizer is used
+        bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
+        // fast: only safe ascii chars
+        bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
+        // slow: % is unsafe for the fast parser
+        bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
+        // fast: only safe ascii chars
+        bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
+        // slow: % is unsafe for the fast parser
+        bh.consume(URISupport.normalizeUri("ftp://us%40r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
+        // fast: only safe ascii chars
+        bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
+        // slow: % is unsafe for the fast parser
+        bh.consume(URISupport.normalizeUri("ftp://us@r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
+    }
+
+    @Benchmark
+    public void benchmarkFast(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
         bh.consume(URISupport.normalizeUri("log:foo"));
         bh.consume(URISupport.normalizeUri("log:foo?level=INFO&logMask=false&exchangeFormatter=#myFormatter"));
+        bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
         bh.consume(URISupport.normalizeUri("smtp://localhost?password=secret&username=davsclaus"));
         bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
         bh.consume(URISupport.normalizeUri("irc:someserver/#camel?user=davsclaus"));
         bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
-        bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
         bh.consume(URISupport.normalizeUri("smtp://localhost?to=foo&to=bar&from=me&from=you"));
+    }
+
+    @Benchmark
+    public void benchmarkFastSorted(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+        bh.consume(URISupport.normalizeUri("log:foo"));
+        bh.consume(URISupport.normalizeUri("log:foo?exchangeFormatter=#myFormatter&level=INFO&logMask=false"));
+        bh.consume(URISupport.normalizeUri("file:target/inbox?recursive=true"));
+        bh.consume(URISupport.normalizeUri("smtp://localhost?password=secret&username=davsclaus")); // keys already a..z
+        bh.consume(URISupport.normalizeUri("seda:foo?concurrentConsumer=2"));
+        bh.consume(URISupport.normalizeUri("irc:someserver/#camel?user=davsclaus"));
+        bh.consume(URISupport.normalizeUri("http:www.google.com?q=Camel"));
+        bh.consume(URISupport.normalizeUri("smtp://localhost?from=me&from=you&to=foo&to=bar")); // keys already a..z
+    }
+
+    @Benchmark
+    public void benchmarkSlow(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+        bh.consume(URISupport.normalizeUri("http://www.google.com?q=S%C3%B8ren%20Hansen"));
         bh.consume(URISupport.normalizeUri("ftp://us%40r:t%st@localhost:21000/tmp3/camel?foo=us@r"));
         bh.consume(URISupport.normalizeUri("ftp://us%40r:t%25st@localhost:21000/tmp3/camel?foo=us@r"));
         bh.consume(URISupport.normalizeUri("ftp://us@r:t%st@localhost:21000/tmp3/camel?foo=us@r"));
@@ -88,12 +125,10 @@ public class NormalizeUriTest {
         bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=secret&serviceName=someCoolChat"));
         bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW(++?w0rd)&serviceName=some chat"));
         bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW(foo %% bar)&serviceName=some chat"));
-        bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW{++?w0rd}&serviceName=some chat"));
-        bh.consume(URISupport.normalizeUri("xmpp://camel-user@localhost:123/test-user@localhost?password=RAW{foo %% bar}&serviceName=some chat"));
     }
 
     @Benchmark
-    public void sorted(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
+    public void sorting(ContainsIgnoreCaseTest.BenchmarkState state, Blackhole bh) throws Exception {
         bh.consume(URISupport.normalizeUri("log:foo?zzz=123&xxx=222&hhh=444&aaa=tru&d=yes&cc=no&Camel=awesome&foo.hey=bar&foo.bar=blah"));
     }