You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hc.apache.org by ol...@apache.org on 2023/02/25 08:27:42 UTC

[httpcomponents-core] 02/02: Add support for Internationalized Domain Names (IDN)

This is an automated email from the ASF dual-hosted git repository.

olegk pushed a commit to branch 5.3.x
in repository https://gitbox.apache.org/repos/asf/httpcomponents-core.git

commit a990a75f3a7cfaa3bc3970038e0fc1d29ff198a4
Author: Arturo Bernal <ar...@gmail.com>
AuthorDate: Fri Feb 24 22:18:55 2023 +0100

    Add support for Internationalized Domain Names (IDN)
    
    This change updates the URI parsing logic to correctly handle non-ASCII domain names using the Internationalized Domain Names (IDN) system. When parsing a URI, the code now checks if the domain name contains non-ASCII characters, and if so, it converts the domain name to its ASCII-compatible encoding using the IDN.toASCII() method.
    
    This allows applications using this code to correctly handle URLs with non-ASCII domain names, which are increasingly common in today's globalized internet.
---
 .../src/main/java/org/apache/hc/core5/net/Host.java    |  6 +++++-
 .../main/java/org/apache/hc/core5/util/TextUtils.java  | 18 ++++++++++++++++++
 .../http/examples/ClassicPostExecutionExample.java     |  2 +-
 .../core5/http/support/TestBasicMessageBuilders.java   |  8 ++++++++
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/Host.java b/httpcore5/src/main/java/org/apache/hc/core5/net/Host.java
index 3ef6192f1..541692bed 100644
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/Host.java
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/Host.java
@@ -27,6 +27,7 @@
 package org.apache.hc.core5.net;
 
 import java.io.Serializable;
+import java.net.IDN;
 import java.net.URISyntaxException;
 
 import org.apache.hc.core5.annotation.Contract;
@@ -59,7 +60,7 @@ public final class Host implements NamedEndpoint, Serializable {
 
     static Host parse(final CharSequence s, final Tokenizer.Cursor cursor) throws URISyntaxException {
         final Tokenizer tokenizer = Tokenizer.INSTANCE;
-        final String hostName;
+        String hostName;
         final boolean ipv6Brackets = !cursor.atEnd() && s.charAt(cursor.getPos()) == '[';
         if (ipv6Brackets) {
             cursor.updatePos(cursor.getPos() + 1);
@@ -73,6 +74,9 @@ public final class Host implements NamedEndpoint, Serializable {
             }
         } else {
             hostName = tokenizer.parseContent(s, cursor, URISupport.PORT_SEPARATORS);
+            if (!TextUtils.isAllASCII(hostName)) {
+                hostName = IDN.toASCII(hostName);
+            }
         }
         String portText = null;
         if (!cursor.atEnd() && s.charAt(cursor.getPos()) == ':') {
diff --git a/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java b/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java
index a8b6a6e28..867cb3786 100644
--- a/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java
+++ b/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java
@@ -141,4 +141,22 @@ public final class TextUtils {
         return s.toLowerCase(Locale.ROOT);
     }
 
+
+    /**
+     * Determines whether the given {@link CharSequence} contains only ASCII characters (none above 0x7F).
+     *
+     * @param s the {@link CharSequence} to check; must not be null
+     * @return true if no character exceeds 0x7F (an empty sequence yields true), false otherwise
+     * @throws NullPointerException if the input {@link CharSequence} is null
+     * @since 5.3
+     */
+    public static boolean isAllASCII(final CharSequence s) {
+        for (int i = 0; i < s.length(); i++) {
+            if (s.charAt(i) > 0x7F) {
+                return false;
+            }
+        }
+        return true;
+    }
+
 }
diff --git a/httpcore5/src/test/java/org/apache/hc/core5/http/examples/ClassicPostExecutionExample.java b/httpcore5/src/test/java/org/apache/hc/core5/http/examples/ClassicPostExecutionExample.java
index e574def45..fa79ae635 100644
--- a/httpcore5/src/test/java/org/apache/hc/core5/http/examples/ClassicPostExecutionExample.java
+++ b/httpcore5/src/test/java/org/apache/hc/core5/http/examples/ClassicPostExecutionExample.java
@@ -106,7 +106,7 @@ public class ClassicPostExecutionExample {
 
         final String requestUri = "/post";
         for (int i = 0; i < requestBodies.length; i++) {
-            final ClassicHttpRequest request = ClassicRequestBuilder.post()
+            final ClassicHttpRequest request = ClassicRequestBuilder.get()
                     .setHttpHost(target)
                     .setPath(requestUri)
                     .build();
diff --git a/httpcore5/src/test/java/org/apache/hc/core5/http/support/TestBasicMessageBuilders.java b/httpcore5/src/test/java/org/apache/hc/core5/http/support/TestBasicMessageBuilders.java
index 9c91c8ec6..a3c4705fb 100644
--- a/httpcore5/src/test/java/org/apache/hc/core5/http/support/TestBasicMessageBuilders.java
+++ b/httpcore5/src/test/java/org/apache/hc/core5/http/support/TestBasicMessageBuilders.java
@@ -28,6 +28,7 @@
 package org.apache.hc.core5.http.support;
 
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import org.apache.hc.core5.http.HeaderMatcher;
 import org.apache.hc.core5.http.HeadersMatcher;
@@ -237,4 +238,11 @@ public class TestBasicMessageBuilders {
                 new BasicHeader("h1", "v1"), new BasicHeader("h1", "v2"), new BasicHeader("h2", "v2")));
     }
 
+    @Test
+    void testIDNIntegration() {
+        final String url = "http://müller.example.com:8080/path"; // host label contains a non-ASCII character
+        final HttpRequest request = new BasicHttpRequest(Method.GET, URI.create(url));
+        assertEquals(new URIAuthority("xn--mller-kva.example.com",8080), request.getAuthority()); // expects the ASCII "xn--" host form
+    }
+
 }