You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain-text version.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2019/03/13 00:31:10 UTC

[asterixdb] 02/04: [NO ISSUE][HTTP] Character encoding fixes

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit ac5b498f27ae6e9673a7c5ce1f9e7085075a1f66
Author: Michael Blow <mb...@apache.org>
AuthorDate: Tue Mar 12 08:04:04 2019 -0400

    [NO ISSUE][HTTP] Character encoding fixes
    
    - Support alternate charset for application/x-www-form-urlencoded requests
    - Use a random charset in TestExecutor for each query request
    
    Change-Id: I4a982f7c6c34bb32652c1bdd9b546780a2d967d0
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/3265
    Sonar-Qube: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Murtadha Hubail <mh...@apache.org>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
 .../apache/asterix/test/common/TestExecutor.java   | 60 ++++++++----------
 hyracks-fullstack/hyracks/hyracks-http/pom.xml     |  2 -
 .../hyracks/http/server/FormUrlEncodedRequest.java | 74 ++++------------------
 .../apache/hyracks/http/server/utils/HttpUtil.java | 12 +---
 4 files changed, 41 insertions(+), 107 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
index 4129b5a..4d9ceeb 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
@@ -39,16 +39,18 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -143,6 +145,7 @@ public class TestExecutor {
     public static final int TRUNCATE_THRESHOLD = 16384;
     public static final Set<String> NON_CANCELLABLE =
             Collections.unmodifiableSet(new HashSet<>(Arrays.asList("store", "validate")));
+    private static final int MAX_NON_UTF_8_STATEMENT_SIZE = 64 * 1024;
 
     private final IPollTask plainExecutor = this::executeTestFile;
 
@@ -156,7 +159,8 @@ public class TestExecutor {
     private static Map<String, InetSocketAddress> ncEndPoints;
     private static Map<String, InetSocketAddress> replicationAddress;
 
-    private static final List<Charset> charsetsRemaining = new ArrayList<>();
+    private final List<Charset> allCharsets;
+    private final Queue<Charset> charsetsRemaining = new ArrayDeque<>();
 
     /*
      * Instance members
@@ -181,6 +185,10 @@ public class TestExecutor {
 
     public TestExecutor(List<InetSocketAddress> endpoints) {
         this.endpoints = endpoints;
+        this.allCharsets = Stream
+                .of("UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE", "x-UTF-32BE-BOM",
+                        "x-UTF-32LE-BOM", "x-UTF-16LE-BOM")
+                .filter(Charset::isSupported).map(Charset::forName).collect(Collectors.toList());
     }
 
     public void setLibrarian(IExternalUDFLibrarian librarian) {
@@ -612,33 +620,20 @@ public class TestExecutor {
         return response.getEntity().getContent();
     }
 
-    private Charset selectCharset(File result) throws IOException {
-        // choose an encoding that works for this input
-        return selectCharset(FileUtils.readFileToString(result, UTF_8));
+    public synchronized void setAvailableCharsets(Charset... charsets) {
+        allCharsets.clear();
+        allCharsets.addAll(Arrays.asList(charsets));
+        charsetsRemaining.clear();
     }
 
-    private Charset selectCharset(String payload) {
-        // choose an encoding that works for this input
-        return nextCharset(charset -> canEncodeDecode(charset, payload));
-    }
-
-    public static Charset nextCharset(Predicate<Charset> test) {
-        synchronized (charsetsRemaining) {
-            while (true) {
-                for (Iterator<Charset> iter = charsetsRemaining.iterator(); iter.hasNext();) {
-                    Charset next = iter.next();
-                    if (test.test(next)) {
-                        iter.remove();
-                        return next;
-                    }
-                }
-                List<Charset> allCharsets = Stream
-                        .of("UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE",
-                                "x-UTF-32BE-BOM", "x-UTF-32LE-BOM", "x-UTF-16LE-BOM")
-                        .filter(Charset::isSupported).map(Charset::forName).collect(Collectors.toList());
-                Collections.shuffle(allCharsets);
-                charsetsRemaining.addAll(allCharsets);
+    private synchronized Charset nextCharset() {
+        while (true) {
+            Charset nextCharset = charsetsRemaining.poll();
+            if (nextCharset != null) {
+                return nextCharset;
             }
+            Collections.shuffle(allCharsets);
+            charsetsRemaining.addAll(allCharsets);
         }
     }
 
@@ -739,12 +734,12 @@ public class TestExecutor {
             for (Parameter param : upsertParam(otherParams, stmtParam, ParameterTypeEnum.STRING, statement)) {
                 builder.addParameter(param.getName(), param.getValue());
             }
-            builder.addParameter(stmtParam, statement);
+            builder.setCharset(statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset());
         } else {
             // this seems pretty bad - we should probably fix the API and not the client
-            builder.setEntity(new StringEntity(statement, UTF_8));
+            builder.setEntity(new StringEntity(statement,
+                    statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset()));
         }
-        builder.setCharset(UTF_8);
         return builder.build();
     }
 
@@ -775,11 +770,12 @@ public class TestExecutor {
             }
         }
         try {
-            builder.setEntity(new StringEntity(om.writeValueAsString(content), ContentType.APPLICATION_JSON));
+            builder.setEntity(new StringEntity(om.writeValueAsString(content),
+                    ContentType.create(ContentType.APPLICATION_JSON.getMimeType(),
+                            statement.length() > MAX_NON_UTF_8_STATEMENT_SIZE ? UTF_8 : nextCharset())));
         } catch (JsonProcessingException e) {
             e.printStackTrace();
         }
-        builder.setCharset(UTF_8);
         return builder.build();
     }
 
@@ -1253,7 +1249,7 @@ public class TestExecutor {
         URI uri = testFile.getName().endsWith("aql") ? getEndpoint(Servlets.QUERY_AQL)
                 : getEndpoint(Servlets.QUERY_SERVICE);
         boolean isJsonEncoded = isJsonEncoded(extractHttpRequestType(statement));
-        Charset responseCharset = expectedResultFile == null ? UTF_8 : selectCharset(expectedResultFile);
+        Charset responseCharset = expectedResultFile == null ? UTF_8 : nextCharset();
         InputStream resultStream;
         if (DELIVERY_IMMEDIATE.equals(delivery)) {
             resultStream = executeQueryService(statement, fmt, uri, params, isJsonEncoded, responseCharset, null,
diff --git a/hyracks-fullstack/hyracks/hyracks-http/pom.xml b/hyracks-fullstack/hyracks/hyracks-http/pom.xml
index 46e2004..9bfbfc2 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-http/pom.xml
@@ -54,12 +54,10 @@
     <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpcore</artifactId>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
index 4609967..05a7e5e 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/FormUrlEncodedRequest.java
@@ -18,82 +18,32 @@
  */
 package org.apache.hyracks.http.server;
 
-import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
+import org.apache.http.client.utils.URLEncodedUtils;
 import org.apache.hyracks.http.api.IServletRequest;
 import org.apache.hyracks.http.server.utils.HttpUtil;
 
 import io.netty.handler.codec.http.FullHttpRequest;
 import io.netty.handler.codec.http.QueryStringDecoder;
-import io.netty.handler.codec.http.multipart.Attribute;
-import io.netty.handler.codec.http.multipart.HttpPostRequestDecoder;
-import io.netty.handler.codec.http.multipart.InterfaceHttpData;
-import io.netty.handler.codec.http.multipart.MixedAttribute;
 
 public class FormUrlEncodedRequest extends BaseRequest implements IServletRequest {
 
-    private final List<String> names;
-    private final List<String> values;
-
-    public static IServletRequest create(FullHttpRequest request) throws IOException {
-        List<String> names = new ArrayList<>();
-        List<String> values = new ArrayList<>();
-        HttpPostRequestDecoder decoder = new HttpPostRequestDecoder(request);
-        try {
-            List<InterfaceHttpData> bodyHttpDatas = decoder.getBodyHttpDatas();
-            for (InterfaceHttpData data : bodyHttpDatas) {
-                if (data.getHttpDataType().equals(InterfaceHttpData.HttpDataType.Attribute)) {
-                    Attribute attr = (MixedAttribute) data;
-                    names.add(data.getName());
-                    values.add(attr.getValue());
-                }
-            }
-        } finally {
-            decoder.destroy();
-        }
-        return new FormUrlEncodedRequest(request, new QueryStringDecoder(request.uri()).parameters(), names, values);
+    public static IServletRequest create(FullHttpRequest request) {
+        Charset charset = HttpUtil.getRequestCharset(request);
+        Map<String, List<String>> parameters = new LinkedHashMap<>();
+        URLEncodedUtils.parse(request.content().toString(charset), charset).forEach(
+                pair -> parameters.computeIfAbsent(pair.getName(), a -> new ArrayList<>()).add(pair.getValue()));
+        new QueryStringDecoder(request.uri()).parameters()
+                .forEach((name, value) -> parameters.computeIfAbsent(name, a -> new ArrayList<>()).addAll(value));
+        return new FormUrlEncodedRequest(request, parameters);
     }
 
-    protected FormUrlEncodedRequest(FullHttpRequest request, Map<String, List<String>> parameters, List<String> names,
-            List<String> values) {
+    private FormUrlEncodedRequest(FullHttpRequest request, Map<String, List<String>> parameters) {
         super(request, parameters);
-        this.names = names;
-        this.values = values;
-    }
-
-    @Override
-    public String getParameter(CharSequence name) {
-        for (int i = 0; i < names.size(); i++) {
-            if (name.equals(names.get(i))) {
-                return values.get(i);
-            }
-        }
-        return HttpUtil.getParameter(parameters, name);
-    }
-
-    @Override
-    public Set<String> getParameterNames() {
-        HashSet<String> paramNames = new HashSet<>();
-        paramNames.addAll(parameters.keySet());
-        paramNames.addAll(names);
-        return Collections.unmodifiableSet(paramNames);
-    }
-
-    @Override
-    public Map<String, String> getParameters() {
-        HashMap<String, String> paramMap = new HashMap<>();
-        paramMap.putAll(super.getParameters());
-        for (int i = 0; i < names.size(); i++) {
-            paramMap.put(names.get(i), values.get(i));
-        }
-
-        return Collections.unmodifiableMap(paramMap);
     }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
index 6e4a273..5326019 100644
--- a/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-http/src/main/java/org/apache/hyracks/http/server/utils/HttpUtil.java
@@ -73,17 +73,7 @@ public class HttpUtil {
 
     public static String getParameter(Map<String, List<String>> parameters, CharSequence name) {
         List<String> parameter = parameters.get(String.valueOf(name));
-        if (parameter == null) {
-            return null;
-        } else if (parameter.size() == 1) {
-            return parameter.get(0);
-        } else {
-            StringBuilder aString = new StringBuilder(parameter.get(0));
-            for (int i = 1; i < parameter.size(); i++) {
-                aString.append(",").append(parameter.get(i));
-            }
-            return aString.toString();
-        }
+        return parameter == null ? null : String.join(",", parameter);
     }
 
     public static IServletRequest toServletRequest(FullHttpRequest request) throws IOException {