You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/03/30 08:56:37 UTC

[impala] branch master updated: IMPALA-10946: Replaced URLDecoder with Hive's unescape

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new b1c1be1  IMPALA-10946: Replaced URLDecoder with Hive's unescape
b1c1be1 is described below

commit b1c1be12f3cceed48e93eddae8b9512737e3e0d2
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Mon Mar 28 14:46:08 2022 +0200

    IMPALA-10946: Replaced URLDecoder with Hive's unescape
    
    Hive is using its own implementation of escaping and unescaping
    partition pathes, which is similar to URL decoding, but not the same,
    for example it does not escape '+'.
    
    Modified Impala to use the same unescaping algorithm to prevent creating
    phantom partitions in HMS.
    
    Testing:
    
    The reproduction in the Jira is fixed, "abc " is no longer created in
    case "abc+" partition is recovered. "abc\" still works as intended.
    
    Change-Id: Ie821a0795d99eb9aa95131323917c9a3d0e8e1ec
    Reviewed-on: http://gerrit.cloudera.org:8080/18274
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/HdfsTable.java  | 46 +++++++++-------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 4978e39..95917a1 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -19,8 +19,6 @@ package org.apache.impala.catalog;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -53,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.utils.FileUtils;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.LiteralExpr;
@@ -2469,8 +2468,7 @@ public class HdfsTable extends Table implements FeFsTable {
    * keys of this table. This is useful to convert the string values which are received
    * from metastore events to {@link LiteralExpr}.
    */
-  public List<LiteralExpr> getTypeCompatiblePartValues(List<String> values)
-      throws UnsupportedEncodingException {
+  public List<LiteralExpr> getTypeCompatiblePartValues(List<String> values) {
     List<LiteralExpr> result = new ArrayList<>();
     List<Column> partitionColumns = getClusteringColumns();
     Preconditions.checkState(partitionColumns.size() == values.size());
@@ -2494,8 +2492,8 @@ public class HdfsTable extends Table implements FeFsTable {
    * original value, the second element is the LiteralExpr created from the original
    * value.
    */
-  private Pair<String, LiteralExpr> getTypeCompatibleValue(Path path,
-      String partitionKey) throws UnsupportedEncodingException {
+  private Pair<String, LiteralExpr> getTypeCompatibleValue(
+      Path path, String partitionKey) {
     String partName[] = path.getName().split("=");
     if (partName.length != 2 || !partName[0].equals(partitionKey)) return null;
 
@@ -2513,13 +2511,12 @@ public class HdfsTable extends Table implements FeFsTable {
    * @param type Type of the partition column
    * @return Pair which contains the partition value and its equivalent
    * {@link LiteralExpr} according to the type provided.
-   * @throws UnsupportedEncodingException
    */
-  private Pair<String, LiteralExpr> getPartitionExprFromValue(String partValue, Type type)
-      throws UnsupportedEncodingException {
+  private Pair<String, LiteralExpr> getPartitionExprFromValue(
+      String partValue, Type type) {
     LiteralExpr expr;
     // URL decode the partition value since it may contain encoded URL.
-    String value = URLDecoder.decode(partValue, StandardCharsets.UTF_8.name());
+    String value = FileUtils.unescapePathName(partValue);
     if (!value.equals(getNullPartitionKeyValue())) {
       try {
         expr = LiteralExpr.createFromUnescapedStr(value, type);
@@ -2745,7 +2742,7 @@ public class HdfsTable extends Table implements FeFsTable {
       // them in the result of getPartitionsByNames.
       throw new TableLoadingException(
           "Error when reloading partitions for table " + getFullName(), e);
-    } catch (TException | UnsupportedEncodingException e2) {
+    } catch (TException e2) {
       throw new CatalogException(
           "Unexpected error while retrieving partitions for table " + getFullName(), e2);
     }
@@ -2769,24 +2766,19 @@ public class HdfsTable extends Table implements FeFsTable {
         + "held before reloadPartitionsFromEvent");
     LOG.info("Reloading partition metadata for table: {} ({})", getFullName(), reason);
     Map<Partition, HdfsPartition> hmsPartToHdfsPart = new HashMap<>();
-    try {
-      for (Partition partition : partsFromEvent) {
-        List<LiteralExpr> partExprs = getTypeCompatiblePartValues(partition.getValues());
-        HdfsPartition hdfsPartition = getPartition(partExprs);
-        // only reload partitions that have more recent write id
-        if (hdfsPartition != null &&
-            (!AcidUtils.isTransactionalTable(msTable_.getParameters())
-                || hdfsPartition.getWriteId() <= MetastoreShim
-                .getWriteIdFromMSPartition(partition))) {
-          hmsPartToHdfsPart.put(partition, hdfsPartition);
-        }
+    for (Partition partition : partsFromEvent) {
+      List<LiteralExpr> partExprs = getTypeCompatiblePartValues(partition.getValues());
+      HdfsPartition hdfsPartition = getPartition(partExprs);
+      // only reload partitions that have more recent write id
+      if (hdfsPartition != null
+          && (!AcidUtils.isTransactionalTable(msTable_.getParameters())
+                 || hdfsPartition.getWriteId()
+                     <= MetastoreShim.getWriteIdFromMSPartition(partition))) {
+        hmsPartToHdfsPart.put(partition, hdfsPartition);
       }
-      reloadPartitions(client, hmsPartToHdfsPart, loadFileMetadata);
-      return hmsPartToHdfsPart.size();
-    } catch (UnsupportedEncodingException e) {
-      throw new CatalogException(
-          "Unexpected error while retrieving partitions for table " + getFullName(), e);
     }
+    reloadPartitions(client, hmsPartToHdfsPart, loadFileMetadata);
+    return hmsPartToHdfsPart.size();
   }
 
   /**