You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/03/20 05:26:29 UTC

[impala] 03/03: IMPALA-9492: Fix test_unescaped_string_partition failing on S3

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 030c12ab2c995a8618dcf601296e310203f70f8c
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed Mar 18 20:23:33 2020 +0800

    IMPALA-9492: Fix test_unescaped_string_partition failing on S3
    
    test_unescaped_string_partition in metadata/test_recover_partitions.py
    uses hdfs clients to create four partition directories with special
    characters, i.e. single quotes, double quotes and backslashes. It aims
    to test whether ALTER TABLE RECOVER PARTITIONS can recognize those
    directories correctly. However, when running against s3, only two
    directories are created as expected, which causes the failure.
    
    The reason is that when running against s3, we use the hadoop cli for
    file system operations. A shell command is launched for each operation,
    and passing arguments through the shell causes them to be unescaped a
    second time. So the 4 dirs, [p=', p=", p=\', p=\"] finally became
    [p=', p=", p=', p="], resulting in only two distinct directories. When
    the test runs against hdfs, we use the webhdfs_client, so we don't have
    this issue.
    
    Actually, we shouldn't use special characters in partition paths. Hive
    converts them to their ASCII hex values (percent-encoding) when creating
    partition directories. E.g. partition paths of [p=', p=", p=\', p=\"]
    become [p=%27, p=%22, p=%5C%27, p=%5C%22]. We should follow this rule
    when creating directories in the test; doing so also avoids the shell
    unescaping issue on s3 described above.
    
    Tests:
     - Added two more special partitions in test_unescaped_string_partition.
     - Ran test_unescaped_string_partition in S3.
    
    Change-Id: I63d149c9bdec52c2e1c0b25c8c3f0448cf7bdadb
    Reviewed-on: http://gerrit.cloudera.org:8080/15475
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/metadata/test_recover_partitions.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py
index dcd393e..5d8d736 100644
--- a/tests/metadata/test_recover_partitions.py
+++ b/tests/metadata/test_recover_partitions.py
@@ -18,6 +18,7 @@
 # Impala tests for ALTER TABLE RECOVER PARTITIONS statement
 
 import os
+from six.moves import urllib
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfLocal, SkipIfS3, SkipIfCatalogV2
 from tests.common.test_dimensions import ALL_NODES_ONLY
@@ -363,16 +364,18 @@ class TestRecoverPartitions(ImpalaTestSuite):
 
     self.execute_query_expect_success(
         self.client, "CREATE TABLE %s (i int) PARTITIONED BY (p string)" % fq_tbl_name)
-    self.create_fs_partition(tbl_location, 'p=\"', "file_000", "1")
-    self.create_fs_partition(tbl_location, 'p=\'', "file_000", "2")
-    self.create_fs_partition(tbl_location, 'p=\\\"', "file_000", "3")
-    self.create_fs_partition(tbl_location, 'p=\\\'', "file_000", "4")
+    parts = ["\'", "\"", "\\\'", "\\\"", "\\\\\'", "\\\\\""]
+    for i in range(len(parts)):
+      # When creating partition directories, Hive replaces special characters in
+      # partition value string using the %xx escape. e.g. p=' will become p=%27.
+      hex_part = urllib.parse.quote(parts[i])
+      self.create_fs_partition(tbl_location, "p=%s" % hex_part, "file_%d" % i, str(i))
+
     self.execute_query_expect_success(
         self.client, "ALTER TABLE %s RECOVER PARTITIONS" % fq_tbl_name)
     result = self.execute_query_expect_success(
         self.client, "SHOW PARTITIONS %s" % fq_tbl_name)
-    assert self.count_partition(result.data) == 4
-    self.verify_partitions(['\"', '\'', '\\\"', '\\\''], result.data)
+    self.verify_partitions(parts, result.data)
 
   @SkipIfLocal.hdfs_client
   @SkipIfS3.empty_directory