You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/15 14:02:28 UTC

[impala] branch master updated (d5d3ace -> fafb2c9)

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from d5d3ace  IMPALA-8856: Deflake TestKuduHMSIntegration
     new 0ff4f45  IMPALA-8847: Ignore add partition events with empty partition list
     new 1f90471  IMPALA-7770: SPLIT_PART to support negative indexes
     new 3e9cac0  IMPALA-8854: fix acid insert tests
     new fafb2c9  IMPALA-8864: Handle py ssl library incompatibility in http mode

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exprs/expr-test.cc                          |  9 ++-
 be/src/exprs/string-functions-ir.cc                | 45 +++++++----
 .../impala/catalog/events/MetastoreEvents.java     | 32 +++++---
 shell/impala_client.py                             |  6 ++
 .../functional/functional_schema_template.sql      | 23 ------
 .../queries/QueryTest/acid-insert.test             | 63 +++++++--------
 tests/common/test_dimensions.py                    |  6 ++
 tests/custom_cluster/test_event_processing.py      | 49 ++++++++++++
 tests/query_test/test_insert.py                    | 29 +++----
 tests/util/event_processor_utils.py                | 91 ++++++++++++++++++++++
 10 files changed, 255 insertions(+), 98 deletions(-)
 create mode 100644 tests/util/event_processor_utils.py


[impala] 03/04: IMPALA-8854: fix acid insert tests

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3e9cac0cac0063dda5d73ce00ccc8bd2332f50a1
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Wed Aug 14 18:26:13 2019 +0200

    IMPALA-8854: fix acid insert tests
    
    test_acid_nonacid_insert has been failing lately. HMS became more
    strict about checking the capabilities of its clients. Seems like
    the Python client doesn't set any capabilities for itself therefore
    HMS rejects its attempts of creating and dropping tables.
    
    Now instead of using the RESET utility from the e2e test framework
    (to drop and re-create tables), the test is using a unique database
    and creates the tables through Impala. Different file formats are
    exercised with the help of the DEFAULT_FILE_FORMAT query option.
    
    Change-Id: I3a82338a7820d0ee748c961c8656fa3319c3929c
    Reviewed-on: http://gerrit.cloudera.org:8080/14064
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../functional/functional_schema_template.sql      | 23 --------
 .../queries/QueryTest/acid-insert.test             | 63 +++++++++++-----------
 tests/query_test/test_insert.py                    | 29 ++++------
 3 files changed, 41 insertions(+), 74 deletions(-)

diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index ab59a5c..d460eaa 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -2649,26 +2649,3 @@ date_part DATE
 id_col INT
 date_col DATE
 ====
----- DATASET
-functional
----- BASE_TABLE_NAME
-insertonly_nopart_insert
----- HIVE_MAJOR_VERSION
-3
----- CREATE
-CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (i int)
-STORED AS {file_format}
-TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only');
-====
----- DATASET
-functional
----- BASE_TABLE_NAME
-insertonly_part_insert
----- HIVE_MAJOR_VERSION
-3
----- CREATE
-CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (i int)
-PARTITIONED BY (p int)
-STORED AS {file_format}
-TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only');
-====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/acid-insert.test b/testdata/workloads/functional-query/queries/QueryTest/acid-insert.test
index 3e0baae..094b463 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/acid-insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/acid-insert.test
@@ -1,11 +1,11 @@
 ====
----- SETUP
-RESET insertonly_nopart_insert
 ---- QUERY
-insert into insertonly_nopart_insert values (1), (2);
+create table insertonly_nopart (i int)
+tblproperties('transactional'='true', 'transactional_properties'='insert_only');
+insert into insertonly_nopart values (1), (2);
 ====
 ---- QUERY
-select i from insertonly_nopart_insert order by i;
+select i from insertonly_nopart order by i;
 ---- RESULTS
 1
 2
@@ -13,10 +13,10 @@ select i from insertonly_nopart_insert order by i;
 INT
 ====
 ---- QUERY
-insert into insertonly_nopart_insert values (3);
+insert into insertonly_nopart values (3);
 ====
 ---- QUERY
-select i from insertonly_nopart_insert order by i;
+select i from insertonly_nopart order by i;
 ---- RESULTS
 1
 2
@@ -25,43 +25,44 @@ select i from insertonly_nopart_insert order by i;
 INT
 ====
 ---- QUERY
-insert overwrite insertonly_nopart_insert values (10);
+insert overwrite insertonly_nopart values (10);
 ====
 ---- QUERY
-select i from insertonly_nopart_insert order by i;
+select i from insertonly_nopart order by i;
 ---- RESULTS
 10
 ---- TYPES
 INT
 ====
 ---- QUERY
-insert overwrite insertonly_nopart_insert select 100;
+insert overwrite insertonly_nopart select 100;
 ====
 ---- QUERY
-select i from insertonly_nopart_insert order by i;
+select i from insertonly_nopart order by i;
 ---- RESULTS
 100
 ---- TYPES
 INT
 ====
 ---- QUERY
-insert overwrite insertonly_nopart_insert
-select * from insertonly_nopart_insert limit 0;
+insert overwrite insertonly_nopart
+select * from insertonly_nopart limit 0;
 ====
 ---- QUERY
-select i from insertonly_nopart_insert order by i;
+select i from insertonly_nopart order by i;
 ---- RESULTS
 ---- TYPES
 INT
 ====
----- SETUP
-RESET insertonly_part_insert
 ---- QUERY
-insert into insertonly_part_insert partition (p=1) values (10), (11);
-insert into insertonly_part_insert partition (p=2) values (20);
+create table if not exists insertonly_part (i int)
+partitioned by (p int)
+tblproperties('transactional'='true', 'transactional_properties'='insert_only');
+insert into insertonly_part partition (p=1) values (10), (11);
+insert into insertonly_part partition (p=2) values (20);
 ====
 ---- QUERY
-select p, i from insertonly_part_insert order by i;
+select p, i from insertonly_part order by i;
 ---- RESULTS
 1,10
 1,11
@@ -70,11 +71,11 @@ select p, i from insertonly_part_insert order by i;
 INT,INT
 ====
 ---- QUERY
-insert into insertonly_part_insert partition (p=2) values (21);
-insert into insertonly_part_insert partition (p=3) values (30);
+insert into insertonly_part partition (p=2) values (21);
+insert into insertonly_part partition (p=3) values (30);
 ====
 ---- QUERY
-select p, i from insertonly_part_insert order by i;
+select p, i from insertonly_part order by i;
 ---- RESULTS
 1,10
 1,11
@@ -85,11 +86,11 @@ select p, i from insertonly_part_insert order by i;
 INT,INT
 ====
 ---- QUERY
-insert overwrite insertonly_part_insert partition (p=2) values (22);
-insert overwrite insertonly_part_insert partition (p=3) values (31);
+insert overwrite insertonly_part partition (p=2) values (22);
+insert overwrite insertonly_part partition (p=3) values (31);
 ====
 ---- QUERY
-select p, i from insertonly_part_insert order by i;
+select p, i from insertonly_part order by i;
 ---- RESULTS
 1,10
 1,11
@@ -99,24 +100,24 @@ select p, i from insertonly_part_insert order by i;
 INT,INT
 ====
 ---- QUERY
-insert overwrite insertonly_part_insert partition (p=1)
-select * from insertonly_nopart_insert limit 0;
-insert overwrite insertonly_part_insert partition (p=2)
-select * from insertonly_nopart_insert limit 0;
+insert overwrite insertonly_part partition (p=1)
+select * from insertonly_nopart limit 0;
+insert overwrite insertonly_part partition (p=2)
+select * from insertonly_nopart limit 0;
 ====
 ---- QUERY
-select p, i from insertonly_part_insert order by i;
+select p, i from insertonly_part order by i;
 ---- RESULTS
 3,31
 ---- TYPES
 INT,INT
 ====
 ---- QUERY
-insert overwrite insertonly_part_insert partition (p)
+insert overwrite insertonly_part partition (p)
 values (1000, 1), (2000, 2), (4000, 4), (5000, 5), (5001, 5);
 ====
 ---- QUERY
-select p, i from insertonly_part_insert order by p, i;
+select p, i from insertonly_part order by p, i;
 ---- RESULTS
 1,1000
 2,2000
diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py
index a630b3e..17ee7d8 100644
--- a/tests/query_test/test_insert.py
+++ b/tests/query_test/test_insert.py
@@ -136,27 +136,16 @@ class TestInsertQueries(ImpalaTestSuite):
     self.run_test_case('QueryTest/insert', vector,
         multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
 
-  @pytest.mark.execute_serially
   @SkipIfHive2.acid
-  def test_acid_insert(self, vector):
-    pytest.skip("IMPALA-8854: skipping to unbreak builds")
-    if (vector.get_value('table_format').file_format == 'parquet'):
-      vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
-          vector.get_value('compression_codec')
-    # We need to turn off capability checks. Otherwise we get an error from HMS because
-    # this python client doesn't have the capability for handling ACID tables. But we only
-    # need to drop and create such tables, and table properties are preserved during
-    # those operations and this is enough for the tests (A table is ACID if it has the
-    # relevant table properties).
-    CAPABILITY_CHECK_CONF = "hive.metastore.client.capability.check"
-    capability_check = self.hive_client.getMetaConf(CAPABILITY_CHECK_CONF)
-    try:
-      self.hive_client.setMetaConf(CAPABILITY_CHECK_CONF, "false")
-      self.run_test_case('QueryTest/acid-insert', vector,
-          multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
-    finally:
-      # Reset original state.
-      self.hive_client.setMetaConf(CAPABILITY_CHECK_CONF, capability_check)
+  @UniqueDatabase.parametrize(sync_ddl=True)
+  def test_acid_insert(self, vector, unique_database):
+    exec_options = vector.get_value('exec_option')
+    file_format = vector.get_value('table_format').file_format
+    if (file_format == 'parquet'):
+      exec_options['COMPRESSION_CODEC'] = vector.get_value('compression_codec')
+    exec_options['DEFAULT_FILE_FORMAT'] = file_format
+    self.run_test_case('QueryTest/acid-insert', vector, unique_database,
+        multiple_impalad=exec_options['sync_ddl'] == 1)
 
   @SkipIfHive2.acid
   @UniqueDatabase.parametrize(sync_ddl=True)


[impala] 01/04: IMPALA-8847: Ignore add partition events with empty partition list

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0ff4f450e3f76eb3ac8622588fe0824e367c2b03
Author: Vihang Karajgaonkar <vi...@cloudera.com>
AuthorDate: Fri Aug 9 14:00:20 2019 -0700

    IMPALA-8847: Ignore add partition events with empty partition list
    
    Certain Hive queries like "alter table <table> add if not exists
    partition (<part_spec>)" generate a add_partition event even if the
    partition did not really exists. Such events have a empty partition list
    in the event message which trips on the Precondition check in the
    AddPartitionEvent. This causes event processor to go into error state.
    The only way to recover is to issue invalidate metadata in such a case.
    
    The patch adds logic to ignore such events.
    
    Testing:
    1. Added a test case which reproduces the issue. The test case works
    after the patch is applied.
    
    Change-Id: I877ce6233934e7090cd18e497f748bc6479838cb
    Reviewed-on: http://gerrit.cloudera.org:8080/14049
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
---
 .../impala/catalog/events/MetastoreEvents.java     | 32 +++++---
 tests/custom_cluster/test_event_processing.py      | 49 ++++++++++++
 tests/util/event_processor_utils.py                | 91 ++++++++++++++++++++++
 3 files changed, 162 insertions(+), 10 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 315a38c..51d9ac2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -1331,7 +1331,7 @@ public class MetastoreEvents {
   }
 
   public static class AddPartitionEvent extends TableInvalidatingEvent {
-    private final Partition lastAddedPartition_;
+    private Partition lastAddedPartition_;
     private final List<Partition> addedPartitions_;
 
     /**
@@ -1350,12 +1350,16 @@ public class MetastoreEvents {
                 .getAddPartitionMessage(event.getMessage());
         addedPartitions_ =
             Lists.newArrayList(addPartitionMessage_.getPartitionObjs());
-        Preconditions.checkState(addedPartitions_.size() > 0);
-        // when multiple partitions are added in HMS they are all added as one transaction
-        // Hence all the partitions which are present in the message must have the same
-        // serviceId and version if it is set. hence it is fine to just look at the
-        // last added partition in the list and use it for the self-event ids
-        lastAddedPartition_ = addedPartitions_.get(addedPartitions_.size() - 1);
+        // it is possible that the added partitions is empty in certain cases. See
+        // IMPALA-8847 for example
+        if (!addedPartitions_.isEmpty()) {
+          // when multiple partitions are added in HMS they are all added as one
+          // transaction Hence all the partitions which are present in the message must
+          // have the same serviceId and version if it is set. hence it is fine to just
+          // look at the last added partition in the list and use it for the self-event
+          // ids
+          lastAddedPartition_ = addedPartitions_.get(addedPartitions_.size() - 1);
+        }
         msTbl_ = addPartitionMessage_.getTableObj();
       } catch (Exception ex) {
         throw new MetastoreNotificationException(ex);
@@ -1364,12 +1368,15 @@ public class MetastoreEvents {
 
     @Override
     public void process() throws MetastoreNotificationException, CatalogException {
+      // bail out early if there are not partitions to process
+      if (addedPartitions_.isEmpty()) {
+        infoLog("Partition list is empty. Ignoring this event.");
+        return;
+      }
       if (isSelfEvent()) {
         infoLog("Not processing the event as it is a self-event");
         return;
       }
-      // Notification is created for newly created partitions only. We need not worry
-      // about "IF NOT EXISTS".
       try {
         // Reload the whole table if it's a transactional table.
         if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
@@ -1541,7 +1548,6 @@ public class MetastoreEvents {
         msTbl_ = Preconditions.checkNotNull(dropPartitionMessage.getTableObj());
         droppedPartitions_ = dropPartitionMessage.getPartitions();
         Preconditions.checkNotNull(droppedPartitions_);
-        Preconditions.checkState(droppedPartitions_.size() > 0);
       } catch (Exception ex) {
         throw new MetastoreNotificationException(
             debugString("Could not parse event message. "
@@ -1553,6 +1559,12 @@ public class MetastoreEvents {
 
     @Override
     public void process() throws MetastoreNotificationException {
+      // we have seen cases where a add_partition event is generated with empty
+      // partition list (see IMPALA-8547 for details. Make sure that droppedPartitions
+      // list is not empty
+      if (droppedPartitions_.isEmpty()) {
+        infoLog("Partition list is empty. Ignoring this event.");
+      }
       // We do not need self event as dropPartition() call is a no-op if the directory
       // doesn't exist.
       try {
diff --git a/tests/custom_cluster/test_event_processing.py b/tests/custom_cluster/test_event_processing.py
index 2aa22d4..f099978 100644
--- a/tests/custom_cluster/test_event_processing.py
+++ b/tests/custom_cluster/test_event_processing.py
@@ -26,6 +26,7 @@ from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, \
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon, SkipIfLocal
 from tests.util.hive_utils import HiveDbWrapper
+from tests.util.event_processor_utils import EventProcessorUtils
 
 
 @SkipIfS3.hive
@@ -129,6 +130,54 @@ class TestEventProcessing(CustomClusterTestSuite):
          " year=2019 and id=101" % (db_name, TBL_INSERT_PART))
      assert data.split('\t') == ['101', 'z', '28', '3', '2019']
 
+  @CustomClusterTestSuite.with_args(
+    catalogd_args="--hms_event_polling_interval_s=1"
+  )
+  @SkipIfHive2.acid
+  def test_empty_partition_events_transactional(self, unique_database):
+    self._run_test_empty_partition_events(unique_database, True)
+
+  @CustomClusterTestSuite.with_args(
+    catalogd_args="--hms_event_polling_interval_s=1"
+  )
+  def test_empty_partition_events(self, unique_database):
+    self._run_test_empty_partition_events(unique_database, False)
+
+  def _run_test_empty_partition_events(self, unique_database, is_transactional):
+    TBLPROPERTIES = ""
+    if is_transactional:
+       TBLPROPERTIES = "TBLPROPERTIES ('transactional'='true'," \
+           "'transactional_properties'='insert_only')"
+    test_tbl = unique_database + ".test_events"
+    self.run_stmt_in_hive("create table {0} (key string, value string) \
+      partitioned by (year int) {1} stored as parquet".format(test_tbl, TBLPROPERTIES))
+    EventProcessorUtils.wait_for_event_processing(self.hive_client)
+    self.client.execute("describe {0}".format(test_tbl))
+
+    self.run_stmt_in_hive(
+      "alter table {0} add partition (year=2019)".format(test_tbl))
+    EventProcessorUtils.wait_for_event_processing(self.hive_client)
+    assert [('2019',)] == self.get_impala_partition_info(test_tbl, 'year')
+
+    self.run_stmt_in_hive(
+      "alter table {0} add if not exists partition (year=2019)".format(test_tbl))
+    EventProcessorUtils.wait_for_event_processing(self.hive_client)
+    assert [('2019',)] == self.get_impala_partition_info(test_tbl, 'year')
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+    self.run_stmt_in_hive(
+      "alter table {0} drop partition (year=2019)".format(test_tbl))
+    EventProcessorUtils.wait_for_event_processing(self.hive_client)
+    assert ('2019') not in self.get_impala_partition_info(test_tbl, 'year')
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+    self.run_stmt_in_hive(
+      "alter table {0} drop if exists partition (year=2019)".format(test_tbl))
+    EventProcessorUtils.wait_for_event_processing(self.hive_client)
+    assert ('2019') not in self.get_impala_partition_info(test_tbl, 'year')
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+
   def wait_for_insert_event_processing(self, previous_event_id):
     """Waits until the event processor has finished processing insert events. Since two
     events are created for every insert done through hive, we wait until the event id is
diff --git a/tests/util/event_processor_utils.py b/tests/util/event_processor_utils.py
new file mode 100644
index 0000000..78123e3
--- /dev/null
+++ b/tests/util/event_processor_utils.py
@@ -0,0 +1,91 @@
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Impala tests for Hive Metastore, covering the expected propagation
+# of metadata from Hive to Impala or Impala to Hive. Each test
+# modifies the metadata via Hive and checks that the modification
+# succeeded by querying Impala, or vice versa.
+
+import requests
+import time
+import json
+from tests.common.environ import build_flavor_timeout
+
+
+class EventProcessorUtils(object):
+
+  DEFAULT_CATALOG_URL = "http://localhost:25020"
+
+  @staticmethod
+  def wait_for_event_processing(hive_client, timeout=10):
+      """Waits till the event processor has synced to the latest event id from metastore
+         or the timeout value in seconds whichever is earlier"""
+      success = False
+      assert timeout > 0
+      assert hive_client is not None
+      current_event_id = EventProcessorUtils.get_current_notification_id(hive_client)
+      end_time = time.time() + timeout
+      while time.time() < end_time:
+        new_event_id = EventProcessorUtils.get_last_synced_event_id()
+        if new_event_id >= current_event_id:
+          success = True
+          break
+        time.sleep(0.1)
+      if not success:
+        raise Exception(
+          "Event processor did not sync till last known event id{0} \
+          within {1} seconds".format(current_event_id, timeout))
+      # Wait for catalog update to be propagated.
+      time.sleep(build_flavor_timeout(6, slow_build_timeout=10))
+      return success
+
+  @staticmethod
+  def get_event_processor_metrics():
+     """Scrapes the catalog's /events webpage and return a dictionary with the event
+     processor metrics."""
+     response = requests.get("%s/events?json" % EventProcessorUtils.DEFAULT_CATALOG_URL)
+     assert response.status_code == requests.codes.ok
+     varz_json = json.loads(response.text)
+     metrics = varz_json["event_processor_metrics"].strip().splitlines()
+
+     # Helper to strip a pair of elements
+     def strip_pair(p):
+       return (p[0].strip(), p[1].strip())
+
+     pairs = [strip_pair(kv.split(':')) for kv in metrics if kv]
+     return dict(pairs)
+
+  @staticmethod
+  def get_last_synced_event_id():
+    """Returns the last_synced_event_id."""
+    metrics = EventProcessorUtils.get_event_processor_metrics()
+    assert 'last-synced-event-id' in metrics.keys()
+    return int(metrics['last-synced-event-id'])
+
+  @staticmethod
+  def get_event_processor_status():
+    """
+    Returns the current status of the EventsProcessor
+    """
+    metrics = EventProcessorUtils.get_event_processor_metrics()
+    assert 'status' in metrics.keys()
+    return metrics['status']
+
+  @staticmethod
+  def get_current_notification_id(hive_client):
+    """Returns the current notification from metastore"""
+    assert hive_client is not None
+    return hive_client.get_current_notificationEventId()


[impala] 02/04: IMPALA-7770: SPLIT_PART to support negative indexes

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1f904719e4c7e398a7a8e152d82c8da1590b6d25
Author: norbertluksa <no...@cloudera.com>
AuthorDate: Thu Jul 18 18:20:28 2019 +0200

    IMPALA-7770: SPLIT_PART to support negative indexes
    
    Third parameter of SPLIT_PART (nth field) accepts now
    negative values, and searches the string backwards.
    
    Testing:
     * Added unit tests to expr-test.cc
    
    Change-Id: I2db762989a90bd95661a59eb9c11a29eb2edfafb
    Reviewed-on: http://gerrit.cloudera.org:8080/13880
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/expr-test.cc           |  9 +++++++-
 be/src/exprs/string-functions-ir.cc | 45 ++++++++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index be80c2b..a406d29 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4152,18 +4152,25 @@ TEST_P(ExprTest, StringFunctions) {
   TestIsNull("chr(NULL)", TYPE_STRING);
 
   TestStringValue("split_part('abc~!def~!ghi', '~!', 1)", "abc");
+  TestStringValue("split_part('abc~!def~!ghi', '~!', -1)", "ghi");
   TestStringValue("split_part('abc~!~def~!~ghi', '~!', 2)", "~def");
+  TestStringValue("split_part('abc~!~def~!~ghi', '~!', -2)", "~def");
   TestStringValue("split_part('abc@@def@@ghi', '@@', 3)", "ghi");
+  TestStringValue("split_part('abc@@def@@ghi', '@@', -3)", "abc");
   TestStringValue("split_part('abc@@def@@@@ghi', '@@', 4)", "ghi");
+  TestStringValue("split_part('abc@@def@@@@ghi', '@@', -4)", "abc");
   TestStringValue("split_part('abc@@def@@ghi', '@@', 4)", "");
+  TestStringValue("split_part('abc@@def@@ghi', '@@', -4)", "");
   TestStringValue("split_part('', '@@', 1)", "");
+  TestStringValue("split_part('', '@@', -1)", "");
   TestStringValue("split_part('abcdef', '', 1)", "abcdef");
+  TestStringValue("split_part('abcdef', '', -1)", "abcdef");
   TestStringValue("split_part('', '', 1)", "");
+  TestStringValue("split_part('', '', -1)", "");
   TestIsNull("split_part(NULL, NULL, 1)", TYPE_STRING);
   TestIsNull("split_part('abcdefabc', NULL, 1)", TYPE_STRING);
   TestIsNull("split_part(NULL, 'xyz', 1)", TYPE_STRING);
   TestError("split_part('abc@@def@@ghi', '@@', 0)");
-  TestError("split_part('abc@@def@@ghi', '@@', -1)");
 
   TestStringValue("lower('')", "");
   TestStringValue("lower('HELLO')", "hello");
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 5606fcb..11336e3 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -1018,22 +1018,28 @@ StringVal StringFunctions::Chr(FunctionContext* ctx, const IntVal& val) {
 }
 
 // Similar to strstr() except that the strings are not null-terminated
-static char* LocateSubstring(char* haystack, int hay_len, const char* needle, int needle_len) {
+// Parameter 'direction' controls the direction of searching, can be either 1 or -1
+static char* LocateSubstring(char* haystack, const int hay_len, const char* needle,
+    const int needle_len, const int direction = 1) {
   DCHECK_GT(needle_len, 0);
   DCHECK(needle != NULL);
   DCHECK(hay_len == 0 || haystack != NULL);
+  DCHECK(direction == 1 || direction == -1);
+  if (hay_len < needle_len) return nullptr;
+  char* start = haystack;
+  if (direction == -1) start += hay_len - needle_len;
   for (int i = 0; i < hay_len - needle_len + 1; ++i) {
-    char* possible_needle = haystack + i;
+    char* possible_needle = start + direction * i;
     if (strncmp(possible_needle, needle, needle_len) == 0) return possible_needle;
   }
-  return NULL;
+  return nullptr;
 }
 
 StringVal StringFunctions::SplitPart(FunctionContext* context,
     const StringVal& str, const StringVal& delim, const BigIntVal& field) {
   if (str.is_null || delim.is_null || field.is_null) return StringVal::null();
   int field_pos = field.val;
-  if (field_pos <= 0) {
+  if (field_pos == 0) {
     stringstream ss;
     ss << "Invalid field position: " << field.val;
     context->SetError(ss.str().c_str());
@@ -1041,23 +1047,36 @@ StringVal StringFunctions::SplitPart(FunctionContext* context,
   }
   if (delim.len == 0) return str;
   char* str_start = reinterpret_cast<char*>(str.ptr);
-  char* str_part = str_start;
   char* delimiter = reinterpret_cast<char*>(delim.ptr);
-  for (int cur_pos = 1; ; ++cur_pos) {
-    int remaining_len = str.len - (str_part - str_start);
-    char* delim_ref = LocateSubstring(str_part, remaining_len, delimiter, delim.len);
-    if (delim_ref == NULL) {
+  const int DIRECTION = field_pos > 0 ? 1 : -1;
+  char* window_start = str_start;
+  char* window_end = str_start + str.len;
+  for (int cur_pos = DIRECTION; ; cur_pos += DIRECTION) {
+    int remaining_len = window_end - window_start;
+    char* delim_ref = LocateSubstring(window_start, remaining_len, delimiter, delim.len,
+        DIRECTION);
+    if (delim_ref == nullptr) {
       if (cur_pos == field_pos) {
-        return StringVal(reinterpret_cast<uint8_t*>(str_part), remaining_len);
+        return StringVal(reinterpret_cast<uint8_t*>(window_start), remaining_len);
       }
       // Return empty string if required field position is not found.
       return StringVal();
     }
     if (cur_pos == field_pos) {
-      return StringVal(reinterpret_cast<uint8_t*>(str_part),
-          delim_ref - str_part);
+      if (DIRECTION < 0) {
+        window_start = delim_ref + delim.len;
+      }
+      else {
+        window_end = delim_ref;
+      }
+      return StringVal(reinterpret_cast<uint8_t*>(window_start),
+          window_end - window_start);
+    }
+    if (DIRECTION < 0) {
+      window_end = delim_ref;
+    } else {
+      window_start = delim_ref + delim.len;
     }
-    str_part = delim_ref + delim.len;
   }
   return StringVal();
 }


[impala] 04/04: IMPALA-8864: Handle py ssl library incompatibility in http mode

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fafb2c9786467ebe582b92b27e2990f5789c9c00
Author: Bharath Vissapragada <bh...@cloudera.com>
AuthorDate: Wed Aug 14 22:11:17 2019 -0700

    IMPALA-8864: Handle py ssl library incompatibility in http mode
    
    Older python versions shipped ssl libraries that did not implement
    SSLContext class. THttpClient relies on it. This patch,
    
    - Fails the shell gracefully when such a python version is used.
    - Skips the http test dimension when running the test suite on a
    machine that ships such a python verison (centos 6).
    
    Change-Id: I28846bde0b8bb8f787e6330cddf91645dba4160e
    Reviewed-on: http://gerrit.cloudera.org:8080/14069
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 shell/impala_client.py          | 6 ++++++
 tests/common/test_dimensions.py | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/shell/impala_client.py b/shell/impala_client.py
index c6915d3..1ba632e 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -360,6 +360,12 @@ class ImpalaClient(object):
 
   def _get_http_transport(self, connect_timeout_ms):
     """Creates a transport with HTTP as the base."""
+    # Older python versions do not support SSLContext needed by THttpClient. More
+    # context in IMPALA-8864. CentOs 6 ships such an incompatible python version
+    # out of the box.
+    if not hasattr(ssl, "create_default_context"):
+      print_to_stderr("Python version too old. SSLContext not supported.")
+      raise NotImplementedError()
     # Current implementation of THttpClient does a close() and open() of the underlying
     # http connection on every flush() (THRIFT-4600). Due to this, setting a connect
     # timeout does not achieve the desirable result as the subsequent open() could block
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 5f26c81..b08fa83 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -122,6 +122,12 @@ def create_beeswax_hs2_dimension():
 # 'hs2-http' dimension is only covered for shell based tests, since they
 # do not rely on Impyla for connections.
 def create_beeswax_hs2_hs2http_dimension():
+  # Older python versions do not support SSLContext object that the thrift
+  # http client implementation depends on. Falls back to a dimension without
+  # http transport. More context in IMPALA-8864.
+  import ssl
+  if not hasattr(ssl, "create_default_context"):
+    return create_beeswax_hs2_dimension()
   return ImpalaTestDimension('protocol', 'beeswax', 'hs2', 'hs2-http')