You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2020/03/13 17:21:04 UTC

[impala] branch master updated (6fdc644 -> 3fd6f60)

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 6fdc644  IMPALA-8800: Added support of Kudu DATE type to Impala
     new a20edd5  IMPALA-9490 [DOCS] State support for reading Apache Hudi optimized table
     new 8337131  IMPALA-9369: Make createInsertEvents() async.
     new c3d65ca  IMPALA-9414 (part 1): Copy THttpClient from Thrift into Impala
     new 3fd6f60  IMPALA-9414 (part 2): Support the 'Expect: 100-continue' http header

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/transport/THttpServer.cpp                   |   4 +
 be/src/transport/THttpTransport.cpp                |  14 ++
 be/src/transport/THttpTransport.h                  |   5 +
 docs/impala.ditamap                                |   1 +
 docs/topics/impala_file_formats.xml                |  10 +
 docs/topics/impala_hudi.xml                        |  81 ++++++++
 .../apache/impala/service/CatalogOpExecutor.java   |  47 +++--
 .../java/org/apache/impala/util/MetaStoreUtil.java |  64 +++++--
 .../events/MetastoreEventsProcessorTest.java       |   6 +-
 shell/ImpalaHttpClient.py                          | 211 +++++++++++++++++++++
 shell/impala_client.py                             |  64 ++-----
 shell/impala_shell.py                              |   7 +-
 shell/make_shell_tarball.sh                        |   2 +
 shell/packaging/make_python_package.sh             |   2 +
 .../common/errors.py => shell/shell_exceptions.py  |  39 +++-
 tests/shell/test_shell_commandline.py              |  11 +-
 16 files changed, 468 insertions(+), 100 deletions(-)
 create mode 100644 docs/topics/impala_hudi.xml
 create mode 100644 shell/ImpalaHttpClient.py
 copy tests/common/errors.py => shell/shell_exceptions.py (54%)


[impala] 02/04: IMPALA-9369: Make createInsertEvents() async.

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8337131c1ee29210c6a0b13a5ef57afef4fdb4f4
Author: Anurag Mantripragada <an...@cloudera.com>
AuthorDate: Sun Mar 1 17:39:02 2020 -0800

    IMPALA-9369: Make createInsertEvents() async.
    
    This patch makes the createInsertEvents() method async to avoid
    blocking the insert code path for long periods for tables with
    large number of partitions and files.
    
    Currently the createInsertEvents() method fires the HMS insert
    event one partition at a time. This makes insert statements
    with thousands of new files significantly slower. This change
    makes the createInsertEvents() call asynchronous by making it
    run in a separate thread.
    
    Testing:
    - Ran MetastoreEventsProcessorTest#testInsertEvents.
    - Ran test_events_processing::test_insert_events.
    
    Change-Id: I97802a5c03abc067fccf9e3a9d0047324626706e
    Reviewed-on: http://gerrit.cloudera.org:8080/15263
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/service/CatalogOpExecutor.java   | 47 ++++++++++++----
 .../java/org/apache/impala/util/MetaStoreUtil.java | 64 +++++++++++++++-------
 .../events/MetastoreEventsProcessorTest.java       |  6 +-
 3 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index f9c589c..92d8820 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -34,6 +34,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
@@ -189,6 +192,7 @@ import org.apache.impala.util.FunctionUtils;
 import org.apache.impala.util.HdfsCachingUtil;
 import org.apache.impala.util.KuduUtil;
 import org.apache.impala.util.MetaStoreUtil;
+import org.apache.impala.util.MetaStoreUtil.InsertEventInfo;
 import org.slf4j.Logger;
 import org.apache.thrift.TException;
 
@@ -4396,9 +4400,9 @@ public class CatalogOpExecutor {
   }
 
   /**
-   * Populates insert event data and calls fireInsertEvent() if external event processing
-   * is enabled. This is no-op if event processing is disabled or there are no existing
-   * partitions affected by this insert.
+   * Populates insert event data and calls fireInsertEventsAsync() if external event
+   * processing is enabled. This is no-op if event processing is disabled or there are
+   * no existing partitions affected by this insert.
    *
    * @param affectedExistingPartitions List of existing partitions touched by the insert.
    * @param isInsertOverwrite indicates if the operation was an insert overwrite. If it
@@ -4409,6 +4413,9 @@ public class CatalogOpExecutor {
     if (!catalog_.isEventProcessingActive() ||
         affectedExistingPartitions.size() == 0) return;
 
+    // List of all insert events that we call HMS fireInsertEvent() on.
+    List<InsertEventInfo> insertEventInfos = new ArrayList<>();
+
     // Map of partition names to file names of all existing partitions touched by the
     // insert.
     Map<String, Set<String>> partitionFilesMapBeforeInsert = new HashMap<>();
@@ -4454,20 +4461,38 @@ public class CatalogOpExecutor {
             filesPostInsert.size(), table.getTableName(), part.getPartitionName());
       }
       if (deltaFiles != null || isInsertOverwrite) {
+        // Collect all the insert events.
+        insertEventInfos.add(new InsertEventInfo(table.getDb().getName(),
+            table.getName(), partVals, deltaFiles, isInsertOverwrite));
+      } else {
+        LOG.info("No new files were created, and is not a replace. Skipping "
+            + "generating INSERT event.");
+      }
+    }
+
+    // Firing insert events by making calls to HMS APIs can be slow for tables with
+    // large number of partitions. Hence, we fire the insert events asynchronously.
+    fireInsertEventsAsync(insertEventInfos);
+  }
+
+  /**
+   * Helper method to fire insert events asynchronously. This creates a single thread
+   * to execute the fireInsertEvent method and shuts down the thread after it has
+   * finished. In case of any exception, we just log the failure of firing insert events.
+   */
+  private void fireInsertEventsAsync(List<InsertEventInfo> insertEventInfos) {
+    ExecutorService fireInsertEventThread = Executors.newSingleThreadExecutor();
+    CompletableFuture.runAsync(() -> {
+      for (InsertEventInfo info : insertEventInfos) {
         try (MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient()) {
-          MetaStoreUtil
-              .fireInsertEvent(metaStoreClient.getHiveClient(), table.getDb().getName(),
-                  table.getName(), partVals, deltaFiles, isInsertOverwrite);
+          MetaStoreUtil.fireInsertEvent(metaStoreClient.getHiveClient(), info);
         } catch (Exception e) {
           LOG.error("Failed to fire insert event. Some tables might not be"
               + " refreshed on other impala clusters.", e);
         }
       }
-      else {
-        LOG.info("No new files were created, and is not a replace. Skipping "
-            + "generating INSERT event.");
-      }
-    }
+    }, fireInsertEventThread).thenRun(() ->
+        fireInsertEventThread.shutdown());
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java b/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
index 8c8c527..7bc463f 100644
--- a/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/MetaStoreUtil.java
@@ -317,36 +317,60 @@ public class MetaStoreUtil {
   }
 
   /**
+   * A helper class that encapsulates all the information needed to fire an insert event
+   * with HMS.
+   */
+  public static class InsertEventInfo {
+    private String dbName;
+    private String tableName;
+
+    // List of partition values corresponding to the partition keys in
+    // a partitioned table. This is null for non-partitioned table.
+    private List<String> partVals;
+
+    // Set of all the 'new' files added by this insert. This is empty in
+    // case of insert overwrite.
+    private Collection<String> newFiles;
+
+    // If true, sets the 'replace' flag to true indicating that the
+    // operation was an insert overwrite in the notification log. Will set the same to
+    // false otherwise.
+    private boolean isOverwrite;
+
+    public InsertEventInfo(String dbName, String tableName, List<String> partVals,
+        Collection<String> newFiles, boolean isOverwrite) {
+      this.dbName = dbName;
+      this.tableName = tableName;
+      this.partVals = partVals;
+      this.newFiles = newFiles;
+      this.isOverwrite = isOverwrite;
+    }
+  }
+
+  /**
    *  Fires an insert event to HMS notification log. For partitioned table, each
    *  existing partition touched by the insert will fire a separate insert event.
    *
-   * @param msClient Metastore client
-   * @param newFiles Set of all the 'new' files added by this insert. This is empty in
-   * case of insert overwrite.
-   * @param partVals List of partition values corresponding to the partition keys in
-   * a partitioned table. This is null for non-partitioned table.
-   * @param isOverwrite If true, sets the 'replace' flag to true indicating that the
-   * operation was an insert overwrite in the notification log. Will set the same to
-   * false otherwise.
+   * @param msClient Metastore client
+   * @param info A single insert event encapsulating the information needed to fire insert
+   * event with HMS.
    */
   public static void fireInsertEvent(IMetaStoreClient msClient,
-      String dbName, String tblName, List<String> partVals,
-      Collection<String> newFiles, boolean isOverwrite) throws TException {
+      InsertEventInfo info) throws TException {
     Preconditions.checkNotNull(msClient);
-    Preconditions.checkNotNull(dbName);
-    Preconditions.checkNotNull(tblName);
-    Preconditions.checkNotNull(newFiles);
-    LOG.debug("Firing an insert event for {}", tblName);
+    Preconditions.checkNotNull(info.dbName);
+    Preconditions.checkNotNull(info.tableName);
+    Preconditions.checkNotNull(info.newFiles);
+    LOG.debug("Firing an insert event for {}", info.tableName);
     FireEventRequestData data = new FireEventRequestData();
     InsertEventRequestData insertData = new InsertEventRequestData();
     data.setInsertData(insertData);
     FireEventRequest rqst = new FireEventRequest(true, data);
-    rqst.setDbName(dbName);
-    rqst.setTableName(tblName);
-    insertData.setFilesAdded(new ArrayList<>(newFiles));
-    insertData.setReplace(isOverwrite);
-    if (partVals != null) rqst.setPartitionVals(partVals);
-
+    rqst.setDbName(info.dbName);
+    rqst.setTableName(info.tableName);
+    insertData.setFilesAdded(new ArrayList<>(info.newFiles));
+    insertData.setReplace(info.isOverwrite);
+    if (info.partVals != null) rqst.setPartitionVals(info.partVals);
     msClient.fireListenerEvent(rqst);
   }
 
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index 6b4dbb9..41995a8 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -135,6 +135,7 @@ import org.apache.impala.thrift.TTypeNode;
 import org.apache.impala.thrift.TTypeNodeType;
 import org.apache.impala.thrift.TUniqueId;
 import org.apache.impala.util.MetaStoreUtil;
+import org.apache.impala.util.MetaStoreUtil.InsertEventInfo;
 import org.apache.thrift.TException;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -834,8 +835,9 @@ public class MetastoreEventsProcessorTest {
     List <String> newFiles = addFilesToDirectory(parentPath, "testFile.",
         totalNumberOfFilesToAdd, isOverwrite);
     try (MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient()) {
-      MetaStoreUtil.fireInsertEvent(metaStoreClient.getHiveClient(), msTbl.getDbName(),
-          msTbl.getTableName(), null, newFiles, isOverwrite);
+      MetaStoreUtil.fireInsertEvent(metaStoreClient.getHiveClient(),
+          new InsertEventInfo(msTbl.getDbName(), msTbl.getTableName(), null,
+          newFiles, isOverwrite));
     }
   }
 


[impala] 04/04: IMPALA-9414 (part 2): Support the 'Expect: 100-continue' http header

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3fd6f60b2219d31d61bd838e94a7f8f9df3d15e2
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Fri Feb 21 14:37:35 2020 -0800

    IMPALA-9414 (part 2): Support the 'Expect: 100-continue' http header
    
    The 'Expect: 100-continue' http header allows http clients to send
    only the headers for their request, get a confirmation back from the
    server that the headers are valid, and only then send the body of the
    request, avoiding the overhead of sending large requests that will
    ultimately fail.
    
    This patch adds support for this in the HS2 HTTP server by having
    THttpServer look for the header, and if it's present and the request
    is validated returning a '100 Continue' response before reading the
    body of the request.
    
    It also adds support for using this header on large requests sent by
    impala-shell.
    
    Testing:
    - This case is covered by the existing test_large_sql, however that
      test was previously broken and passing spuriously. This patch fixes
      the test.
    - Passed all other shell tests.
    
    Change-Id: I4153968551acd58b25c7923c2ebf75ee29a7e76b
    Reviewed-on: http://gerrit.cloudera.org:8080/15284
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
---
 be/src/transport/THttpServer.cpp              |  4 ++
 be/src/transport/THttpTransport.cpp           | 14 ++++++
 be/src/transport/THttpTransport.h             |  5 +++
 shell/{THttpClient.py => ImpalaHttpClient.py} | 42 ++++++++++++++----
 shell/impala_client.py                        | 64 +++++----------------------
 shell/impala_shell.py                         |  7 ++-
 shell/make_shell_tarball.sh                   |  3 +-
 shell/packaging/make_python_package.sh        |  3 +-
 shell/shell_exceptions.py                     | 52 ++++++++++++++++++++++
 tests/shell/test_shell_commandline.py         | 11 +++--
 10 files changed, 134 insertions(+), 71 deletions(-)

diff --git a/be/src/transport/THttpServer.cpp b/be/src/transport/THttpServer.cpp
index 120efe2..a3b6712 100644
--- a/be/src/transport/THttpServer.cpp
+++ b/be/src/transport/THttpServer.cpp
@@ -110,6 +110,10 @@ void THttpServer::parseHeader(char* header) {
     auth_value_ = string(value);
   } else if (use_cookies_ && THRIFT_strncasecmp(header, "Cookie", sz) == 0) {
     cookie_value_ = string(value);
+  } else if (THRIFT_strncasecmp(header, "Expect", sz) == 0) {
+    if (THRIFT_strcasestr(value, "100-continue")){
+      continue_ = true;
+    }
   }
 }
 
diff --git a/be/src/transport/THttpTransport.cpp b/be/src/transport/THttpTransport.cpp
index c3e1aa8..e8925d2 100644
--- a/be/src/transport/THttpTransport.cpp
+++ b/be/src/transport/THttpTransport.cpp
@@ -222,6 +222,7 @@ void THttpTransport::readHeaders() {
   chunked_ = false;
   chunkedDone_ = false;
   chunkSize_ = 0;
+  continue_ = false;
 
   // Control state flow
   bool statusLine = true;
@@ -249,7 +250,20 @@ void THttpTransport::readHeaders() {
     }
   }
 
+  // Perform any validation of the headers needed.
   headersDone();
+
+  // The headers have been validated, so if the client included the 'Expect: 100-continue'
+  // header, we respond that it can continue with sending the request. See Section 8.2.3
+  // of RFC2616 for more details: https://www.w3.org/Protocols/rfc2616/rfc2616-sec8.html
+  if (continue_) {
+    std::ostringstream h;
+    h << "HTTP/1.1 100 Continue" << CRLF << CRLF;
+    string header = h.str();
+    transport_->write(
+        (const uint8_t*)header.c_str(), static_cast<uint32_t>(header.size()));
+    transport_->flush();
+  }
 }
 
 void THttpTransport::write(const uint8_t* buf, uint32_t len) {
diff --git a/be/src/transport/THttpTransport.h b/be/src/transport/THttpTransport.h
index 03b2a71..891d14e 100644
--- a/be/src/transport/THttpTransport.h
+++ b/be/src/transport/THttpTransport.h
@@ -78,6 +78,11 @@ protected:
   uint32_t httpBufLen_;
   uint32_t httpBufSize_;
 
+  // Set to 'true' for a request if the "Expect: 100-continue" header was present.
+  // Indicates that we should return a "100 Continue" response if the headers are
+  // successfully validated before reading the contents of the request.
+  bool continue_ = false;
+
   void init();
 
   uint32_t readMoreData();
diff --git a/shell/THttpClient.py b/shell/ImpalaHttpClient.py
similarity index 76%
rename from shell/THttpClient.py
rename to shell/ImpalaHttpClient.py
index d131577..cd79971 100644
--- a/shell/THttpClient.py
+++ b/shell/ImpalaHttpClient.py
@@ -28,17 +28,29 @@ from six.moves import urllib
 from six.moves import http_client
 
 from thrift.transport.TTransport import TTransportBase
+from shell_exceptions import RPCException
 import six
 
 
-class THttpClient(TTransportBase):
+# This was taken from THttpClient.py in Thrift to allow making changes Impala needs.
+# The current changes that have been applied:
+# - Added logic for the 'Expect: 100-continue' header on large requests
+# - If an error code is received back in flush(), an exception is thrown.
+class ImpalaHttpClient(TTransportBase):
   """Http implementation of TTransport base."""
 
-  def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=None, key_file=None, ssl_context=None):
-    """THttpClient supports two different types of construction:
+  # When sending requests larger than this size, include the 'Expect: 100-continue' header
+  # to indicate to the server to validate the request before reading the contents. This
+  # value was chosen to match curl's behavior. See Section 8.2.3 of RFC2616.
+  MIN_REQUEST_SIZE_FOR_EXPECT = 1024
 
-    THttpClient(host, port, path) - deprecated
-    THttpClient(uri, [port=<n>, path=<s>, cafile=<filename>, cert_file=<filename>, key_file=<filename>, ssl_context=<context>])
+  def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=None,
+      key_file=None, ssl_context=None):
+    """ImpalaHttpClient supports two different types of construction:
+
+    ImpalaHttpClient(host, port, path) - deprecated
+    ImpalaHttpClient(uri, [port=<n>, path=<s>, cafile=<filename>, cert_file=<filename>,
+        key_file=<filename>, ssl_context=<context>])
 
     Only the second supports https.  To properly authenticate against the server,
     provide the client's identity by specifying cert_file and key_file.  To properly
@@ -47,7 +59,7 @@ class THttpClient(TTransportBase):
     """
     if port is not None:
       warnings.warn(
-          "Please use the THttpClient('http{s}://host:port/path') constructor",
+          "Please use the ImpalaHttpClient('http{s}://host:port/path') constructor",
           DeprecationWarning,
           stacklevel=2)
       self.host = uri_or_host
@@ -65,7 +77,8 @@ class THttpClient(TTransportBase):
         self.port = parsed.port or http_client.HTTPS_PORT
         self.certfile = cert_file
         self.keyfile = key_file
-        self.context = ssl.create_default_context(cafile=cafile) if (cafile and not ssl_context) else ssl_context
+        self.context = ssl.create_default_context(cafile=cafile) \
+            if (cafile and not ssl_context) else ssl_context
       self.host = parsed.hostname
       self.path = parsed.path
       if parsed.query:
@@ -160,12 +173,18 @@ class THttpClient(TTransportBase):
 
     # Write headers
     self.__http.putheader('Content-Type', 'application/x-thrift')
-    self.__http.putheader('Content-Length', str(len(data)))
+    data_len = len(data)
+    self.__http.putheader('Content-Length', str(data_len))
+    if data_len > ImpalaHttpClient.MIN_REQUEST_SIZE_FOR_EXPECT:
+      # Add the 'Expect' header to large requests. Note that we do not explicitly wait for
+      # the '100 continue' response before sending the data - HTTPConnection simply
+      # ignores these types of responses, but we'll get the right behavior anyways.
+      self.__http.putheader("Expect", "100-continue")
     if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None:
       self.__http.putheader("Proxy-Authorization", self.proxy_auth)
 
     if not self.__custom_headers or 'User-Agent' not in self.__custom_headers:
-      user_agent = 'Python/THttpClient'
+      user_agent = 'Python/ImpalaHttpClient'
       script = os.path.basename(sys.argv[0])
       if script:
         user_agent = '%s (%s)' % (user_agent, urllib.parse.quote(script))
@@ -185,3 +204,8 @@ class THttpClient(TTransportBase):
     self.code = self.__http_response.status
     self.message = self.__http_response.reason
     self.headers = self.__http_response.msg
+
+    if self.code >= 300:
+      # Report any http response code that is not 1XX (informational response) or
+      # 2XX (successful).
+      raise RPCException("HTTP code {}: {}".format(self.code, self.message))
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 145cee5..fbfca32 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -38,12 +38,14 @@ from TCLIService.TCLIService import (TExecuteStatementReq, TOpenSessionReq,
     TCloseSessionReq, TProtocolVersion, TStatusCode, TGetOperationStatusReq,
     TOperationState, TFetchResultsReq, TFetchOrientation, TGetLogReq,
     TGetResultSetMetadataReq, TTypeId, TCancelOperationReq)
-from THttpClient import THttpClient
+from ImpalaHttpClient import ImpalaHttpClient
 from thrift.protocol import TBinaryProtocol
 from thrift_sasl import TSaslClientTransport
 from thrift.transport.TSocket import TSocket
 from thrift.transport.TTransport import TBufferedTransport, TTransportException
 from thrift.Thrift import TApplicationException, TException
+from shell_exceptions import (RPCException, QueryStateException, DisconnectedException,
+    QueryCancelledByShellException, MissingThriftMethodException)
 
 
 # Helpers to extract and convert HS2's representation of values to the display version.
@@ -99,47 +101,6 @@ class QueryOptionLevels:
     """Return the integral value based on the string. Defaults to DEVELOPMENT."""
     return cls.NAME_TO_VALUES.get(string.upper(), cls.DEVELOPMENT)
 
-class RPCException(Exception):
-    def __init__(self, value=""):
-      self.value = value
-    def __str__(self):
-      return self.value
-
-class QueryStateException(Exception):
-    def __init__(self, value=""):
-      self.value = value
-    def __str__(self):
-      return self.value
-
-class DisconnectedException(Exception):
-  def __init__(self, value=""):
-      self.value = value
-  def __str__(self):
-      return self.value
-
-class QueryCancelledByShellException(Exception): pass
-
-
-class MissingThriftMethodException(Exception):
-  """Thrown if a Thrift method that the client tried to call is missing."""
-  def __init__(self, value=""):
-      self.value = value
-
-  def __str__(self):
-      return self.value
-
-
-class CodeCheckingHttpClient(THttpClient):
-  """Add HTTP response code handling to THttpClient."""
-  def flush(self):
-    THttpClient.flush(self)
-    # At this point the http call has completed.
-    if self.code >= 300:
-      # Report any http response code that is not 1XX (informational response) or
-      # 2XX (successful).
-      raise RPCException("HTTP code {}: {}".format(self.code, self.message))
-
-
 def print_to_stderr(message):
   print >> sys.stderr, message
 
@@ -371,17 +332,17 @@ class ImpalaClient(object):
 
   def _get_http_transport(self, connect_timeout_ms):
     """Creates a transport with HTTP as the base."""
-    # Older python versions do not support SSLContext needed by THttpClient. More
+    # Older python versions do not support SSLContext needed by ImpalaHttpClient. More
     # context in IMPALA-8864. CentOs 6 ships such an incompatible python version
     # out of the box.
     if not hasattr(ssl, "create_default_context"):
       print_to_stderr("Python version too old. SSLContext not supported.")
       raise NotImplementedError()
-    # Current implementation of THttpClient does a close() and open() of the underlying
-    # http connection on every flush() (THRIFT-4600). Due to this, setting a connect
-    # timeout does not achieve the desirable result as the subsequent open() could block
-    # similary in case of problematic remote end points.
-    # TODO: Investigate connection reuse in THttpClient and revisit this.
+    # Current implementation of ImpalaHttpClient does a close() and open() of the
+    # underlying http connection on every flush() (THRIFT-4600). Due to this, setting a
+    # connect timeout does not achieve the desirable result as the subsequent open() could
+    # block similarly in case of problematic remote end points.
+    # TODO: Investigate connection reuse in ImpalaHttpClient and revisit this.
     if connect_timeout_ms > 0:
       print_to_stderr("Warning: --connect_timeout_ms is currently ignored with" +
           " HTTP transport.")
@@ -396,7 +357,7 @@ class ImpalaClient(object):
 
     host_and_port = "{0}:{1}".format(self.impalad_host, self.impalad_port)
     assert self.http_path
-    # THttpClient relies on the URI scheme (http vs https) to open an appropriate
+    # ImpalaHttpClient relies on the URI scheme (http vs https) to open an appropriate
     # connection to the server.
     if self.use_ssl:
       ssl_ctx = ssl.create_default_context(cafile=self.ca_cert)
@@ -405,11 +366,10 @@ class ImpalaClient(object):
       else:
         ssl_ctx.check_hostname = False  # Mandated by the SSL lib for CERT_NONE mode.
         ssl_ctx.verify_mode = ssl.CERT_NONE
-      transport = CodeCheckingHttpClient(
+      transport = ImpalaHttpClient(
           "https://{0}/{1}".format(host_and_port, self.http_path), ssl_context=ssl_ctx)
     else:
-      transport = CodeCheckingHttpClient("http://{0}/{1}".
-          format(host_and_port, self.http_path))
+      transport = ImpalaHttpClient("http://{0}/{1}".format(host_and_port, self.http_path))
 
     if self.use_ldap:
       # Set the BASIC auth header
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 4c3a3cf..e05a76f 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -35,15 +35,14 @@ import sys
 import textwrap
 import time
 
-from impala_client import (ImpalaHS2Client, ImpalaBeeswaxClient, DisconnectedException,
-                           QueryStateException, RPCException,
-                           QueryCancelledByShellException, QueryOptionLevels,
-                           MissingThriftMethodException)
+from impala_client import ImpalaHS2Client, ImpalaBeeswaxClient, QueryOptionLevels
 from impala_shell_config_defaults import impala_shell_defaults
 from option_parser import get_option_parser, get_config_from_file
 from shell_output import DelimitedOutputFormatter, OutputStream, PrettyOutputFormatter
 from shell_output import OverwritingStdErrOutputStream
 from subprocess import call
+from shell_exceptions import (RPCException, DisconnectedException, QueryStateException,
+    QueryCancelledByShellException, MissingThriftMethodException)
 
 
 VERSION_FORMAT = "Impala Shell v%(version)s (%(git_hash)s) built on %(build_date)s"
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index d3771d5..982b4dd 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -119,7 +119,8 @@ cp ${SHELL_HOME}/option_parser.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala_shell_config_defaults.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala_client.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/TSSLSocketWithWildcardSAN.py ${TARBALL_ROOT}/lib
-cp ${SHELL_HOME}/THttpClient.py ${TARBALL_ROOT}/lib
+cp ${SHELL_HOME}/ImpalaHttpClient.py ${TARBALL_ROOT}/lib
+cp ${SHELL_HOME}/shell_exceptions.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/shell_output.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/pkg_resources.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala-shell ${TARBALL_ROOT}
diff --git a/shell/packaging/make_python_package.sh b/shell/packaging/make_python_package.sh
index deb263f..591823b 100755
--- a/shell/packaging/make_python_package.sh
+++ b/shell/packaging/make_python_package.sh
@@ -55,7 +55,8 @@ assemble_package_files() {
   cp "${SHELL_HOME}/shell_output.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/impala_shell_config_defaults.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/TSSLSocketWithWildcardSAN.py" "${MODULE_LIB_DIR}"
-  cp "${SHELL_HOME}/THttpClient.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/ImpalaHttpClient.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/shell_exceptions.py" "${MODULE_LIB_DIR}"
 
   cp "${SHELL_HOME}/packaging/README.md" "${PACKAGE_DIR}"
   cp "${SHELL_HOME}/packaging/MANIFEST.in" "${PACKAGE_DIR}"
diff --git a/shell/shell_exceptions.py b/shell/shell_exceptions.py
new file mode 100644
index 0000000..efff4f7
--- /dev/null
+++ b/shell/shell_exceptions.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+class RPCException(Exception):
+  def __init__(self, value=""):
+    self.value = value
+
+  def __str__(self):
+    return self.value
+
+
+class QueryStateException(Exception):
+  def __init__(self, value=""):
+    self.value = value
+
+  def __str__(self):
+    return self.value
+
+
+class DisconnectedException(Exception):
+  def __init__(self, value=""):
+      self.value = value
+
+  def __str__(self):
+      return self.value
+
+
+class QueryCancelledByShellException(Exception): pass
+
+
+class MissingThriftMethodException(Exception):
+  """Thrown if a Thrift method that the client tried to call is missing."""
+  def __init__(self, value=""):
+      self.value = value
+
+  def __str__(self):
+      return self.value
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 85294f1..ac8e07b 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -837,7 +837,8 @@ class TestImpalaShell(ImpalaTestSuite):
     # the performance of Impala in general. So, this test will execute a large query
     # from a non-existent table since this will make the query execution time negligible.
     sql_file, sql_path = tempfile.mkstemp()
-    num_cols = 10000
+    # This generates a sql file size of ~50K.
+    num_cols = 1000
     os.write(sql_file, "select \n")
     for i in xrange(num_cols):
       if i < num_cols:
@@ -849,11 +850,13 @@ class TestImpalaShell(ImpalaTestSuite):
     os.close(sql_file)
 
     try:
-      args = ['-q', '-f', sql_path, '-d', unique_database]
+      args = ['-f', sql_path, '-d', unique_database]
       start_time = time()
-      run_impala_shell_cmd(vector, args, expect_success=False)
+      result = run_impala_shell_cmd(vector, args, expect_success=False)
+      assert "Could not resolve table reference: 'non_existence_large_table'" \
+          in result.stderr
       end_time = time()
-      time_limit_s = 10
+      time_limit_s = 20
       actual_time_s = end_time - start_time
       assert actual_time_s <= time_limit_s, (
           "It took {0} seconds to execute the query. Time limit is {1} seconds.".format(


[impala] 01/04: IMPALA-9490 [DOCS] State support for reading Apache Hudi optimized table

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a20edd57d9da037a6123961b753fe26e558b7433
Author: Kris Hahn <kh...@cloudera.com>
AuthorDate: Wed Mar 11 20:54:19 2020 -0700

    IMPALA-9490 [DOCS] State support for reading Apache Hudi optimized table
    
    Added a row to impala_file_formats.xml, created impala_hudi.xml, added the new file to the ditamap, and changed the id of the concept tag from orc to hudi.
    
    Change-Id: If210cd545a8deb059e66fd36e62e0df4402fc96c
    Reviewed-on: http://gerrit.cloudera.org:8080/15418
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 docs/impala.ditamap                 |  1 +
 docs/topics/impala_file_formats.xml | 10 +++++
 docs/topics/impala_hudi.xml         | 81 +++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index ec8e03a..b1d8e85 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -316,6 +316,7 @@ under the License.
     <topicref href="topics/impala_parquet.xml"/>
     <topicref href="topics/impala_orc.xml"/>
     <topicref href="topics/impala_avro.xml"/>
+    <topicref href="topics/impala_hudi.xml"/>
     <topicref href="topics/impala_rcfile.xml"/>
     <topicref href="topics/impala_seqfile.xml"/>
   </topicref>
diff --git a/docs/topics/impala_file_formats.xml b/docs/topics/impala_file_formats.xml
index 7ada1f8..7ce5de7 100644
--- a/docs/topics/impala_file_formats.xml
+++ b/docs/topics/impala_file_formats.xml
@@ -179,6 +179,16 @@ under the License.
             </entry>
 <!-- <entry rev="2.0.0">Yes, in Impala 2.0 and higher. For earlier Impala releases, load data through <codeph>LOAD DATA</codeph> on data files already in the right format, or use <codeph>INSERT</codeph> in Hive.</entry> -->
           </row>
+          <row id="hudi_support">
+            <entry>
+              <xref href="impala_hudi.xml#hudi">Hudi</xref>
+            </entry>
+            <entry>Structured</entry>
+            <entry>Snappy, gzip, zstd, lz4; currently Snappy by default </entry>
+            <entry>Yes, support for Read Optimized Queries is experimental.</entry>
+            <entry>No. Create an external table in Impala. Set the table location to the Hudi table
+              directory. Alternatively, create the Hudi table in Hive. </entry>
+          </row>
           <row id="rcfile_support">
             <entry>
               <xref href="impala_rcfile.xml#rcfile">RCFile</xref>
diff --git a/docs/topics/impala_hudi.xml b/docs/topics/impala_hudi.xml
new file mode 100644
index 0000000..806d399
--- /dev/null
+++ b/docs/topics/impala_hudi.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="hudi">
+  
+  <title>Using the Hudi File Format</title>
+  <titlealts audience="PDF"><navtitle>Hudi Data Files</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <!-- <data name="Category" value="ORC"/> -->
+      <data name="Category" value="File Formats"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+  
+  <conbody>
+    
+    <p>
+      <indexterm audience="hidden">Hudi support in Impala</indexterm> Impala supports Read Optimized
+      Queries on Hudi tables as an experimental feature. Read Optimized Queries treat the latest
+      table snapshot as the commit or compaction action, expose only base/columnar files in latest
+      file slices, and guarantee the same columnar query performance as a non-Hudi columnar
+      table.</p>
+    <p/>
+    
+    <table>
+      <title>Hudi Format Support in Impala</title>
+      <tgroup cols="5">
+        <colspec colname="1" colwidth="10*"/>
+        <colspec colname="2" colwidth="10*"/>
+        <colspec colname="3" colwidth="20*"/>
+        <colspec colname="4" colwidth="30*"/>
+        <colspec colname="5" colwidth="30*"/>
+        <thead>
+          <row>
+            <entry>
+              File Type
+            </entry>
+            <entry>
+              Format
+            </entry>
+            <entry>
+              Compression Codecs
+            </entry>
+            <entry>
+              Impala Can CREATE?
+            </entry>
+            <entry>
+              Impala Can INSERT?
+            </entry>
+          </row>
+        </thead>
+        <tbody>
+          <row conref="impala_file_formats.xml#file_formats/hudi_support">
+            <entry/>
+          </row>
+        </tbody>
+      </tgroup>
+    </table>
+  </conbody>
+</concept>


[impala] 03/04: IMPALA-9414 (part 1): Copy THttpClient from Thrift into Impala

Posted by tm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c3d65cab55789b3cc6950eec072b9931db57e4fe
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Fri Feb 21 15:29:58 2020 -0800

    IMPALA-9414 (part 1): Copy THttpClient from Thrift into Impala
    
    This is a preliminary patch that simply copies THttpClient.py from
    Thrift master into Impala, changes imports as appropriate, and adjusts
    the formatting from 4 spaces to 2 spaces.
    
    This is to allow us to make modifications to THttpClient in future
    patches. There are no functional changes in this patch.
    
    Change-Id: I2662f1d4d455120442ef7c0c198685c07207aeed
    Reviewed-on: http://gerrit.cloudera.org:8080/15283
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Reviewed-by: David Knupp <dk...@cloudera.com>
    Tested-by: Thomas Tauber-Marshall <tm...@cloudera.com>
---
 shell/THttpClient.py                   | 187 +++++++++++++++++++++++++++++++++
 shell/impala_client.py                 |   2 +-
 shell/make_shell_tarball.sh            |   1 +
 shell/packaging/make_python_package.sh |   1 +
 4 files changed, 190 insertions(+), 1 deletion(-)

diff --git a/shell/THttpClient.py b/shell/THttpClient.py
new file mode 100644
index 0000000..d131577
--- /dev/null
+++ b/shell/THttpClient.py
@@ -0,0 +1,187 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+from io import BytesIO
+import os
+import ssl
+import sys
+import warnings
+import base64
+
+from six.moves import urllib
+from six.moves import http_client
+
+from thrift.transport.TTransport import TTransportBase
+import six
+
+
+class THttpClient(TTransportBase):
+  """Http implementation of TTransport base."""
+
+  def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=None, key_file=None, ssl_context=None):
+    """THttpClient supports two different types of construction:
+
+    THttpClient(host, port, path) - deprecated
+    THttpClient(uri, [port=<n>, path=<s>, cafile=<filename>, cert_file=<filename>, key_file=<filename>, ssl_context=<context>])
+
+    Only the second supports https.  To properly authenticate against the server,
+    provide the client's identity by specifying cert_file and key_file.  To properly
+    authenticate the server, specify either cafile or ssl_context with a CA defined.
+    NOTE: if both cafile and ssl_context are defined, ssl_context will override cafile.
+    """
+    if port is not None:
+      warnings.warn(
+          "Please use the THttpClient('http{s}://host:port/path') constructor",
+          DeprecationWarning,
+          stacklevel=2)
+      self.host = uri_or_host
+      self.port = port
+      assert path
+      self.path = path
+      self.scheme = 'http'
+    else:
+      parsed = urllib.parse.urlparse(uri_or_host)
+      self.scheme = parsed.scheme
+      assert self.scheme in ('http', 'https')
+      if self.scheme == 'http':
+        self.port = parsed.port or http_client.HTTP_PORT
+      elif self.scheme == 'https':
+        self.port = parsed.port or http_client.HTTPS_PORT
+        self.certfile = cert_file
+        self.keyfile = key_file
+        self.context = ssl.create_default_context(cafile=cafile) if (cafile and not ssl_context) else ssl_context
+      self.host = parsed.hostname
+      self.path = parsed.path
+      if parsed.query:
+        self.path += '?%s' % parsed.query
+    try:
+      proxy = urllib.request.getproxies()[self.scheme]
+    except KeyError:
+      proxy = None
+    else:
+      if urllib.request.proxy_bypass(self.host):
+        proxy = None
+    if proxy:
+      parsed = urllib.parse.urlparse(proxy)
+      self.realhost = self.host
+      self.realport = self.port
+      self.host = parsed.hostname
+      self.port = parsed.port
+      self.proxy_auth = self.basic_proxy_auth_header(parsed)
+    else:
+      self.realhost = self.realport = self.proxy_auth = None
+    self.__wbuf = BytesIO()
+    self.__http = None
+    self.__http_response = None
+    self.__timeout = None
+    self.__custom_headers = None
+
+  @staticmethod
+  def basic_proxy_auth_header(proxy):
+    if proxy is None or not proxy.username:
+      return None
+    ap = "%s:%s" % (urllib.parse.unquote(proxy.username),
+                    urllib.parse.unquote(proxy.password))
+    cr = base64.b64encode(ap).strip()
+    return "Basic " + cr
+
+  def using_proxy(self):
+    return self.realhost is not None
+
+  def open(self):
+    if self.scheme == 'http':
+      self.__http = http_client.HTTPConnection(self.host, self.port,
+                                               timeout=self.__timeout)
+    elif self.scheme == 'https':
+      self.__http = http_client.HTTPSConnection(self.host, self.port,
+                                                key_file=self.keyfile,
+                                                cert_file=self.certfile,
+                                                timeout=self.__timeout,
+                                                context=self.context)
+    if self.using_proxy():
+      self.__http.set_tunnel(self.realhost, self.realport,
+                             {"Proxy-Authorization": self.proxy_auth})
+
+  def close(self):
+    self.__http.close()
+    self.__http = None
+    self.__http_response = None
+
+  def isOpen(self):
+    return self.__http is not None
+
+  def setTimeout(self, ms):
+    if ms is None:
+      self.__timeout = None
+    else:
+      self.__timeout = ms / 1000.0
+
+  def setCustomHeaders(self, headers):
+    self.__custom_headers = headers
+
+  def read(self, sz):
+    return self.__http_response.read(sz)
+
+  def write(self, buf):
+    self.__wbuf.write(buf)
+
+  def flush(self):
+    if self.isOpen():
+      self.close()
+    self.open()
+
+    # Pull data out of buffer
+    data = self.__wbuf.getvalue()
+    self.__wbuf = BytesIO()
+
+    # HTTP request
+    if self.using_proxy() and self.scheme == "http":
+      # need full URL of real host for HTTP proxy here (HTTPS uses CONNECT tunnel)
+      self.__http.putrequest('POST', "http://%s:%s%s" %
+                             (self.realhost, self.realport, self.path))
+    else:
+      self.__http.putrequest('POST', self.path)
+
+    # Write headers
+    self.__http.putheader('Content-Type', 'application/x-thrift')
+    self.__http.putheader('Content-Length', str(len(data)))
+    if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None:
+      self.__http.putheader("Proxy-Authorization", self.proxy_auth)
+
+    if not self.__custom_headers or 'User-Agent' not in self.__custom_headers:
+      user_agent = 'Python/THttpClient'
+      script = os.path.basename(sys.argv[0])
+      if script:
+        user_agent = '%s (%s)' % (user_agent, urllib.parse.quote(script))
+      self.__http.putheader('User-Agent', user_agent)
+
+    if self.__custom_headers:
+      for key, val in six.iteritems(self.__custom_headers):
+        self.__http.putheader(key, val)
+
+    self.__http.endheaders()
+
+    # Write payload
+    self.__http.send(data)
+
+    # Get reply to flush the request
+    self.__http_response = self.__http.getresponse()
+    self.code = self.__http_response.status
+    self.message = self.__http_response.reason
+    self.headers = self.__http_response.msg
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 6040925..145cee5 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -38,9 +38,9 @@ from TCLIService.TCLIService import (TExecuteStatementReq, TOpenSessionReq,
     TCloseSessionReq, TProtocolVersion, TStatusCode, TGetOperationStatusReq,
     TOperationState, TFetchResultsReq, TFetchOrientation, TGetLogReq,
     TGetResultSetMetadataReq, TTypeId, TCancelOperationReq)
+from THttpClient import THttpClient
 from thrift.protocol import TBinaryProtocol
 from thrift_sasl import TSaslClientTransport
-from thrift.transport.THttpClient import THttpClient
 from thrift.transport.TSocket import TSocket
 from thrift.transport.TTransport import TBufferedTransport, TTransportException
 from thrift.Thrift import TApplicationException, TException
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index 2047bae..d3771d5 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -119,6 +119,7 @@ cp ${SHELL_HOME}/option_parser.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala_shell_config_defaults.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala_client.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/TSSLSocketWithWildcardSAN.py ${TARBALL_ROOT}/lib
+cp ${SHELL_HOME}/THttpClient.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/shell_output.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/pkg_resources.py ${TARBALL_ROOT}/lib
 cp ${SHELL_HOME}/impala-shell ${TARBALL_ROOT}
diff --git a/shell/packaging/make_python_package.sh b/shell/packaging/make_python_package.sh
index ba95148..deb263f 100755
--- a/shell/packaging/make_python_package.sh
+++ b/shell/packaging/make_python_package.sh
@@ -55,6 +55,7 @@ assemble_package_files() {
   cp "${SHELL_HOME}/shell_output.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/impala_shell_config_defaults.py" "${MODULE_LIB_DIR}"
   cp "${SHELL_HOME}/TSSLSocketWithWildcardSAN.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/THttpClient.py" "${MODULE_LIB_DIR}"
 
   cp "${SHELL_HOME}/packaging/README.md" "${PACKAGE_DIR}"
   cp "${SHELL_HOME}/packaging/MANIFEST.in" "${PACKAGE_DIR}"