Posted to commits@impala.apache.org by st...@apache.org on 2021/11/29 01:24:11 UTC

[impala] branch master updated (b13a17b -> f566e7d)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from b13a17b  IMPALA-11029: DescriptorTable.copyTupleDescriptor throw exception for Kudu table
     new b2c51a0  IMPALA-11027: Adding flag to enable support for ShellBasedUnixGroupsMapping
     new 8082d85  IMPALA-10954: Make create, drop methods of kudu catalog service public
     new da53428  IMPALA-11022: Impala uses wrong file descriptors for Iceberg tables in local catalog mode
     new f566e7d  IMPALA-10994: Normalize the pip package name part of download URL.

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/service/frontend.cc                         |  5 +++
 be/src/util/backend-gflag-util.cc                  |  3 ++
 common/thrift/BackendGflags.thrift                 |  2 ++
 docs/topics/impala_delegation.xml                  |  3 +-
 .../org/apache/impala/catalog/FeIcebergTable.java  | 35 +++++++++++++++++--
 .../org/apache/impala/catalog/IcebergTable.java    |  7 +++-
 .../impala/catalog/local/CatalogdMetaProvider.java | 29 ++++++++++++++--
 .../impala/catalog/local/DirectMetaProvider.java   |  4 ++-
 .../impala/catalog/local/LocalIcebergTable.java    | 15 +++++---
 .../apache/impala/catalog/local/LocalTable.java    |  2 ++
 .../apache/impala/catalog/local/MetaProvider.java  |  5 ++-
 .../org/apache/impala/service/BackendConfig.java   |  4 +++
 .../org/apache/impala/service/JniFrontend.java     |  3 +-
 .../impala/service/KuduCatalogOpExecutor.java      |  7 ++--
 .../impala/catalog/local/LocalCatalogTest.java     | 40 ++++++++++++++++++++++
 infra/python/deps/pip_download.py                  |  3 +-
 16 files changed, 147 insertions(+), 20 deletions(-)

[impala] 03/04: IMPALA-11022: Impala uses wrong file descriptors for Iceberg tables in local catalog mode

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit da53428abc84ee351367258ff26d20fecd4c37c9
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Mon Nov 8 15:24:23 2021 +0100

    IMPALA-11022: Impala uses wrong file descriptors for Iceberg tables in local catalog mode
    
    When local catalog mode is used, Impala retrieves the Iceberg
    snapshot from CatalogD. The response contains a map of the file
    descriptors. The file descriptors contain block location information,
    but the hosts are only referred to by indexes. In the Coordinator's
    local catalog the host indexes might refer to different hosts than in
    CatalogD. This might lead to unnecessary remote reads as scan ranges
    are scheduled to random hosts.
    
    This patch properly translates the host indexes to the coordinator's
    host list, so block locations remain consistent.
    
    Testing:
     * tested manually on a 6-node cluster, and verified that the file
       locations are consistent with HDFS
     * added unit test to LocalCatalogTest
    
    Change-Id: I253b505846e1cf4d1be445c0d06b2552dc4ba1f8
    Reviewed-on: http://gerrit.cloudera.org:8080/18041
    Reviewed-by: Qifan Chen <qc...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/catalog/FeIcebergTable.java  | 35 +++++++++++++++++--
 .../org/apache/impala/catalog/IcebergTable.java    |  7 +++-
 .../impala/catalog/local/CatalogdMetaProvider.java | 29 ++++++++++++++--
 .../impala/catalog/local/DirectMetaProvider.java   |  4 ++-
 .../impala/catalog/local/LocalIcebergTable.java    | 15 +++++---
 .../apache/impala/catalog/local/LocalTable.java    |  2 ++
 .../apache/impala/catalog/local/MetaProvider.java  |  5 ++-
 .../impala/catalog/local/LocalCatalogTest.java     | 40 ++++++++++++++++++++++
 8 files changed, 123 insertions(+), 14 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index e362808..1cb632b 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -63,11 +63,14 @@ import org.apache.impala.util.TResultRowBuilder;
 import com.google.common.base.Preconditions;
 import com.google.common.primitives.Ints;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
  * Frontend interface for interacting with an Iceberg-backed table.
  */
 public interface FeIcebergTable extends FeFsTable {
-
+  final static Logger LOG = LoggerFactory.getLogger(FeIcebergTable.class);
   /**
    * FileDescriptor map
    */
@@ -266,6 +269,18 @@ public interface FeIcebergTable extends FeFsTable {
   long snapshotId();
 
   /**
+   * Utility class to hold information about Iceberg snapshots.
+   */
+  public static class Snapshot {
+    public Snapshot(long snapshotId, Map<String, FileDescriptor> pathHashToFileDescMap) {
+      this.snapshotId = snapshotId;
+      this.pathHashToFileDescMap = pathHashToFileDescMap;
+    }
+    public long snapshotId;
+    public Map<String, FileDescriptor> pathHashToFileDescMap;
+  }
+
+  /**
    * Utility functions
    */
   public static abstract class Utils {
@@ -423,12 +438,26 @@ public interface FeIcebergTable extends FeFsTable {
       return ret;
     }
 
+    /**
+     * Load the file descriptors from the thrift-encoded 'tFileDescMap'. Optionally
+     * translate the file descriptors with the given 'networkAddresses'/'hostIndex'.
+     */
     public static Map<String, FileDescriptor> loadFileDescMapFromThrift(
-        Map<String, THdfsFileDesc> tFileDescMap) {
+        Map<String, THdfsFileDesc> tFileDescMap,
+        List<TNetworkAddress> networkAddresses,
+        ListMap<TNetworkAddress> hostIndex) {
       Map<String, FileDescriptor> fileDescMap = new HashMap<>();
       if (tFileDescMap == null) return fileDescMap;
       for (Map.Entry<String, THdfsFileDesc> entry : tFileDescMap.entrySet()) {
-        fileDescMap.put(entry.getKey(), FileDescriptor.fromThrift(entry.getValue()));
+        FileDescriptor fd = FileDescriptor.fromThrift(entry.getValue());
+        Preconditions.checkNotNull(fd);
+        if (networkAddresses == null) {
+          fileDescMap.put(entry.getKey(), fd);
+        } else {
+          Preconditions.checkNotNull(hostIndex);
+          fileDescMap.put(entry.getKey(),
+              fd.cloneWithNewHostIndex(networkAddresses, hostIndex));
+        }
       }
       return fileDescMap;
     }
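
The hunk above is the core of the fix: when catalogd's host list ('networkAddresses') is supplied, each file descriptor is cloned so that its replica host indexes point into the coordinator's own host index ('hostIndex'). For example, if catalogd's list is [hostA, hostB] but the coordinator's index stores [hostB, hostA], a raw index 0 would silently resolve to the wrong host. Below is a minimal sketch of the remapping idea with illustrative names; HostIndexRemapper is not an Impala class, and the real translation lives in FileDescriptor.cloneWithNewHostIndex over ListMap<TNetworkAddress>:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Illustrative stand-in for a ListMap-style host index plus the index
    // translation applied to each file descriptor's block replicas.
    class HostIndexRemapper {
      private final List<String> hosts = new ArrayList<>();
      private final Map<String, Integer> indexByHost = new HashMap<>();

      // Returns the host's position in the local index, appending it if unseen.
      int getOrAddIndex(String host) {
        return indexByHost.computeIfAbsent(host, h -> {
          hosts.add(h);
          return hosts.size() - 1;
        });
      }

      // Rewrites replica indexes that are valid in 'srcHosts' (catalogd's list)
      // into indexes that are valid in this local index (the coordinator's).
      int[] translate(int[] srcReplicaIdxs, List<String> srcHosts) {
        int[] out = new int[srcReplicaIdxs.length];
        for (int i = 0; i < srcReplicaIdxs.length; ++i) {
          out[i] = getOrAddIndex(srcHosts.get(srcReplicaIdxs[i]));
        }
        return out;
      }
    }

Appending unseen hosts rather than failing keeps the local index a superset of what catalogd reports, so every descriptor can always be represented locally.
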
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 91af691..42a6ae9 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -418,8 +418,10 @@ public class IcebergTable extends Table implements FeIcebergTable {
     icebergParquetDictPageSize_ = ticeberg.getParquet_dict_page_size();
     partitionSpecs_ = loadPartitionBySpecsFromThrift(ticeberg.getPartition_spec());
     defaultPartitionSpecId_ = ticeberg.getDefault_partition_spec_id();
+    // Load file descriptors for the Iceberg snapshot. We are using the same host index,
+    // so there's no need for translation.
     pathHashToFileDescMap_ = FeIcebergTable.Utils.loadFileDescMapFromThrift(
-        ticeberg.getPath_hash_to_file_descriptor());
+        ticeberg.getPath_hash_to_file_descriptor(), null, null);
     snapshotId_ = ticeberg.getSnapshot_id();
     hdfsTable_.loadFromThrift(thriftTable);
     TableMetadata metadata = IcebergUtil.getIcebergTableMetadata(this);
@@ -483,6 +485,9 @@ public class IcebergTable extends Table implements FeIcebergTable {
     if (req.table_info_selector.want_iceberg_snapshot) {
       resp.table_info.setIceberg_snapshot(
           FeIcebergTable.Utils.createTIcebergSnapshot(this));
+      if (!resp.table_info.isSetNetwork_addresses()) {
+        resp.table_info.setNetwork_addresses(getHostIndex().getList());
+      }
     }
     return resp;
   }
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
index 5531e31..70f1116 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/CatalogdMetaProvider.java
@@ -34,6 +34,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableCollection;
 import com.google.common.collect.ImmutableMap;
 import org.apache.commons.lang3.StringUtils;
@@ -52,6 +53,7 @@ import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.CatalogDeltaLog;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.catalog.CatalogObjectCache;
+import org.apache.impala.catalog.FeIcebergTable;
 import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HdfsCachePool;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
@@ -83,6 +85,7 @@ import org.apache.impala.thrift.THdfsFileDesc;
 import org.apache.impala.thrift.TIcebergSnapshot;
 import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TPartialPartitionInfo;
+import org.apache.impala.thrift.TPartialTableInfo;
 import org.apache.impala.thrift.TTable;
 import org.apache.impala.thrift.TTableInfoSelector;
 import org.apache.impala.thrift.TUniqueId;
@@ -1002,14 +1005,34 @@ public class CatalogdMetaProvider implements MetaProvider {
     return ret;
   }
 
-  @Override
-  public TIcebergSnapshot loadIcebergSnapshot(final TableMetaRef table)
+  /**
+   * Utility function to retrieve table info with the Iceberg snapshot. Exists for
+   * testing purposes; use loadIcebergSnapshot(), which translates the file
+   * descriptors to the table's host index.
+   */
+  @VisibleForTesting
+  TPartialTableInfo loadTableInfoWithIcebergSnapshot(final TableMetaRef table)
       throws TException {
     Preconditions.checkArgument(table instanceof TableMetaRefImpl);
     TGetPartialCatalogObjectRequest req = newReqForTable(table);
     req.table_info_selector.want_iceberg_snapshot = true;
     TGetPartialCatalogObjectResponse resp = sendRequest(req);
-    return resp.table_info.getIceberg_snapshot();
+    return resp.table_info;
+  }
+
+  @Override
+  public FeIcebergTable.Snapshot loadIcebergSnapshot(final TableMetaRef table,
+      ListMap<TNetworkAddress> hostIndex)
+      throws TException {
+    TPartialTableInfo tableInfo = loadTableInfoWithIcebergSnapshot(table);
+    Map<String, FileDescriptor> pathToFds =
+        FeIcebergTable.Utils.loadFileDescMapFromThrift(
+            tableInfo.getIceberg_snapshot().getIceberg_file_desc_map(),
+            tableInfo.getNetwork_addresses(),
+            hostIndex);
+    return new FeIcebergTable.Snapshot(
+        tableInfo.getIceberg_snapshot().getSnapshot_id(),
+        pathToFds);
   }
 
   private ImmutableList<FileDescriptor> convertThriftFdList(List<THdfsFileDesc> thriftFds,
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
index 001723f..24411a2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/DirectMetaProvider.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.TableMeta;
 import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.catalog.CatalogException;
+import org.apache.impala.catalog.FeIcebergTable;
 import org.apache.impala.catalog.FileMetadataLoader;
 import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HdfsCachePool;
@@ -501,7 +502,8 @@ class DirectMetaProvider implements MetaProvider {
   }
 
   @Override
-  public TIcebergSnapshot loadIcebergSnapshot(final TableMetaRef table)
+  public FeIcebergTable.Snapshot loadIcebergSnapshot(final TableMetaRef table,
+      ListMap<TNetworkAddress> hostIndex)
       throws TException {
     throw new NotImplementedException(
         "loadIcebergSnapshot() is not implemented for DirectMetaProvider");
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index 4c4a49a..09bec8f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -43,6 +43,7 @@ import org.apache.impala.thrift.THdfsTable;
 import org.apache.impala.thrift.TIcebergCatalog;
 import org.apache.impala.thrift.TIcebergFileFormat;
 import org.apache.impala.thrift.TIcebergSnapshot;
+import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TTableDescriptor;
 import org.apache.impala.thrift.TTableType;
 import org.apache.impala.util.IcebergSchemaConverter;
@@ -52,10 +53,13 @@ import org.apache.thrift.TException;
 import com.google.common.base.Preconditions;
 import com.google.errorprone.annotations.Immutable;
 
+import org.apache.log4j.Logger;
+
 /**
  * Iceberg table for LocalCatalog
  */
 public class LocalIcebergTable extends LocalTable implements FeIcebergTable {
+  private static final Logger LOG = Logger.getLogger(LocalIcebergTable.class);
   private TableParams tableParams_;
   private TIcebergFileFormat icebergFileFormat_;
   private TCompressionCodec icebergParquetCompressionCodec_;
@@ -102,19 +106,20 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable {
     super(db, msTable, ref, cmap);
     localFsTable_ = LocalFsTable.load(db, msTable, ref);
     tableParams_ = new TableParams(msTable);
-    TIcebergSnapshot tSnapshot;
+    FeIcebergTable.Snapshot snapshot;
     try {
-      tSnapshot = db_.getCatalog().getMetaProvider().loadIcebergSnapshot(ref);
+      snapshot = db_.getCatalog().getMetaProvider().loadIcebergSnapshot(
+          ref, getHostIndex());
+      Preconditions.checkNotNull(snapshot);
+      snapshotId_ = snapshot.snapshotId;
+      pathHashToFileDescMap_ = snapshot.pathHashToFileDescMap;
     } catch (TException e) {
       throw new TableLoadingException(String.format(
           "Failed to load table: %s.%s", msTable.getDbName(), msTable.getTableName()), e);
     }
-    snapshotId_ = tSnapshot.getSnapshot_id();
     partitionSpecs_ = Utils.loadPartitionSpecByIceberg(metadata);
     defaultPartitionSpecId_ = metadata.defaultSpecId();
     icebergSchema_ = metadata.schema();
-    pathHashToFileDescMap_ = FeIcebergTable.Utils.loadFileDescMapFromThrift(
-        tSnapshot.getIceberg_file_desc_map());
     icebergFileFormat_ = IcebergUtil.getIcebergFileFormat(msTable);
     icebergParquetCompressionCodec_ = Utils.getIcebergParquetCompressionCodec(msTable);
     icebergParquetRowGroupSize_ = Utils.getIcebergParquetRowGroupSize(msTable);
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalTable.java
index 44928a8..f09b656 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalTable.java
@@ -23,6 +23,7 @@ import java.util.List;
 
 import javax.annotation.Nullable;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -86,6 +87,7 @@ abstract class LocalTable implements FeTable {
    * about them.
    */
   @Nullable
+  @VisibleForTesting
   protected final TableMetaRef ref_;
 
   public static LocalTable load(LocalDb db, String tblName) throws TableLoadingException {
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
index 146a6dd..4fc0aa0 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/MetaProvider.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.catalog.CatalogException;
+import org.apache.impala.catalog.FeIcebergTable;
 import org.apache.impala.catalog.Function;
 import org.apache.impala.catalog.HdfsCachePool;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
@@ -127,7 +128,9 @@ public interface MetaProvider {
   /**
    * Loads Iceberg snapshot information, i.e. snapshot id and file descriptors.
    */
-  public TIcebergSnapshot loadIcebergSnapshot(final TableMetaRef table) throws TException;
+  public FeIcebergTable.Snapshot loadIcebergSnapshot(final TableMetaRef table,
+      ListMap<TNetworkAddress> hostIndex)
+      throws TException;
 
   /**
    * Reference to a table as returned by loadTable(). This reference must be passed
diff --git a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
index 5ecb9f8..61721e4 100644
--- a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
@@ -20,6 +20,7 @@ package org.apache.impala.catalog.local;
 import static org.junit.Assert.*;
 
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
@@ -36,9 +37,11 @@ import org.apache.impala.catalog.FeCatalogUtils;
 import org.apache.impala.catalog.FeDb;
 import org.apache.impala.catalog.FeFsPartition;
 import org.apache.impala.catalog.FeFsTable;
+import org.apache.impala.catalog.FeIcebergTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.fb.FbFileBlock;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.service.BackendConfig;
 import org.apache.impala.service.FeSupport;
@@ -48,8 +51,11 @@ import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TMetadataOpcode;
+import org.apache.impala.thrift.TNetworkAddress;
+import org.apache.impala.thrift.TPartialTableInfo;
 import org.apache.impala.thrift.TResultRow;
 import org.apache.impala.thrift.TResultSet;
+import org.apache.impala.util.ListMap;
 import org.apache.impala.util.MetaStoreUtil;
 import org.apache.impala.util.PatternMatcher;
 import org.hamcrest.CoreMatchers;
@@ -249,6 +255,40 @@ public class LocalCatalogTest {
     assertTrue(t.getHostIndex().size() > 0);
   }
 
+  /**
+   * This test verifies that the network addresses used by the LocalIcebergTable are
+   * the same as those used by CatalogD.
+   */
+  @Test
+  public void testLoadIcebergFileDescriptors() throws Exception {
+    LocalIcebergTable t = (LocalIcebergTable)catalog_.getTable(
+        "functional_parquet", "iceberg_partitioned");
+    Map<String, FileDescriptor> localTblFdMap = t.getPathHashToFileDescMap();
+    TPartialTableInfo tblInfo = provider_.loadTableInfoWithIcebergSnapshot(t.ref_);
+    ListMap<TNetworkAddress> catalogdHostIndexes = new ListMap<>();
+    catalogdHostIndexes.populate(tblInfo.getNetwork_addresses());
+    Map<String, FileDescriptor> catalogFdMap =
+        FeIcebergTable.Utils.loadFileDescMapFromThrift(
+            tblInfo.getIceberg_snapshot().getIceberg_file_desc_map(),
+            null, null);
+    for (Map.Entry<String, FileDescriptor> entry : localTblFdMap.entrySet()) {
+      String path = entry.getKey();
+      FileDescriptor localFd = entry.getValue();
+      FileDescriptor catalogFd = catalogFdMap.get(path);
+      assertEquals(localFd.getNumFileBlocks(), 1);
+      FbFileBlock localBlock = localFd.getFbFileBlock(0);
+      FbFileBlock catalogBlock = catalogFd.getFbFileBlock(0);
+      assertEquals(localBlock.replicaHostIdxsLength(), 3);
+      for (int i = 0; i < localBlock.replicaHostIdxsLength(); ++i) {
+        TNetworkAddress localAddr = t.getHostIndex().getEntry(
+            localBlock.replicaHostIdxs(i));
+        TNetworkAddress catalogAddr = catalogdHostIndexes.getEntry(
+            catalogBlock.replicaHostIdxs(i));
+        assertEquals(localAddr, catalogAddr);
+      }
+    }
+  }
+
   @Test
   public void testLoadFileDescriptorsUnpartitioned() throws Exception {
     FeFsTable t = (FeFsTable) catalog_.getTable("tpch",  "region");

[impala] 04/04: IMPALA-10994: Normalize the pip package name part of download URL.

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f566e7dee7b64dcf309bfd497b0296f8e547dfb7
Author: yx91490 <yx...@126.com>
AuthorDate: Mon Nov 1 12:33:50 2021 +0800

    IMPALA-10994: Normalize the pip package name part of download URL.
    
    According to PEP-0503, the pip repo server doesn't support unnormalized
    URL access, and some package names within
    'infra/python/deps/*requirements.txt' are unnormalized, e.g. 'Cython'.
    pip_download.py concatenates $PYPI_MIRROR and the package name to build
    the download URL directly, so the URL may be unnormalized.
    
    Fix this by normalizing the package name in the download URL using the
    method recommended in PEP-0503.
    
    Change-Id: I479df0ad7acf3c650b8f5317372261d5e2840864
    Reviewed-on: http://gerrit.cloudera.org:8080/17987
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 infra/python/deps/pip_download.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index b289ce1..d56e028 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -82,7 +82,8 @@ def get_package_info(pkg_name, pkg_version):
   # to sort them and return the first value in alphabetical order. This ensures that the
   # same result is always returned even if the ordering changed on the server.
   candidates = []
-  url = '{0}/simple/{1}/'.format(PYPI_MIRROR, pkg_name)
+  normalized_name = re.sub(r"[-_.]+", "-", pkg_name).lower()
+  url = '{0}/simple/{1}/'.format(PYPI_MIRROR, normalized_name)
   print('Getting package info from {0}'.format(url))
   # The web page should be in PEP 503 format (https://www.python.org/dev/peps/pep-0503/).
   # We parse the page with regex instead of an html parser because that requires
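
For reference, the normalization rule from PEP 503 that the added re.sub line implements: replace runs of '-', '_' and '.' with a single '-', then lowercase. A small stand-alone rendering of the same rule in Java (class and method names here are illustrative, not part of Impala):

    import java.util.regex.Pattern;

    // Same normalization as the Python re.sub in the diff above: runs of
    // '-', '_' and '.' collapse to one '-', then the name is lowercased.
    class Pep503Names {
      private static final Pattern SEPARATOR_RUNS = Pattern.compile("[-_.]+");

      static String normalize(String pkgName) {
        return SEPARATOR_RUNS.matcher(pkgName).replaceAll("-").toLowerCase();
      }

      public static void main(String[] args) {
        System.out.println(normalize("Cython"));           // cython
        System.out.println(normalize("Flask_SQLAlchemy")); // flask-sqlalchemy
      }
    }
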

[impala] 02/04: IMPALA-10954: Make create, drop methods of kudu catalog service public

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8082d85273702c4abda22c3a2771726221f61c2c
Author: Deepti Sehrawat <de...@cloudera.com>
AuthorDate: Tue Oct 26 17:34:11 2021 -0700

    IMPALA-10954: Make create, drop methods of kudu catalog service public
    
    The external frontend needs access to some private methods of the Kudu
    catalog service. Hence, they are made public here to add support for
    CREATE and DROP of Kudu tables from the external frontend.
    
    Change-Id: Ib60142424d8e758031de596831d98aed69d488ef
    Reviewed-on: http://gerrit.cloudera.org:8080/17981
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Aman Sinha <am...@cloudera.com>
    Reviewed-by: Aman Sinha <am...@cloudera.com>
---
 .../main/java/org/apache/impala/service/KuduCatalogOpExecutor.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
index 1c49f72..1ac8783 100644
--- a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
@@ -74,8 +74,9 @@ public class KuduCatalogOpExecutor {
    * Throws an exception if 'msTbl' represents an external table or if the table couldn't
    * be created in Kudu.
    */
-  static void createSynchronizedTable(org.apache.hadoop.hive.metastore.api.Table msTbl,
-      TCreateTableParams params) throws ImpalaRuntimeException {
+  public static void createSynchronizedTable(
+          org.apache.hadoop.hive.metastore.api.Table msTbl,
+          TCreateTableParams params) throws ImpalaRuntimeException {
     Preconditions.checkState(KuduTable.isSynchronizedTable(msTbl));
     Preconditions.checkState(
         msTbl.getParameters().get(KuduTable.KEY_TABLE_ID) == null);
@@ -263,7 +264,7 @@ public class KuduCatalogOpExecutor {
    * TableNotFoundException is thrown. If the table exists and could not be dropped,
    * an ImpalaRuntimeException is thrown.
    */
-  static void dropTable(org.apache.hadoop.hive.metastore.api.Table msTbl,
+  public static void dropTable(org.apache.hadoop.hive.metastore.api.Table msTbl,
       boolean ifExists) throws ImpalaRuntimeException, TableNotFoundException {
     Preconditions.checkState(KuduTable.isSynchronizedTable(msTbl));
     String tableName = msTbl.getParameters().get(KuduTable.KEY_TABLE_NAME);

[impala] 01/04: IMPALA-11027: Adding flag to enable support for ShellBasedUnixGroupsMapping

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b2c51a0cefb00eccad98d020f5b16cf18a56c979
Author: Amogh Margoor <am...@cloudera.com>
AuthorDate: Thu Nov 11 06:27:07 2021 -0800

    IMPALA-11027: Adding flag to enable support for ShellBasedUnixGroupsMapping
    
    Currently, Impala doesn't support ShellBasedUnixGroupsMapping and
    ShellBasedUnixGroupsNetgroupMapping for fetching Hadoop groups, as they
    spawn a new process and run a shell command to fetch group info.
    In Impala, this would happen for every session being created
    when user delegation is enabled via impala.doas.user and
    authorized_proxy_group_config. It has many gotchas, like spawning
    many processes at once in a highly concurrent setting, creating
    zombie processes when impalad crashes abruptly, etc.
    
    However, not everyone in the ecosystem has moved away from shell-based
    group mapping. For instance, in the Cloudera distribution many
    components still rely on it. So we need a way to let users opt into
    shell-based mapping instead of disallowing it altogether. This patch
    provides a flag that enables the support for users who are aware of
    the gotchas it comes with.
    
    Change-Id: I023f396a79f3aa27ad6ac80e91f527058a5a5470
    Reviewed-on: http://gerrit.cloudera.org:8080/18019
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/frontend.cc                                    | 5 +++++
 be/src/util/backend-gflag-util.cc                             | 3 +++
 common/thrift/BackendGflags.thrift                            | 2 ++
 docs/topics/impala_delegation.xml                             | 3 ++-
 fe/src/main/java/org/apache/impala/service/BackendConfig.java | 4 ++++
 fe/src/main/java/org/apache/impala/service/JniFrontend.java   | 3 ++-
 6 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc
index 608ba68..8105be7 100644
--- a/be/src/service/frontend.cc
+++ b/be/src/service/frontend.cc
@@ -74,6 +74,11 @@ DEFINE_string(authorized_proxy_group_config, "",
     "all users. For example: hue=group1,group2;admin=*");
 DEFINE_string(authorized_proxy_group_config_delimiter, ",",
     "Specifies the delimiter used in authorized_proxy_group_config. ");
+DEFINE_bool(enable_shell_based_groups_mapping_support, false,
+    "Enables support for Hadoop groups mapping "
+    "org.apache.hadoop.security.ShellBasedUnixGroupsMapping. By default this support "
+    "is not enabled as it can lead to many process getting spawned to fetch groups for "
+    "user using shell command.");
 DEFINE_string(kudu_master_hosts, "", "Specifies the default Kudu master(s). The given "
     "value should be a comma separated list of hostnames or IP addresses; ports are "
     "optional.");
diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc
index 1cc089d..f210ddb 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -48,6 +48,7 @@ DECLARE_string(principal);
 DECLARE_string(local_library_dir);
 DECLARE_string(server_name);
 DECLARE_string(authorized_proxy_group_config);
+DECLARE_bool(enable_shell_based_groups_mapping_support);
 DECLARE_string(catalog_topic_mode);
 DECLARE_string(kudu_master_hosts);
 DECLARE_string(reserved_words_version);
@@ -230,6 +231,8 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
   cfg.__set_max_filter_error_rate(FLAGS_max_filter_error_rate);
   cfg.__set_min_buffer_size(FLAGS_min_buffer_size);
   cfg.__set_authorized_proxy_group_config(FLAGS_authorized_proxy_group_config);
+  cfg.__set_enable_shell_based_groups_mapping_support(
+      FLAGS_enable_shell_based_groups_mapping_support);
   cfg.__set_disable_catalog_data_ops_debug_only(
       FLAGS_disable_catalog_data_ops_debug_only);
   cfg.__set_catalog_topic_mode(FLAGS_catalog_topic_mode);
diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift
index 88017f6..198f1f2 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -217,4 +217,6 @@ struct TBackendGflags {
   96: required string startup_filesystem_check_directories
 
   97: required bool hms_event_incremental_refresh_transactional_table
+
+  98: required bool enable_shell_based_groups_mapping_support
 }
diff --git a/docs/topics/impala_delegation.xml b/docs/topics/impala_delegation.xml
index dd535fb..03914c4 100644
--- a/docs/topics/impala_delegation.xml
+++ b/docs/topics/impala_delegation.xml
@@ -172,7 +172,8 @@ under the License.
 
       <li>
         ShellBasedUnixGroupsNetgroupMapping and ShellBasedUnixGroupsMapping Hadoop group mapping
-        providers are not supported in Impala group delegation.
+        providers are not supported in Impala group delegation by default. To enable them,
+        the <codeph>enable_shell_based_groups_mapping_support</codeph> flag must be set.
       </li>
 
       <li>
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index b0614ae..898ae90 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -103,6 +103,10 @@ public class BackendConfig {
     return !Strings.isNullOrEmpty(backendCfg_.authorized_proxy_group_config);
   }
 
+  public boolean isShellBasedGroupsMappingEnabled() {
+    return backendCfg_.enable_shell_based_groups_mapping_support;
+  }
+
   public boolean disableCatalogDataOpsDebugOnly() {
     return backendCfg_.disable_catalog_data_ops_debug_only;
   }
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index f8c7f2c..663673e 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -823,7 +823,8 @@ public class JniFrontend {
     output.append(checkLogFilePermission());
     output.append(checkFileSystem(CONF));
     output.append(checkShortCircuitRead(CONF));
-    if (BackendConfig.INSTANCE.isAuthorizedProxyGroupEnabled()) {
+    if (BackendConfig.INSTANCE.isAuthorizedProxyGroupEnabled() &&
+        !BackendConfig.INSTANCE.isShellBasedGroupsMappingEnabled()) {
       output.append(checkGroupsMappingProvider(CONF));
     }
     return output.toString();