You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2017/05/26 15:00:10 UTC

[2/3] incubator-impala git commit: IMPALA-5325: Do not update totalHdfsBytes_/numHdfsFiles_ on Catalogd

IMPALA-5325: Do not update totalHdfsBytes_/numHdfsFiles_ on Catalogd

We need not account these hdfs table metrics on the Catalog
server as they are eventually calculated again on the Impalads
while unpacking the corresponding thrift table.

This fix can potentially affect the frontend tests that directly load
the Catalog's version of HdfsTable without the loadFromThrift() call
paths that do the accounting. That is fixed by adding a separate call
that computes these values and is called from
ImpaladTestCatalog.getTable().

Testing: Enough tests already cover these code paths like show stats/
table or partition refresh tests etc. No new tests are added.

Change-Id: I03cc6f9e9b2c03cafb87029ea0802dfdb2745be1
Reviewed-on: http://gerrit.cloudera.org:8080/6970
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5d59d854
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5d59d854
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5d59d854

Branch: refs/heads/master
Commit: 5d59d854d0274c89debb6f654f5cb3a0d2ef48e5
Parents: 4e54979
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Tue May 23 17:45:20 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri May 26 09:52:46 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/catalog/HdfsTable.java    | 33 +++++++++++++-------
 .../impala/testutil/ImpaladTestCatalog.java     |  7 ++++-
 2 files changed, 27 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 23a8c96..d8d8d4a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -165,10 +165,12 @@ public class HdfsTable extends Table {
 
   private HdfsPartitionLocationCompressor partitionLocationCompressor_;
 
-  // Total number of Hdfs files in this table. Set in load().
+  // Total number of Hdfs files in this table. Accounted only in the Impalad catalog
+  // cache. Set to -1 on Catalogd.
   private long numHdfsFiles_;
 
-  // Sum of sizes of all Hdfs files in this table. Set in load().
+  // Sum of sizes of all Hdfs files in this table. Accounted only in the Impalad
+  // catalog cache. Set to -1 on Catalogd.
   private long totalHdfsBytes_;
 
   // True iff the table's partitions are located on more than one filesystem.
@@ -231,6 +233,21 @@ public class HdfsTable extends Table {
   }
 
   /**
+   * Updates numHdfsFiles_ and totalHdfsBytes_ based on the partition information.
+   * This is used only for the frontend tests that do not spawn a separate Catalog
+   * instance.
+   */
+  public void computeHdfsStatsForTesting() {
+    Preconditions.checkState(numHdfsFiles_ == -1 && totalHdfsBytes_ == -1);
+    numHdfsFiles_ = 0;
+    totalHdfsBytes_ = 0;
+    for (HdfsPartition partition: partitionMap_.values()) {
+      numHdfsFiles_ += partition.getNumFileDescriptors();
+      totalHdfsBytes_ += partition.getSize();
+    }
+  }
+
+  /**
    * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose
    * location is under the given 'dirPath'. It involves the following steps:
    * - Clear the current block metadata of the partitions.
@@ -305,8 +322,6 @@ public class HdfsTable extends Table {
         // Update the partitions' metadata that this file belongs to.
         for (HdfsPartition partition: partitions) {
           partition.getFileDescriptors().add(fd);
-          numHdfsFiles_++;
-          totalHdfsBytes_ += fd.getFileLength();
         }
       }
 
@@ -369,8 +384,6 @@ public class HdfsTable extends Table {
       // Update the partitions' metadata that this file belongs to.
       for (HdfsPartition partition: partitions) {
         partition.getFileDescriptors().add(fd);
-        numHdfsFiles_++;
-        totalHdfsBytes_ += fd.getFileLength();
       }
     }
   }
@@ -726,8 +739,6 @@ public class HdfsTable extends Table {
         newPartSizeBytes += fileStatus.getLen();
       }
       partition.setFileDescriptors(newFileDescs);
-      numHdfsFiles_ += newFileDescs.size();
-      totalHdfsBytes_ += newPartSizeBytes;
     } catch(IOException e) {
       throw new CatalogException("Error loading block metadata for partition " +
           partition.toString(), e);
@@ -1072,6 +1083,8 @@ public class HdfsTable extends Table {
       }
       if (loadTableSchema) setAvroSchema(client, msTbl);
       updateStatsFromHmsTable(msTbl);
+      numHdfsFiles_ = -1;
+      totalHdfsBytes_ = -1;
     } catch (TableLoadingException e) {
       throw e;
     } catch (Exception e) {
@@ -1477,10 +1490,6 @@ public class HdfsTable extends Table {
     Path partDirPath = new Path(storageDescriptor.getLocation());
     FileSystem fs = partDirPath.getFileSystem(CONF);
     if (!fs.exists(partDirPath)) return;
-
-    numHdfsFiles_ -= partition.getNumFileDescriptors();
-    totalHdfsBytes_ -= partition.getSize();
-    Preconditions.checkState(numHdfsFiles_ >= 0 && totalHdfsBytes_ >= 0);
     refreshFileMetadata(partition);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5d59d854/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
index b3167ce..fdc64e6 100644
--- a/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
+++ b/fe/src/test/java/org/apache/impala/testutil/ImpaladTestCatalog.java
@@ -24,6 +24,7 @@ import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.HdfsCachePool;
 import org.apache.impala.catalog.ImpaladCatalog;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.util.PatternMatcher;
 import com.google.common.base.Preconditions;
 
@@ -86,6 +87,10 @@ public class ImpaladTestCatalog extends ImpaladCatalog {
     Db db = getDb(dbName);
     Preconditions.checkNotNull(db);
     db.addTable(newTbl);
-    return super.getTable(dbName, tableName);
+    Table resultTable = super.getTable(dbName, tableName);
+    if (resultTable instanceof HdfsTable) {
+      ((HdfsTable) resultTable).computeHdfsStatsForTesting();
+    }
+    return resultTable;
   }
 }