You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2017/03/18 00:28:51 UTC
[2/4] incubator-impala git commit: IMPALA-5042: Use a HashSet instead of ArrayList for O(1) lookups

IMPALA-5042: Use a HashSet instead of ArrayList for O(1) lookups

Testing: Ran the metadata perf benchmark. No regressions were observed,
and good gains were found in the following cases:

100K-PARTITIONS-1M-FILES-CUSTOM-05-QUERY-AFTER-INVALIDATE ~81.3%
100K-PARTITIONS-1M-FILES-CUSTOM-07-REFRESH ~81.3%
100K-PARTITIONS-1M-FILES-CUSTOM-10-REFRESH-AFTER-ADD-PARTITION ~81.7%

Change-Id: Ia9eccfe853583a0b78a5280f1b9525ce97f88cb5
Reviewed-on: http://gerrit.cloudera.org:8080/6319
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6dff9066
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6dff9066
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6dff9066

Branch: refs/heads/master
Commit: 6dff90661c07241794de5c24f4f27e7712dca82c
Parents: 6951030
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Wed Mar 8 11:10:27 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Mar 17 10:20:57 2017 +0000

----------------------------------------------------------------------
 fe/src/main/java/org/apache/impala/catalog/HdfsTable.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6dff9066/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 3e51cba..30241b0 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -718,7 +718,7 @@ public class HdfsTable extends Table {
     // separately.
     // TODO: We can still do some advanced optimization by grouping all the partition
     // directories under the same ancestor path up the tree.
-    List<Path> dirsToLoad = Lists.newArrayList(tblLocation);
+    Set<Path> dirsToLoad = Sets.newHashSet(tblLocation);
 
     if (msTbl.getPartitionKeysSize() == 0) {
       Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
@@ -839,7 +839,7 @@ public class HdfsTable extends Table {
    * and filtering only the paths from 'partsByPath'. Also loads the disk IDs
    * corresponding to these block locations.
    */
-  private void loadMetadataAndDiskIds(List<Path> locations,
+  private void loadMetadataAndDiskIds(Set<Path> locations,
       HashMap<Path, List<HdfsPartition>> partsByPath) {
     LOG.info(String.format(
         "Loading file and block metadata for %s partitions from %s paths: %s",