You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/07/03 00:40:01 UTC

[impala] branch master updated: IMPALA-6876: Fix possible memory leak in CatalogTableMetrics after reset metadata.

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new d52edc5e4 IMPALA-6876: Fix possible memory leak in CatalogTableMetrics after reset metadata.
d52edc5e4 is described below

commit d52edc5e425b50f0b59ec1f8cc10698eb8f4282e
Author: Xiang Yang <yx...@126.com>
AuthorDate: Sun Jun 18 12:44:03 2023 +0000

    IMPALA-6876: Fix possible memory leak in CatalogTableMetrics after reset metadata.
    
    The CatalogTableMetrics maintains a cache of 'org.apache.impala.catalog.
    Table' which may obtain references to CatalogServiceCatalog 'dbCache_'
    field's old value after 'invalidate metadata' triggered, in which case
    the old db cache can't be recycled by GC, and causing a possible memory
    leak in catalogd.
    
    The solution is simpling the cache element's type to a new class
    'TableMetric' which only contains necessary value fields and doesn't
    contains complex reference such as 'org.apache.impala.catalog.Db'.
    
    Testing:
     - Manually test:
       1. trigger CatalogTableMetrics's cache by executing general query on
          some tables and other SQL such as 'create table like' SQL.
       2. execute 'invalidate metadata' to trigger catalog version upgrade.
       3. dump catalogd's JVM memory image by command 'jmap -dump:live,
          format=b,file=<path> <catalog_pid>'.
       4. count the 'org.apache.impala.catalog.Db' class's object number
          by analyze the dump file using Eclipse Memory Analyzer, and check
          whether the Db's object number match the actual.
     - Add FE test 'CatalogTableMetricsTest'.
    
    Change-Id: Ib91635cc40878cbb76b71ee7d81418ea753892c3
    Reviewed-on: http://gerrit.cloudera.org:8080/20096
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/catalog/CatalogServiceCatalog.java      |  46 ++++---
 .../catalog/monitor/CatalogTableMetrics.java       | 149 ++++++++++++++-------
 .../catalog/monitor/TableLoadingTimeHistogram.java |  40 ++++++
 .../main/java/org/apache/impala/common/Pair.java   |   7 +
 .../java/org/apache/impala/util/TopNCache.java     |  10 +-
 .../catalog/monitor/CatalogTableMetricsTest.java   |  56 ++++++++
 6 files changed, 240 insertions(+), 68 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 1e7ab5ac5..9b404dc98 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -17,6 +17,11 @@
 
 package org.apache.impala.catalog;
 
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P100;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P50;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P75;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P95;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P99;
 import static org.apache.impala.thrift.TCatalogObjectType.HDFS_PARTITION;
 import static org.apache.impala.thrift.TCatalogObjectType.TABLE;
 
@@ -72,6 +77,7 @@ import org.apache.impala.catalog.monitor.CatalogMonitor;
 import org.apache.impala.catalog.monitor.CatalogTableMetrics;
 import org.apache.impala.catalog.metastore.HmsApiNameEnum;
 import org.apache.impala.catalog.metastore.ICatalogMetastoreServer;
+import org.apache.impala.catalog.monitor.TableLoadingTimeHistogram;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.Pair;
@@ -2382,6 +2388,9 @@ public class CatalogServiceCatalog extends Catalog {
     versionLock_.writeLock().lock();
     try {
       Table removedTable = parentDb.removeTable(tblName);
+      if (removedTable != null && !removedTable.isStoredInImpaladCatalogCache()) {
+        CatalogMonitor.INSTANCE.getCatalogTableMetrics().removeTable(removedTable);
+      }
       if (removedTable != null) {
         removedTable.setCatalogVersion(incrementAndGetCatalogVersion());
         deleteLog_.addRemovedObject(removedTable.toMinimalTCatalogObject());
@@ -3360,39 +3369,42 @@ public class CatalogServiceCatalog extends Catalog {
     usage.setFrequently_accessed_tables(new ArrayList<>());
     usage.setHigh_file_count_tables(new ArrayList<>());
     usage.setLong_metadata_loading_tables(new ArrayList<>());
-    for (Table largeTable : catalogTableMetrics.getLargestTables()) {
+    for (Pair<TTableName, Long> largeTable : catalogTableMetrics.getLargestTables()) {
       TTableUsageMetrics tableUsageMetrics =
-          new TTableUsageMetrics(largeTable.getTableName().toThrift());
-      tableUsageMetrics.setMemory_estimate_bytes(largeTable.getEstimatedMetadataSize());
+          new TTableUsageMetrics(largeTable.getFirst());
+      tableUsageMetrics.setMemory_estimate_bytes(largeTable.getSecond());
       usage.addToLarge_tables(tableUsageMetrics);
     }
-    for (Table frequentTable : catalogTableMetrics.getFrequentlyAccessedTables()) {
+    for (Pair<TTableName, Long> frequentTable :
+            catalogTableMetrics.getFrequentlyAccessedTables()) {
       TTableUsageMetrics tableUsageMetrics =
-          new TTableUsageMetrics(frequentTable.getTableName().toThrift());
-      tableUsageMetrics.setNum_metadata_operations(frequentTable.getMetadataOpsCount());
+          new TTableUsageMetrics(frequentTable.getFirst());
+      tableUsageMetrics.setNum_metadata_operations(frequentTable.getSecond());
       usage.addToFrequently_accessed_tables(tableUsageMetrics);
     }
-    for (Table mostFilesTable : catalogTableMetrics.getHighFileCountTables()) {
+    for (Pair<TTableName, Long> mostFilesTable :
+            catalogTableMetrics.getHighFileCountTables()) {
       TTableUsageMetrics tableUsageMetrics =
-          new TTableUsageMetrics(mostFilesTable.getTableName().toThrift());
-      tableUsageMetrics.setNum_files(mostFilesTable.getNumFiles());
+          new TTableUsageMetrics(mostFilesTable.getFirst());
+      tableUsageMetrics.setNum_files(mostFilesTable.getSecond());
       usage.addToHigh_file_count_tables(tableUsageMetrics);
     }
-    for (Table longestLoadingTable : catalogTableMetrics.getLongMetadataLoadingTables()) {
+    for (Pair<TTableName, TableLoadingTimeHistogram> longestLoadingTable :
+            catalogTableMetrics.getLongMetadataLoadingTables()) {
       TTableUsageMetrics tableUsageMetrics =
-          new TTableUsageMetrics(longestLoadingTable.getTableName().toThrift());
+          new TTableUsageMetrics(longestLoadingTable.getFirst());
       tableUsageMetrics.setMedian_table_loading_ns(
-          longestLoadingTable.getMedianTableLoadingTime());
+          longestLoadingTable.getSecond().getQuantile(P50));
       tableUsageMetrics.setMax_table_loading_ns(
-          longestLoadingTable.getMaxTableLoadingTime());
+          longestLoadingTable.getSecond().getQuantile(P100));
       tableUsageMetrics.setP75_loading_time_ns(
-          longestLoadingTable.get75TableLoadingTime());
+          longestLoadingTable.getSecond().getQuantile(P75));
       tableUsageMetrics.setP95_loading_time_ns(
-          longestLoadingTable.get95TableLoadingTime());
+          longestLoadingTable.getSecond().getQuantile(P95));
       tableUsageMetrics.setP99_loading_time_ns(
-          longestLoadingTable.get99TableLoadingTime());
+          longestLoadingTable.getSecond().getQuantile(P99));
       tableUsageMetrics.setNum_table_loading(
-          longestLoadingTable.getTableLoadingCounts());
+          longestLoadingTable.getSecond().getCount());
       usage.addToLong_metadata_loading_tables(tableUsageMetrics);
     }
     return usage;
diff --git a/fe/src/main/java/org/apache/impala/catalog/monitor/CatalogTableMetrics.java b/fe/src/main/java/org/apache/impala/catalog/monitor/CatalogTableMetrics.java
index 077886fe0..eab7ba0ca 100644
--- a/fe/src/main/java/org/apache/impala/catalog/monitor/CatalogTableMetrics.java
+++ b/fe/src/main/java/org/apache/impala/catalog/monitor/CatalogTableMetrics.java
@@ -17,31 +17,40 @@
 
 package org.apache.impala.catalog.monitor;
 
-import com.google.common.base.Function;
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.common.Pair;
+import org.apache.impala.thrift.TTableName;
 import org.apache.impala.util.TopNCache;
 
 import java.util.List;
 
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P100;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P50;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P75;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P95;
+import static org.apache.impala.catalog.monitor.TableLoadingTimeHistogram.Quantile.P99;
+
 /**
  * Class that monitors catalog table usage. Currently, it tracks,
- *  - the most frequently accessed tables (in terms of number of metadata operations)
- *  - the tables with the highest (estimated) memory requirements
- *  - the tables with the highest number of files
- *  - the tables with the longest table metadata loading time
- *
- *  This class is thread-safe.
+ * - the most frequently accessed tables (in terms of number of metadata operations)
+ * - the tables with the highest (estimated) memory requirements
+ * - the tables with the highest number of files
+ * - the tables with the longest table metadata loading time
+ * <p>
+ * This class is thread-safe.
  */
 public final class CatalogTableMetrics {
   public final static CatalogTableMetrics INSTANCE = new CatalogTableMetrics();
 
-  private final TopNCache<Table, Long> frequentlyAccessedTables_;
+  private final TopNCache<TableMetric<Long>, Long> frequentlyAccessedTables_;
 
-  private final TopNCache<Table, Long> largestTables_;
+  private final TopNCache<TableMetric<Long>, Long> largestTables_;
 
-  private final TopNCache<Table, Long> highFileCountTables_;
+  private final TopNCache<TableMetric<Long>, Long> highFileCountTables_;
 
-  private final TopNCache<Table, Long> longMetadataLoadingTables_;
+  private final TopNCache<TableMetric<TableLoadingTimeHistogram>, Long>
+      longMetadataLoadingTables_;
 
   private CatalogTableMetrics() {
     final int num_tables_tracked = Integer.getInteger(
@@ -49,68 +58,108 @@ public final class CatalogTableMetrics {
     final int num_loading_time_tables_tracked = Integer.getInteger(
         "org.apache.impala.catalog.CatalogUsageMonitor.NUM_LOADING_TIME_TABLES_TRACKED",
         100);
-    frequentlyAccessedTables_ = new TopNCache<Table, Long>(new Function<Table, Long>() {
-      @Override
-      public Long apply(Table tbl) {
-        return tbl.getMetadataOpsCount();
-      }
-    }, num_tables_tracked, true);
-
-    largestTables_ = new TopNCache<Table, Long>(new Function<Table, Long>() {
-      @Override
-      public Long apply(Table tbl) {
-        return tbl.getEstimatedMetadataSize();
-      }
-    }, num_tables_tracked, false);
-
-    highFileCountTables_ = new TopNCache<Table, Long>(new Function<Table, Long>() {
-      @Override
-      public Long apply(Table tbl) {
-        return tbl.getNumFiles();
-      }
-    }, num_tables_tracked, false);
+    frequentlyAccessedTables_ =
+        new TopNCache<>(TableMetric::getSecond, num_tables_tracked, true);
+
+    largestTables_ = new TopNCache<>(TableMetric::getSecond, num_tables_tracked, false);
+
+    highFileCountTables_ =
+        new TopNCache<>(TableMetric::getSecond, num_tables_tracked, false);
 
     // sort by maximum loading time by default
-    longMetadataLoadingTables_ = new TopNCache<Table, Long>(new Function<Table, Long>() {
-      @Override
-      public Long apply(Table tbl) {
-        return tbl.getMaxTableLoadingTime();
-      }
-    }, num_loading_time_tables_tracked, false);
+    longMetadataLoadingTables_ = new TopNCache<>(metric
+        -> metric.getSecond().getQuantile(P100),
+        num_loading_time_tables_tracked, false);
   }
 
   public void updateFrequentlyAccessedTables(Table tbl) {
-    frequentlyAccessedTables_.putOrUpdate(tbl);
+    TableMetric<Long> metric = TableMetric.of(tbl, tbl.getMetadataOpsCount());
+    frequentlyAccessedTables_.putOrUpdate(metric);
   }
 
-  public void updateLargestTables(Table tbl) { largestTables_.putOrUpdate(tbl); }
+  public void updateLargestTables(Table tbl) {
+    TableMetric<Long> metric = TableMetric.of(tbl, tbl.getEstimatedMetadataSize());
+    largestTables_.putOrUpdate(metric);
+  }
 
   public void updateHighFileCountTables(Table tbl) {
-    highFileCountTables_.putOrUpdate(tbl);
+    TableMetric<Long> metric = TableMetric.of(tbl, tbl.getNumFiles());
+    highFileCountTables_.putOrUpdate(metric);
   }
 
   public void updateLongMetadataLoadingTables(Table tbl) {
-    longMetadataLoadingTables_.putOrUpdate(tbl);
+    TableLoadingTimeHistogram histogram = new TableLoadingTimeHistogram();
+    histogram.setQuantile(P50, tbl.getMedianTableLoadingTime());
+    histogram.setQuantile(P75, tbl.get75TableLoadingTime());
+    histogram.setQuantile(P95, tbl.get95TableLoadingTime());
+    histogram.setQuantile(P99, tbl.get99TableLoadingTime());
+    histogram.setQuantile(P100, tbl.getMaxTableLoadingTime());
+    histogram.setCount(tbl.getTableLoadingCounts());
+    TableMetric<TableLoadingTimeHistogram> metric = TableMetric.of(tbl, histogram);
+    longMetadataLoadingTables_.putOrUpdate(metric);
   }
 
+  @SuppressWarnings({"unchecked", "rawtypes"})
   public void removeTable(Table tbl) {
-    frequentlyAccessedTables_.remove(tbl);
-    largestTables_.remove(tbl);
-    highFileCountTables_.remove(tbl);
-    longMetadataLoadingTables_.remove(tbl);
+    TableMetric metric = TableMetric.emptyOf(tbl);
+    frequentlyAccessedTables_.remove(metric);
+    largestTables_.remove(metric);
+    highFileCountTables_.remove(metric);
+    longMetadataLoadingTables_.remove(metric);
   }
 
-  public List<Table> getFrequentlyAccessedTables() {
+  /**
+   * Removes all tables from the underlying TopNCache.
+   */
+  @VisibleForTesting
+  synchronized void removeAllTables() {
+    frequentlyAccessedTables_.removeAll();
+    largestTables_.removeAll();
+    highFileCountTables_.removeAll();
+    longMetadataLoadingTables_.removeAll();
+  }
+
+  public List<? extends Pair<TTableName, Long>> getFrequentlyAccessedTables() {
     return frequentlyAccessedTables_.listEntries();
   }
 
-  public List<Table> getLargestTables() { return largestTables_.listEntries(); }
+  public List<? extends Pair<TTableName, Long>> getLargestTables() {
+    return largestTables_.listEntries();
+  }
 
-  public List<Table> getHighFileCountTables() {
+  public List<? extends Pair<TTableName, Long>> getHighFileCountTables() {
     return highFileCountTables_.listEntries();
   }
 
-  public List<Table> getLongMetadataLoadingTables() {
+  public List<? extends Pair<TTableName, TableLoadingTimeHistogram>>
+      getLongMetadataLoadingTables() {
     return longMetadataLoadingTables_.listEntries();
   }
-}
\ No newline at end of file
+
+  /**
+   * a data class to isolate the implicit refernce to
+   * {@link org.apache.impala.catalog.Db}, see IMPALA-6876.
+   */
+  static class TableMetric<T> extends Pair<TTableName, T> {
+    TableMetric(TTableName tableName, T value) { super(tableName, value); }
+
+    static <T> TableMetric<T> of(Table tbl, T value) {
+      TTableName tTableName = tbl.getTableName().toThrift();
+      return new TableMetric<>(tTableName, value);
+    }
+
+    static TableMetric<?> emptyOf(Table tbl) { return of(tbl, null); }
+
+    @Override
+    public int hashCode() {
+      return first.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (obj == null) return false;
+      if (!(obj instanceof TableMetric)) return false;
+      return first.equals(((TableMetric<?>) obj).first);
+    }
+  }
+}
diff --git a/fe/src/main/java/org/apache/impala/catalog/monitor/TableLoadingTimeHistogram.java b/fe/src/main/java/org/apache/impala/catalog/monitor/TableLoadingTimeHistogram.java
new file mode 100644
index 000000000..98be9ec97
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/catalog/monitor/TableLoadingTimeHistogram.java
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.catalog.monitor;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * a Histogram to count table loading time.
+ */
+public class TableLoadingTimeHistogram {
+  public enum Quantile { P50, P75, P95, P99, P100 }
+
+  private long count;
+
+  private final Map<Quantile, Long> map = new HashMap<>(8);
+
+  void setQuantile(Quantile quantile, Long val) { map.put(quantile, val); }
+
+  public Long getQuantile(Quantile quantile) { return map.get(quantile); }
+
+  void setCount(long count) { this.count = count; }
+
+  public long getCount() { return this.count; }
+}
diff --git a/fe/src/main/java/org/apache/impala/common/Pair.java b/fe/src/main/java/org/apache/impala/common/Pair.java
index aa2f104e9..2692b2800 100644
--- a/fe/src/main/java/org/apache/impala/common/Pair.java
+++ b/fe/src/main/java/org/apache/impala/common/Pair.java
@@ -29,6 +29,13 @@ public class Pair<F, S> {
     this.second = second;
   }
 
+  public F getFirst() {
+    return first;
+  }
+  public S getSecond() {
+    return second;
+  }
+
   @Override
   /**
    * A pair is equal if both parts are equal().
diff --git a/fe/src/main/java/org/apache/impala/util/TopNCache.java b/fe/src/main/java/org/apache/impala/util/TopNCache.java
index 9f6b97215..ef1d64f2d 100644
--- a/fe/src/main/java/org/apache/impala/util/TopNCache.java
+++ b/fe/src/main/java/org/apache/impala/util/TopNCache.java
@@ -21,6 +21,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.PriorityQueue;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -100,9 +101,16 @@ public final class TopNCache<T, R extends Long>  {
    */
   public synchronized void remove(T item) { heap_.remove(item); }
 
+  /**
+   * Removes all items from the cache.
+   */
+  @VisibleForTesting
+  public synchronized void removeAll() {
+    heap_.clear();
+  }
+
   /**
    * Returns the list of all the items in the cache.
    */
   public synchronized List<T> listEntries() { return ImmutableList.copyOf(heap_); }
 }
-
diff --git a/fe/src/test/java/org/apache/impala/catalog/monitor/CatalogTableMetricsTest.java b/fe/src/test/java/org/apache/impala/catalog/monitor/CatalogTableMetricsTest.java
new file mode 100644
index 000000000..d6d9515d1
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/catalog/monitor/CatalogTableMetricsTest.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.catalog.monitor;
+
+import org.apache.impala.catalog.DatabaseNotFoundException;
+import org.apache.impala.catalog.Table;
+import org.apache.impala.testutil.CatalogServiceTestCatalog;
+import org.junit.Test;
+
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static org.apache.impala.catalog.monitor.CatalogTableMetrics.INSTANCE;
+import static org.junit.Assert.assertEquals;
+
+public class CatalogTableMetricsTest {
+  private final CatalogServiceTestCatalog catalog_ = CatalogServiceTestCatalog.create();
+
+  @Test
+  public void testAddRemoveInverse() throws DatabaseNotFoundException {
+    INSTANCE.removeAllTables();
+    Table table = catalog_.getTable("functional", "alltypes");
+    assertAddRemoveInverse(table, INSTANCE::updateFrequentlyAccessedTables,
+        () -> INSTANCE.getFrequentlyAccessedTables().size());
+    assertAddRemoveInverse(
+        table, INSTANCE::updateLargestTables, () -> INSTANCE.getLargestTables().size());
+    assertAddRemoveInverse(table, INSTANCE::updateHighFileCountTables,
+        () -> INSTANCE.getHighFileCountTables().size());
+    assertAddRemoveInverse(table, INSTANCE::updateLongMetadataLoadingTables,
+        () -> INSTANCE.getLongMetadataLoadingTables().size());
+  }
+
+  private void assertAddRemoveInverse(
+      Table table, Consumer<Table> metricUpdater, Supplier<Integer> tableCounter) {
+    assertEquals(0, tableCounter.get().intValue());
+    metricUpdater.accept(table);
+    assertEquals(1, tableCounter.get().intValue());
+    INSTANCE.removeTable(table);
+    assertEquals(0, tableCounter.get().intValue());
+  }
+}
\ No newline at end of file