You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2021/11/09 12:23:58 UTC

[hive] branch master updated: HIVE-24590: Operation logging still leaks log4j appenders (Stamatis Zampetakis, reviewed by David Mollitor)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 2dcd72f  HIVE-24590: Operation logging still leaks log4j appenders (Stamatis Zampetakis, reviewed by David Mollitor)
2dcd72f is described below

commit 2dcd72f22d0edccfb5a21b56b7c9a60f5371837e
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Mon Jan 11 16:53:50 2021 +0100

    HIVE-24590: Operation logging still leaks log4j appenders (Stamatis Zampetakis, reviewed by David Mollitor)
    
    1. Handle dynamic routing appenders via Log4j's IdlePurgePolicy
    2. Remove in-house management of appenders.
    3. Add Hive property to control lifespan of inactive operation log
    appenders.
    
    The commit solves the (file) descriptor leak described in the JIRA issue by
    delegating the burden of managing appenders to Log4j.
    
    Closes #2432
---
 .../org/apache/hadoop/hive/common/LogUtils.java    | 32 ----------------------
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  5 ++++
 data/conf/hive-site.xml                            |  4 +++
 .../cli/operation/TestOperationLoggingLayout.java  |  3 ++
 .../hadoop/hive/ql/log/LogDivertAppender.java      | 12 +++++++-
 .../hive/ql/log/LogDivertAppenderForTest.java      | 12 +++++++-
 .../hive/service/cli/operation/Operation.java      |  6 ----
 7 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/LogUtils.java b/common/src/java/org/apache/hadoop/hive/common/LogUtils.java
index 3857e2d..89c991d 100644
--- a/common/src/java/org/apache/hadoop/hive/common/LogUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/LogUtils.java
@@ -19,10 +19,7 @@
 package org.apache.hadoop.hive.common;
 
 import java.io.File;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
 import java.net.URL;
-import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -31,11 +28,8 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.core.Appender;
 import org.apache.logging.log4j.core.appender.FileAppender;
 import org.apache.logging.log4j.core.appender.RollingFileAppender;
-import org.apache.logging.log4j.core.LoggerContext;
 import org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender;
-import org.apache.logging.log4j.core.appender.routing.RoutingAppender;
 import org.apache.logging.log4j.core.config.Configurator;
-import org.apache.logging.log4j.core.config.LoggerConfig;
 import org.apache.logging.log4j.core.impl.Log4jContextFactory;
 import org.apache.logging.log4j.spi.DefaultThreadContextMap;
 import org.slf4j.Logger;
@@ -265,30 +259,4 @@ public class LogUtils {
     return logFilePath;
   }
 
-  /**
-   * Stop the subordinate appender for the operation log so it will not leak a file descriptor.
-   * @param routingAppenderName the name of the RoutingAppender
-   * @param queryId the id of the query that is closing
-   */
-  public static void stopQueryAppender(String routingAppenderName, String queryId) {
-    LoggerContext context = (LoggerContext) LogManager.getContext(false);
-    org.apache.logging.log4j.core.config.Configuration configuration = context.getConfiguration();
-    LoggerConfig loggerConfig = configuration.getRootLogger();
-    Map<String, Appender> appenders = loggerConfig.getAppenders();
-    RoutingAppender routingAppender = (RoutingAppender) appenders.get(routingAppenderName);
-    // routingAppender can be null if it has not been registered
-    if (routingAppender != null) {
-      // The appender is configured to use ${ctx:queryId} by registerRoutingAppender()
-      try {
-        Class<? extends RoutingAppender> clazz = routingAppender.getClass();
-        Method method = clazz.getDeclaredMethod("deleteAppender", String.class);
-        method.setAccessible(true);
-        method.invoke(routingAppender, queryId);
-      } catch (NoSuchMethodException | SecurityException | IllegalAccessException |
-          IllegalArgumentException | InvocationTargetException e) {
-        l4j.warn("Unable to close the operation log appender for query id " + queryId, e);
-      }
-    }
-  }
-
 }
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 35f4b93..47fb831 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3920,6 +3920,11 @@ public class HiveConf extends Configuration {
     HIVE_SERVER2_OPERATION_LOG_CLEANUP_DELAY("hive.server2.operation.log.cleanup.delay", "300s",
       new TimeValidator(TimeUnit.SECONDS), "When a query is cancelled (via kill query, query timeout or triggers),\n" +
       " operation logs gets cleaned up after this delay"),
+    HIVE_SERVER2_OPERATION_LOG_PURGEPOLICY_TIMETOLIVE("hive.server2.operation.log.purgePolicy.timeToLive",
+        "60s", new TimeValidator(TimeUnit.SECONDS), 
+        "Number of seconds the appender, which has been dynamically created by Log4J framework for the " + 
+        "operation log, should survive without having any events sent to it. For more details, check " + 
+        "Log4J's IdlePurgePolicy."),
     HIVE_SERVER2_HISTORIC_OPERATION_LOG_ENABLED("hive.server2.historic.operation.log.enabled", false,
         "Keep the operation log for some time until the operation's query info is evicted from QueryInfoCache."),
     HIVE_SERVER2_HISTORIC_OPERATION_LOG_CHECK_INTERVAL("hive.server2.historic.operation.log.check.interval", "15m",
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 0f21062..a47ba97 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -400,4 +400,8 @@
   </description>
 </property>
 
+<property>
+  <name>hive.server2.operation.log.purgePolicy.timeToLive</name>
+  <value>5s</value>
+</property>
 </configuration>
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingLayout.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingLayout.java
index d4d53d4..e2a7d33 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingLayout.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingLayout.java
@@ -152,6 +152,9 @@ public class TestOperationLoggingLayout {
     checkAppenderState("before operation close ", LogDivertAppender.QUERY_ROUTING_APPENDER, queryId, expectedStopped);
     checkAppenderState("before operation close ", LogDivertAppenderForTest.TEST_QUERY_ROUTING_APPENDER, queryId, expectedStopped);
     client.closeOperation(operationHandle);
+    checkAppenderState("after operation close ", LogDivertAppender.QUERY_ROUTING_APPENDER, queryId, expectedStopped);
+    checkAppenderState("after operation close ", LogDivertAppenderForTest.TEST_QUERY_ROUTING_APPENDER, queryId, expectedStopped);
+    Thread.sleep(8000);
     checkAppenderState("after operation close ", LogDivertAppender.QUERY_ROUTING_APPENDER, queryId, true);
     checkAppenderState("after operation close ", LogDivertAppenderForTest.TEST_QUERY_ROUTING_APPENDER, queryId, true);
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java
index 5787e2b..baf6f05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.log;
 
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.hive.common.LogUtils;
@@ -29,6 +30,8 @@ import org.apache.hadoop.hive.ql.stats.BasicStatsTask;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.core.LogEvent;
 import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.appender.routing.IdlePurgePolicy;
+import org.apache.logging.log4j.core.appender.routing.PurgePolicy;
 import org.apache.logging.log4j.core.appender.routing.Route;
 import org.apache.logging.log4j.core.appender.routing.Routes;
 import org.apache.logging.log4j.core.appender.routing.RoutingAppender;
@@ -241,12 +244,19 @@ public class LogDivertAppender {
     LoggerContext context = (LoggerContext) LogManager.getContext(false);
     Configuration configuration = context.getConfiguration();
 
+    String timeToLive = String.valueOf(HiveConf
+        .getTimeVar(conf, HiveConf.ConfVars.HIVE_SERVER2_OPERATION_LOG_PURGEPOLICY_TIMETOLIVE, TimeUnit.SECONDS));
+    PurgePolicy purgePolicy = IdlePurgePolicy.createPurgePolicy(timeToLive, null, "SECONDS", configuration);
+    // Hack: due to the (non-standard) way that log4j configuration is extended to introduce the routing appender
+    // the life-cycle methods are not called as expected leading to initialization problems (such as the scheduler)
+    configuration.getScheduler().incrementScheduledItems();
+
     RoutingAppender routingAppender = RoutingAppender.createAppender(QUERY_ROUTING_APPENDER,
         "true",
         routes,
         configuration,
         null,
-        null,
+        purgePolicy,
         null);
 
     LoggerConfig loggerConfig = configuration.getRootLogger();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
index 8078d90..fd26ac8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
@@ -22,6 +22,8 @@ import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.core.LogEvent;
 import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.appender.routing.IdlePurgePolicy;
+import org.apache.logging.log4j.core.appender.routing.PurgePolicy;
 import org.apache.logging.log4j.core.appender.routing.Route;
 import org.apache.logging.log4j.core.appender.routing.Routes;
 import org.apache.logging.log4j.core.appender.routing.RoutingAppender;
@@ -35,6 +37,8 @@ import org.apache.logging.log4j.core.config.plugins.util.PluginType;
 import org.apache.logging.log4j.core.filter.AbstractFilter;
 import org.apache.logging.log4j.core.layout.PatternLayout;
 
+import java.util.concurrent.TimeUnit;
+
 /**
  * Divert appender to redirect and filter test operation logs to match the output of the original
  * CLI qtest results.
@@ -172,6 +176,12 @@ public final class LogDivertAppenderForTest {
 
     LoggerContext context = (LoggerContext)LogManager.getContext(false);
     Configuration configuration = context.getConfiguration();
+    String timeToLive = String.valueOf(HiveConf
+        .getTimeVar(conf, HiveConf.ConfVars.HIVE_SERVER2_OPERATION_LOG_PURGEPOLICY_TIMETOLIVE, TimeUnit.SECONDS));
+    PurgePolicy purgePolicy = IdlePurgePolicy.createPurgePolicy(timeToLive, null, "SECONDS", configuration);
+    // Hack: due to the (non-standard) way that log4j configuration is extended to introduce the routing appender
+    // the life-cycle methods are not called as expected leading to initialization problems (such as the scheduler)
+    configuration.getScheduler().incrementScheduledItems();
 
     // Create the appender
     RoutingAppender routingAppender = RoutingAppender.createAppender(TEST_QUERY_ROUTING_APPENDER,
@@ -179,7 +189,7 @@ public final class LogDivertAppenderForTest {
         routes,
         configuration,
         null,
-        null,
+        purgePolicy,
         null);
 
     LoggerConfig loggerConfig = configuration.getRootLogger();
diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
index 55551e8..91dfa95 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
@@ -35,8 +35,6 @@ import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
 import org.apache.hadoop.hive.common.metrics.common.MetricsScope;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.log.LogDivertAppender;
-import org.apache.hadoop.hive.ql.log.LogDivertAppenderForTest;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
 import org.apache.hadoop.hive.ql.session.OperationLog;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -304,10 +302,6 @@ public abstract class Operation {
   }
 
   protected synchronized void cleanupOperationLog(final long operationLogCleanupDelayMs) {
-    // stop the appenders for the operation log
-    String queryId = queryState.getQueryId();
-    LogUtils.stopQueryAppender(LogDivertAppender.QUERY_ROUTING_APPENDER, queryId);
-    LogUtils.stopQueryAppender(LogDivertAppenderForTest.TEST_QUERY_ROUTING_APPENDER, queryId);
     if (isOperationLogEnabled) {
       if (opHandle == null) {
         log.warn("Operation seems to be in invalid state, opHandle is null");