You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by mr...@apache.org on 2021/10/19 02:41:40 UTC

[spark] branch master updated: [SPARK-36834][SHUFFLE] Add support for namespacing log lines emitted by external shuffle service

This is an automated email from the ASF dual-hosted git repository.

mridulm80 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4072a22  [SPARK-36834][SHUFFLE] Add support for namespacing log lines emitted by external shuffle service
4072a22 is described below

commit 4072a22aa2bf15e95d3043f937a3468057f4fd36
Author: Thejdeep Gudivada <tg...@linkedin.com>
AuthorDate: Mon Oct 18 21:40:55 2021 -0500

    [SPARK-36834][SHUFFLE] Add support for namespacing log lines emitted by external shuffle service
    
    ### What changes were proposed in this pull request?
    Added a config `spark.yarn.shuffle.service.logs.namespace` which can be used to append a namespace suffix to the name of the logger used by the External Shuffle Service, so that the log lines it emits can be identified by that namespace.
    
    ### Why are the changes needed?
    Since many instances of the External Shuffle Service (ESS) can be running on the same NodeManager (NM), namespacing their log lines makes it easier to distinguish between them.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    N/A
    
    Closes #34079 from thejdeep/SPARK-36834.
    
    Authored-by: Thejdeep Gudivada <tg...@linkedin.com>
    Signed-off-by: Mridul Muralidharan <mridul<at>gmail.com>
---
 .../apache/spark/network/yarn/YarnShuffleService.java  | 18 ++++++++++++++++--
 docs/running-on-yarn.md                                | 11 +++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
index ac16369..f1b8941 100644
--- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
+++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
@@ -93,7 +93,8 @@ import org.apache.spark.network.yarn.util.HadoopConfigProvider;
  * This {@code classpath} configuration is only supported on YARN versions >= 2.9.0.
  */
 public class YarnShuffleService extends AuxiliaryService {
-  private static final Logger logger = LoggerFactory.getLogger(YarnShuffleService.class);
+  private static final Logger defaultLogger = LoggerFactory.getLogger(YarnShuffleService.class);
+  private Logger logger = defaultLogger;
 
   // Port on which the shuffle server listens for fetch requests
   private static final String SPARK_SHUFFLE_SERVICE_PORT_KEY = "spark.shuffle.service.port";
@@ -107,6 +108,12 @@ public class YarnShuffleService extends AuxiliaryService {
       "spark.yarn.shuffle.service.metrics.namespace";
   private static final String DEFAULT_SPARK_SHUFFLE_SERVICE_METRICS_NAME = "sparkShuffleService";
 
+  /**
+   * The namespace to use for the logs produced by the shuffle service
+   */
+  static final String SPARK_SHUFFLE_SERVICE_LOGS_NAMESPACE_KEY =
+      "spark.yarn.shuffle.service.logs.namespace";
+
   // Whether the shuffle server should authenticate fetch requests
   private static final String SPARK_AUTHENTICATE_KEY = "spark.authenticate";
   private static final boolean DEFAULT_SPARK_AUTHENTICATE = false;
@@ -204,6 +211,13 @@ public class YarnShuffleService extends AuxiliaryService {
           confOverlayUrl);
       _conf.addResource(confOverlayUrl);
     }
+
+    String logsNamespace = _conf.get(SPARK_SHUFFLE_SERVICE_LOGS_NAMESPACE_KEY, "");
+    if (!logsNamespace.isEmpty()) {
+      String className = YarnShuffleService.class.getName();
+      logger = LoggerFactory.getLogger(className + "." + logsNamespace);
+    }
+
     super.serviceInit(_conf);
 
     boolean stopOnFailure = _conf.getBoolean(STOP_ON_FAILURE_KEY, DEFAULT_STOP_ON_FAILURE);
@@ -284,7 +298,7 @@ public class YarnShuffleService extends AuxiliaryService {
       // will also need the transport configuration.
       return mergeManagerSubClazz.getConstructor(TransportConf.class).newInstance(conf);
     } catch (Exception e) {
-      logger.error("Unable to create an instance of {}", mergeManagerImplClassName);
+      defaultLogger.error("Unable to create an instance of {}", mergeManagerImplClassName);
       return new NoOpMergedShuffleFileManager(conf);
     }
   }
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 8b7ed18..52d365a 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -806,6 +806,17 @@ The following extra configuration options are available when the shuffle service
     NodeManager.
   </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.shuffle.service.logs.namespace</code></td>
+  <td><code>(not set)</code></td>
+  <td>
+    A namespace which will be appended to the class name when forming the logger name to use for
+    emitting logs from the YARN shuffle service, like
+    <code>org.apache.spark.network.yarn.YarnShuffleService.logsNamespaceValue</code>. Since some logging frameworks
+    may expect the logger name to look like a class name, it's generally recommended to provide a value which
+    would be a valid Java package or class name and not include spaces.
+  </td>
+</tr>
 </table>
 
 Please note that the instructions above assume that the default shuffle service name,

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org