You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by GitBox <gi...@apache.org> on 2022/10/27 03:45:48 UTC

[GitHub] [dolphinscheduler] caishunfeng commented on a diff in pull request #12197: [Improvement][Task] Improved way to collect yarn job's appIds

caishunfeng commented on code in PR #12197:
URL: https://github.com/apache/dolphinscheduler/pull/12197#discussion_r1006373010


##########
dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java:
##########
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.poc;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+
+import org.aspectj.lang.annotation.AfterReturning;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Aspect
+public class YarnClientAspectMoc {
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
+
+    private ApplicationId privateId = null;
+
+    @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext")
+    public void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) {
+        logger.info("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId "
+                + submittedAppId + " privateId " + privateId);

Review Comment:
   ```suggestion
           logger.info("YarnClientAspectMoc[submitApplication]: app context: {}, submittedAppId: {}, privateId: {}", appContext, submittedAppId, privateId);
   ```



##########
dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java:
##########
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.aop;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+
+import org.aspectj.lang.annotation.AfterReturning;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Aspect
+public class YarnClientAspect {
+
+    /**
+     * The current application report when application submitted successfully
+     */
+    private ApplicationReport currentApplicationReport = null;
+
+    private final String appInfoFilePath;
+    private boolean debug;
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
+
+    public YarnClientAspect() {
+        appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log");
+        debug = true;

Review Comment:
   ```suggestion
          
   ```



##########
dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java:
##########
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.poc;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class YarnClientMoc {
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
+
+    private Random random = new Random();
+
+    public ApplicationId createAppId() {
+        ApplicationId created = ApplicationId.newInstance(System.currentTimeMillis(), random.nextInt());
+        logger.info("created id " + created.getId());

Review Comment:
   ```suggestion
           logger.info("created id {}", created.getId());
   ```



##########
dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java:
##########
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.aop;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+
+import org.aspectj.lang.annotation.AfterReturning;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Aspect
+public class YarnClientAspect {
+
+    /**
+     * The current application report when application submitted successfully
+     */
+    private ApplicationReport currentApplicationReport = null;
+
+    private final String appInfoFilePath;
+    private boolean debug;
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
+
+    public YarnClientAspect() {
+        appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log");
+        debug = true;
+    }
+
+    /**
+     * Trigger submitApplication when invoking YarnClientImpl.submitApplication
+     *
+     * @param appContext     application context when invoking YarnClientImpl.submitApplication
+     * @param submittedAppId the submitted application id returned by YarnClientImpl.submitApplication
+     * @throws Throwable exceptions
+     */
+    @AfterReturning(pointcut = "execution(ApplicationId org.apache.hadoop.yarn.client.api.impl.YarnClientImpl." +
+            "submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext,submittedAppId")
+    public void registerApplicationInfo(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) {
+        if (appInfoFilePath != null) {
+            try {
+                Files.write(Paths.get(appInfoFilePath),
+                        Collections.singletonList(submittedAppId.toString()),
+                        StandardOpenOption.CREATE,
+                        StandardOpenOption.WRITE,
+                        StandardOpenOption.APPEND);
+            } catch (IOException ioException) {
+                logger.error("YarnClientAspect[registerAppInfo]: can't output current application information, because "
+                        + ioException.getMessage());
+            }
+        }
+        if (debug) {
+            logger.info("YarnClientAspect[submitApplication]: current application context " + appContext);
+            logger.info("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId);
+            logger.info(
+                    "YarnClientAspect[submitApplication]: current application report  " + currentApplicationReport);
+        }

Review Comment:
   ```suggestion
               logger.info("YarnClientAspect[submitApplication]: current application context " + appContext);
               logger.info("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId);
               logger.info(
                       "YarnClientAspect[submitApplication]: current application report  " + currentApplicationReport);
   ```



##########
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java:
##########
@@ -340,6 +340,7 @@ protected TaskExecutionContext getTaskExecutionContext(TaskInstance taskInstance
                 .buildTaskDefinitionRelatedInfo(taskInstance.getTaskDefine())
                 .buildProcessInstanceRelatedInfo(taskInstance.getProcessInstance())
                 .buildProcessDefinitionRelatedInfo(taskInstance.getProcessDefine())
+                .buildExecPathRelatedInfo()

Review Comment:
   Why call `buildExecPathRelatedInfo` here?



##########
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java:
##########
@@ -126,6 +127,24 @@ public TaskExecutionContextBuilder buildProcessDefinitionRelatedInfo(ProcessDefi
         return this;
     }
 
+    /**
+     * build execPath related info
+     *
+     * @return TaskExecutionContextBuilder
+     */
+    public TaskExecutionContextBuilder buildExecPathRelatedInfo() {

Review Comment:
   I think the worker should set the execute path and appInfoPath before the task runs. cc @ruanwenjun



##########
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskExecutionContext.java:
##########
@@ -81,6 +81,11 @@ public class TaskExecutionContext implements Serializable {
      */
     private String logPath;
 
+    /**
+     * applicationId path
+     */
+    private String appInfoPath;

Review Comment:
   There's no need to add this field if the appInfoPath defaults to `execPath/appInfo.log`.



##########
script/env/dolphinscheduler_env.sh:
##########
@@ -33,3 +33,10 @@ export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel}
 export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun}
 
 export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH
+
+# applicationId auto collection related configuration
+export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*
+export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH
+export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS
+export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS
+export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS

Review Comment:
   This looks weird :thinking:, do we have to add these OPTS here?



##########
dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java:
##########
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.poc;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+
+import org.aspectj.lang.annotation.AfterReturning;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Aspect
+public class YarnClientAspectMoc {
+
+    protected final Logger logger = LoggerFactory.getLogger(getClass());
+
+    private ApplicationId privateId = null;
+
+    @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext")
+    public void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) {
+        logger.info("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId "
+                + submittedAppId + " privateId " + privateId);
+    }
+
+    @AfterReturning(pointcut = "cflow(execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext))) "
+            +
+            "&& !within(CfowAspect) && execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.createAppId())", returning = "submittedAppId")
+    public void createAppId(ApplicationId submittedAppId) {
+        privateId = submittedAppId;
+        logger.info("YarnClientAspectMoc[createAppId]: created submittedAppId " + submittedAppId);

Review Comment:
   ```suggestion
           logger.info("YarnClientAspectMoc[createAppId]: created submittedAppId: {}", submittedAppId);
   ```



##########
dolphinscheduler-aop/pom.xml:
##########
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.dolphinscheduler</groupId>
+        <artifactId>dolphinscheduler</artifactId>
+        <version>dev-SNAPSHOT</version>
+    </parent>
+    <artifactId>dolphinscheduler-aop</artifactId>
+    <packaging>jar</packaging>
+    <name>${project.artifactId}</name>
+    <description>aop 4 YarnClient to get application id when submitting jars using 'yarn jar mainClass args'</description>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+        <aspectj.version>1.9.7</aspectj.version>
+        <hadoop.version>3.2.4</hadoop.version>

Review Comment:
   This should be moved to the bom module.



##########
dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java:
##########
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler;
+
+import org.apache.dolphinscheduler.poc.YarnClientMoc;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class YarnClientAspectMocTest {
+
+    private final PrintStream standardOut = System.out;
+    ByteArrayOutputStream stdoutStream = new ByteArrayOutputStream();
+    @BeforeEach
+    public void beforeEveryTest() {
+        System.setOut(new PrintStream(stdoutStream));
+    }
+    @AfterEach
+    public void afterEveryTest() throws IOException {
+        System.setOut(standardOut);
+        stdoutStream.close();
+    }
+    @Test
+    public void testMoc() {
+        YarnClientMoc moc = new YarnClientMoc();
+        try {
+            ApplicationSubmissionContext appContext = ApplicationSubmissionContext.newInstance(
+                    ApplicationId.newInstance(System.currentTimeMillis(), 1236), "appName",
+                    "queue", Priority.UNDEFINED,
+                    null, false,
+                    false, 10, null,
+                    "type");
+            moc.createAppId();
+            ApplicationId applicationId = moc.submitApplication(appContext);
+            String stdoutContent = stdoutStream.toString();
+            Assertions.assertTrue(stdoutContent.contains("YarnClientAspectMoc[submitApplication]"),
+                    "trigger YarnClientAspectMoc.submitApplication failed");
+            Assertions.assertTrue(stdoutContent.contains("YarnClientAspectMoc[createAppId]:"),
+                    "trigger YarnClientAspectMoc.createAppId failed");
+        } catch (YarnException | IOException e) {
+            Assertions.fail("test YarnClientAspectMoc failed: " + e.getMessage());
+            e.printStackTrace();

Review Comment:
   Please remove `e.printStackTrace();`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@dolphinscheduler.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org