You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2020/12/18 08:56:54 UTC

[zeppelin] branch branch-0.9 updated: [ZEPPELIN-5150]. Allow option to allow exclude paragraph result when saving note to NotebookRepo

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/branch-0.9 by this push:
     new 3b77d16  [ZEPPELIN-5150]. Allow option to allow exclude paragraph result when saving note to NotebookRepo
3b77d16 is described below

commit 3b77d167162268908cb7aed5a28ef8cfaa3a30cb
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Thu Dec 3 12:24:08 2020 +0800

    [ZEPPELIN-5150]. Allow option to allow exclude paragraph result when saving note to NotebookRepo
    
    ### What is this PR for?
    
    When Zeppelin is used as a job server rather than as an interactive notebook, it is not necessary to save paragraph results (which are sometimes quite large) into NotebookRepo; doing so can put high pressure on IO when many jobs are running. This change does not affect current behavior, because paragraph results are still stored in NotebookRepo by default.
    
    ### What type of PR is it?
    [Improvement | Feature]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5150
    
    ### How should this be tested?
    * Manually tested
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need an update? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #3989 from zjffdu/ZEPPELIN-5150 and squashes the following commits:
    
    f7a8c74e9 [Jeff Zhang] [ZEPPELIN-5150]. Allow option to allow exclude paragraph result when saving note to NotebookRepo
    
    (cherry picked from commit e2579d261286d343196c7781bbbfeecf601af461)
    Signed-off-by: Jeff Zhang <zj...@apache.org>
---
 conf/zeppelin-site.xml.template                    |  6 ++++
 .../zeppelin/conf/ZeppelinConfiguration.java       |  7 +++-
 .../zeppelin/service/SessionManagerService.java    |  4 +--
 .../java/org/apache/zeppelin/notebook/Note.java    | 39 ++++++++++++++++++----
 .../zeppelin/conf/ZeppelinConfigurationTest.java   |  2 ++
 5 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 81b4a91..53b0144 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -815,4 +815,10 @@
   <description>Whether only allow yarn cluster mode</description>
 </property>
 
+<property>
+  <name>zeppelin.note.file.exclude.fields</name>
+  <value></value>
+  <description>Comma-separated list of fields to be excluded when saving notes to note files. A "Paragraph." prefix denotes a field of Paragraph, e.g. Paragraph.results</description>
+</property>
+
 </configuration>
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index ee42f9f..d76b2b5 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -645,6 +645,10 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     return getString(ConfVars.ZEPPELIN_SERVER_RPC_PORTRANGE);
   }
 
+  public String[] getNoteFileExcludedFields() {
+    return StringUtils.split(getString(ConfVars.ZEPPELIN_NOTE_FILE_EXCLUDE_FIELDS), (","));
+  }
+
   public String getInterpreterPortRange() {
     return getString(ConfVars.ZEPPELIN_INTERPRETER_RPC_PORTRANGE);
   }
@@ -1129,7 +1133,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     ZEPPELIN_SEARCH_INDEX_PATH("zeppelin.search.index.path", "/tmp/zeppelin-index"),
     ZEPPELIN_JOBMANAGER_ENABLE("zeppelin.jobmanager.enable", false),
     ZEPPELIN_SPARK_ONLY_YARN_CLUSTER("zeppelin.spark.only_yarn_cluster", false),
-    ZEPPELIN_SESSION_CHECK_INTERVAL("zeppelin.session.check_interval", 60 * 10 * 1000);
+    ZEPPELIN_SESSION_CHECK_INTERVAL("zeppelin.session.check_interval", 60 * 10 * 1000),
+    ZEPPELIN_NOTE_FILE_EXCLUDE_FIELDS("zeppelin.note.file.exclude.fields", "");
 
     private String varName;
     @SuppressWarnings("rawtypes")
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/service/SessionManagerService.java b/zeppelin-server/src/main/java/org/apache/zeppelin/service/SessionManagerService.java
index 644b00b..3c1adb4 100644
--- a/zeppelin-server/src/main/java/org/apache/zeppelin/service/SessionManagerService.java
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/service/SessionManagerService.java
@@ -173,13 +173,13 @@ public class SessionManagerService {
         } else {
           // if it is running before, but interpreterGroup is not running now, that means the session is stopped.
           // e.g. InterpreterProcess is terminated for whatever unexpected reason.
-          if (sessionInfo.getState().equals(SessionState.RUNNING.name())) {
+          if (SessionState.RUNNING.name().equalsIgnoreCase(sessionInfo.getState())) {
             sessionInfo.setState(SessionState.STOPPED.name());
           }
         }
       }
     } else {
-      if (sessionInfo.getState().equals(SessionState.RUNNING.name())) {
+      if (SessionState.RUNNING.name().equalsIgnoreCase(sessionInfo.getState())) {
         // if it is running before, but interpreterGroup is null now, that means the session is stopped.
         // e.g. InterpreterProcess is killed if it exceed idle timeout threshold.
         sessionInfo.setState(SessionState.STOPPED.name());
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
index cae5c3f..26c98d2 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java
@@ -61,7 +61,6 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -74,24 +73,50 @@ import java.util.concurrent.CopyOnWriteArrayList;
 public class Note implements JsonSerializable {
 
   private static final Logger LOGGER = LoggerFactory.getLogger(Note.class);
+
   // serialize Paragraph#runtimeInfos and Note#path to frontend but not to note file
-  private static final ExclusionStrategy strategy = new ExclusionStrategy() {
+  private static final ExclusionStrategy NOTE_GSON_EXCLUSION_STRATEGY =
+          new NoteJsonExclusionStrategy(ZeppelinConfiguration.create());
+
+  private static class NoteJsonExclusionStrategy implements ExclusionStrategy {
+    private Set<String> noteExcludeFields = new HashSet<>();
+    private Set<String> paragraphExcludeFields = new HashSet<>();
+
+    public NoteJsonExclusionStrategy(ZeppelinConfiguration zConf) {
+      String[] excludeFields = zConf.getNoteFileExcludedFields();
+      for (String field : excludeFields) {
+        if (field.startsWith("Paragraph")) {
+          paragraphExcludeFields.add(field.substring(10));
+        } else {
+          noteExcludeFields.add(field);
+        }
+      }
+    }
+
     @Override
-    public boolean shouldSkipField(FieldAttributes f) {
-      return f.getName().equals("path");
+    public boolean shouldSkipField(FieldAttributes field) {
+      if(field.getName().equals("path")) {
+        return true;
+      }
+      if (field.getDeclaringClass().equals(Paragraph.class)) {
+        return paragraphExcludeFields.contains(field.getName());
+      } else {
+        return noteExcludeFields.contains(field.getName());
+      }
     }
 
     @Override
-    public boolean shouldSkipClass(Class<?> clazz) {
+    public boolean shouldSkipClass(Class<?> aClass) {
       return false;
     }
-  };
+  }
+
   private static final Gson GSON = new GsonBuilder()
           .setPrettyPrinting()
           .setDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
           .registerTypeAdapter(Date.class, new NotebookImportDeserializer())
           .registerTypeAdapterFactory(Input.TypeAdapterFactory)
-          .setExclusionStrategies(strategy)
+          .setExclusionStrategies(NOTE_GSON_EXCLUSION_STRATEGY)
           .create();
   private static final DateTimeFormatter DATE_TIME_FORMATTER =
           DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss");
diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/conf/ZeppelinConfigurationTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/conf/ZeppelinConfigurationTest.java
index 2ce9ada..1557d0e 100644
--- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/conf/ZeppelinConfigurationTest.java
+++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/conf/ZeppelinConfigurationTest.java
@@ -18,6 +18,7 @@ package org.apache.zeppelin.conf;
 
 
 import org.apache.commons.configuration.ConfigurationException;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
 import org.junit.Assert;
 import org.junit.Before;
@@ -136,4 +137,5 @@ public class ZeppelinConfigurationTest {
     System.setProperty(ConfVars.ZEPPELIN_CONFIG_STORAGE_CLASS.getVarName(), "org.apache.zeppelin.storage.FileSystemConfigStorage");
     assertEquals("conf", conf.getConfigFSDir(false));
   }
+
 }