You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2022/05/25 04:14:46 UTC

[GitHub] [hive] maheshk114 commented on a diff in pull request #3293: HIVE-24884: Move top level dump metadata content to be in JSON format

maheshk114 commented on code in PR #3293:
URL: https://github.com/apache/hive/pull/3293#discussion_r876521344


##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -35,33 +40,64 @@
 import java.util.Arrays;
 import java.util.List;
 
+@JsonIgnoreProperties(ignoreUnknown = true)
 public class DumpMetaData {
   // wrapper class for reading and writing metadata about a dump
   // responsible for _dumpmetadata files
   public static final String DUMP_METADATA = "_dumpmetadata";
+
+  // New version of dump metadata file to store top level dumpmetadata content in JSON format
+  public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2";
   private static final Logger LOG = LoggerFactory.getLogger(DumpMetaData.class);
+  private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); // Thread-safe.
 
+  @JsonProperty
   private DumpType dumpType;
+  @JsonProperty
   private Long eventFrom = null;
+  @JsonProperty
   private Long eventTo = null;
+  @JsonProperty
   private Path cmRoot;
+  @JsonProperty
   private String payload = null;
-  private ReplScope replScope = null;
-
-  private boolean initialized = false;
-  private final Path dumpFile;
-  private final HiveConf hiveConf;
+  @JsonProperty
   private Long dumpExecutionId;
+  @JsonProperty
   private boolean replScopeModified = false;
+  @JsonProperty
+  private String replScopeStr = null;
+  //Ignore rest of the properties
+  @JsonIgnore
+  private ReplScope replScope = null;
+  @JsonIgnore
+  private Path dumpFile;
+  @JsonIgnore
+  private final HiveConf hiveConf;
+  @JsonIgnore
+  private boolean isTopLevel;
+  @JsonIgnore
+  private Path dumpRoot;
+  @JsonIgnore
+  private boolean initialized = false;
+
+  public DumpMetaData() {
+    //to be instantiated by JSON ObjectMapper.
+    hiveConf = null;
+  }
 
   public DumpMetaData(Path dumpRoot, HiveConf hiveConf) {
-    this.hiveConf = hiveConf;
-    dumpFile = new Path(dumpRoot, DUMP_METADATA);
+    this(dumpRoot, hiveConf, false);
   }
 
+  public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) {
+    this.dumpRoot = dumpRoot;
+    this.hiveConf = hiveConf;
+    this.isTopLevel = isTopLevel;
+  }
   public DumpMetaData(Path dumpRoot, DumpType lvl, Long eventFrom, Long eventTo, Path cmRoot,

Review Comment:
   Where is it used now? Is it for dumping to normal files?



##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -35,33 +40,64 @@
 import java.util.Arrays;
 import java.util.List;
 
+@JsonIgnoreProperties(ignoreUnknown = true)
 public class DumpMetaData {
   // wrapper class for reading and writing metadata about a dump
   // responsible for _dumpmetadata files
   public static final String DUMP_METADATA = "_dumpmetadata";
+
+  // New version of dump metadata file to store top level dumpmetadata content in JSON format
+  public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2";
   private static final Logger LOG = LoggerFactory.getLogger(DumpMetaData.class);
+  private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); // Thread-safe.
 
+  @JsonProperty
   private DumpType dumpType;
+  @JsonProperty
   private Long eventFrom = null;
+  @JsonProperty
   private Long eventTo = null;
+  @JsonProperty
   private Path cmRoot;
+  @JsonProperty
   private String payload = null;
-  private ReplScope replScope = null;
-
-  private boolean initialized = false;
-  private final Path dumpFile;
-  private final HiveConf hiveConf;
+  @JsonProperty
   private Long dumpExecutionId;
+  @JsonProperty
   private boolean replScopeModified = false;
+  @JsonProperty
+  private String replScopeStr = null;
+  //Ignore rest of the properties
+  @JsonIgnore
+  private ReplScope replScope = null;
+  @JsonIgnore
+  private Path dumpFile;
+  @JsonIgnore
+  private final HiveConf hiveConf;
+  @JsonIgnore
+  private boolean isTopLevel;
+  @JsonIgnore
+  private Path dumpRoot;
+  @JsonIgnore
+  private boolean initialized = false;
+
+  public DumpMetaData() {
+    //to be instantiated by JSON ObjectMapper.
+    hiveConf = null;
+  }
 
   public DumpMetaData(Path dumpRoot, HiveConf hiveConf) {
-    this.hiveConf = hiveConf;
-    dumpFile = new Path(dumpRoot, DUMP_METADATA);
+    this(dumpRoot, hiveConf, false);
   }
 
+  public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) {

Review Comment:
   What does isTopLevel mean?



##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -117,6 +153,32 @@ private void readReplScope(String line) throws IOException {
   }
 
   private void loadDumpFromFile() throws SemanticException {
+    boolean isInJSONFormat = resolveDumpFilePathAndGetIfV2();
+    if (isInJSONFormat) {
+      loadDumpFromFileV2();
+    } else {
+      loadDumpFromFileV1();
+    }
+  }
+
+  //Returns true if dumpmetaData is in V2 Format
+  private boolean resolveDumpFilePathAndGetIfV2() throws SemanticException {
+    if (isTopLevel) {
+      dumpFile = new Path(dumpRoot, DUMP_METADATA_V2);
+      if (Utils.fileExists(dumpFile, hiveConf)) {
+        return true;
+      }
+      //Backward-compatibility: fall back to old version. Dump might be generated by old version
+      dumpFile = new Path(dumpRoot, DUMP_METADATA);
+      LOG.info("Falling back to old version of dump meta data {}", dumpFile);
+    } else {
+      // The nested level _dumpmetadata file content is still in old format: To save JSON parsing cost.
+      dumpFile = new Path(dumpRoot, DUMP_METADATA);
+    }
+    return false;
+  }
+
+  private void loadDumpFromFileV1() throws SemanticException {
     BufferedReader br = null;
     try {

Review Comment:
   Are there any tests to verify this path?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org