You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2021/11/30 07:43:06 UTC

[GitHub] [hive] ArkoSharma commented on a change in pull request #2539: HIVE-25397: snapshot support for controlled failover

ArkoSharma commented on a change in pull request #2539:
URL: https://github.com/apache/hive/pull/2539#discussion_r759005160



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java
##########
@@ -189,57 +191,137 @@ private void dirLocationToCopy(String tableName, FileList fileList, Path sourceP
       targetPath = new Path(Utils.replaceHost(targetPath.toString(), sourcePath.toUri().getHost()));
       sourcePath = new Path(Utils.replaceHost(sourcePath.toString(), remoteNS));
     }
-    fileList.add(new DirCopyWork(tableName, sourcePath, targetPath, copyMode, snapshotPrefix).convertToString());
+    fileList.add(new DirCopyWork(tableName, sourcePath, targetPath, copyMode, snapshotPrefix, isBootstrap).convertToString());
   }
 
-  private SnapshotUtils.SnapshotCopyMode createSnapshotsAtSource(Path sourcePath, String snapshotPrefix,
-      boolean isSnapshotEnabled, HiveConf conf, SnapshotUtils.ReplSnapshotCount replSnapshotCount, FileList snapPathFileList,
-      ArrayList<String> prevSnaps, boolean isBootstrap) throws IOException {
+  SnapshotUtils.SnapshotCopyMode createSnapshotsAtSource(Path sourcePath, Path targetPath, String snapshotPrefix,
+                                                                              boolean isSnapshotEnabled, HiveConf conf, SnapshotUtils.ReplSnapshotCount replSnapshotCount, FileList snapPathFileList,
+                                                                              ArrayList<String> prevSnaps, boolean isBootstrap) throws IOException {
     if (!isSnapshotEnabled) {
       LOG.info("Snapshot copy not enabled for path {} Will use normal distCp for copying data.", sourcePath);
       return FALLBACK_COPY;
     }
     DistributedFileSystem sourceDfs = SnapshotUtils.getDFS(sourcePath, conf);
     try {
-      if(isBootstrap) {
-        // Delete any pre existing snapshots.
-        SnapshotUtils.deleteSnapshotIfExists(sourceDfs, sourcePath, firstSnapshot(snapshotPrefix), conf);
-        SnapshotUtils.deleteSnapshotIfExists(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf);
-        allowAndCreateInitialSnapshot(sourcePath, snapshotPrefix, conf, replSnapshotCount, snapPathFileList, sourceDfs);
-        return INITIAL_COPY;
+      if(isBootstrap && conf.getBoolVar(HiveConf.ConfVars.REPL_REUSE_SNAPSHOTS)) {
+        try {
+          FileStatus[] listing = sourceDfs.listStatus(new Path(sourcePath, ".snapshot"));
+          for (FileStatus elem : listing) {
+            String snapShotName = elem.getPath().getName();
+            String prefix;
+            if (snapShotName.contains(OLD_SNAPSHOT)) {
+              prefix = snapShotName.substring(0, snapShotName.lastIndexOf(OLD_SNAPSHOT));
+              if(!prefix.equals(snapshotPrefix)) {
+                sourceDfs.renameSnapshot(sourcePath, firstSnapshot(prefix), firstSnapshot(snapshotPrefix));
+              }
+            }
+            if (snapShotName.contains(NEW_SNAPSHOT)) {
+              prefix = snapShotName.substring(0, snapShotName.lastIndexOf(NEW_SNAPSHOT));
+              if(!prefix.equals(snapshotPrefix)) {
+                sourceDfs.renameSnapshot(sourcePath, secondSnapshot(prefix), secondSnapshot(snapshotPrefix));
+              }
+            }
+          }
+        } catch (SnapshotException e) {
+          //dir not snapshottable, continue
+        }
       }
+      boolean firstSnapAvailable =
+              SnapshotUtils.isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, OLD_SNAPSHOT, conf);
+      boolean secondSnapAvailable =
+              SnapshotUtils.isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, NEW_SNAPSHOT, conf);
 
+      //While resuming a failed replication
       if (prevSnaps.contains(sourcePath.toString())) {
         // We already created a snapshot for this, just refresh the latest snapshot and leave.
-        sourceDfs.deleteSnapshot(sourcePath, secondSnapshot(snapshotPrefix));
-        replSnapshotCount.incrementNumDeleted();
+        // In case of reverse replication after fail-over, in some paths, second snapshot may not be present.
+        if(SnapshotUtils.deleteSnapshotIfExists(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf)) {
+          replSnapshotCount.incrementNumDeleted();
+        }
         SnapshotUtils.createSnapshot(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf);
         replSnapshotCount.incrementNumCreated();
         snapPathFileList.add(sourcePath.toString());
         return SnapshotUtils
-            .isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, OLD_SNAPSHOT, conf) ? DIFF_COPY : INITIAL_COPY;
+                .isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, OLD_SNAPSHOT, conf) ? DIFF_COPY : INITIAL_COPY;
       }
-      // check if second snapshot exists.
-      boolean isSecondSnapAvlb = SnapshotUtils.isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix,
-          OLD_SNAPSHOT, conf);
-      if (isSecondSnapAvlb) {
-        sourceDfs.deleteSnapshot(sourcePath, firstSnapshot(snapshotPrefix));
-        replSnapshotCount.incrementNumDeleted();
-        sourceDfs.renameSnapshot(sourcePath, secondSnapshot(snapshotPrefix), firstSnapshot(snapshotPrefix));
-        SnapshotUtils.createSnapshot(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf);
-        replSnapshotCount.incrementNumCreated();
-        snapPathFileList.add(sourcePath.toString());
-        return DIFF_COPY;
-      } else {
-        // Check if first snapshot is available
-        boolean isFirstSnapshotAvailable =
-            SnapshotUtils.isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, NEW_SNAPSHOT, conf);
-        if (isFirstSnapshotAvailable) {
+
+      //for bootstrap and forward replication
+      if(isBootstrap && !(!secondSnapAvailable && firstSnapAvailable)) {
+        if (conf.getBoolVar(HiveConf.ConfVars.REPL_REUSE_SNAPSHOTS)) {
+          //this can be used in re-bootstrap cases after irrecoverable error
+          if(SnapshotUtils.deleteSnapshotIfExists(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf)) {
+            replSnapshotCount.incrementNumDeleted();
+          }
+          SnapshotUtils.createSnapshot(sourceDfs, sourcePath, secondSnapshot(snapshotPrefix), conf);
+          snapPathFileList.add(sourcePath.toString());
+          replSnapshotCount.incrementNumCreated();
+          return SnapshotUtils
+                  .isSnapshotAvailable(sourceDfs, sourcePath, snapshotPrefix, OLD_SNAPSHOT, conf) ? DIFF_COPY : INITIAL_COPY;
+        } else {

Review comment:
       This case is included in the following block.
   
   (!secondSnapAvailable && firstSnapAvailable) - this condition denotes reverse replication taking place for the first time for a particular path. So the execution is similar for both incremental and bootstrap - whereby we either reuse the snaps or don't depending upon the conf. For this purpose, this case is clubbed for both.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org