You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/11/22 23:12:27 UTC

[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r754700441



##########
File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java
##########
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The consistency guard relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+        groupName = ConfigGroups.Names.WRITE_CLIENT,
+        description = "The filesystem guard related config options, to help deal with runtime exception like s3 list/get/put/delete performance issues.")
+public class FileSystemGuardConfig  extends HoodieConfig {
+
+  public static final ConfigProperty<String> FILESYSTEM_RETRY_ENABLE = ConfigProperty
+      .key("hoodie.filesystem.action.retry.enabled")
+      .defaultValue("false")
+      .sinceVersion("0.10.0")
+      .withDocumentation("Enabled to handle S3 list/get/delete etc file system performance issue.");
+
+  public static final ConfigProperty<Long> INITIAL_RETRY_INTERVAL_MS = ConfigProperty
+      .key("hoodie.filesystem.action.retry.initial_interval_ms")
+      .defaultValue(100L)
+      .sinceVersion("0.10.0")
+      .withDocumentation("Amount of time (in ms) to wait, before retry to do operations on storage.");
+
+  public static final ConfigProperty<Long> MAX_RETRY_INTERVAL_MS = ConfigProperty
+      .key("hoodie.filesystem.action.retry.max_interval_ms")
+      .defaultValue(2000L)
+      .sinceVersion("0.10.0")
+      .withDocumentation("Maximum amount of time (in ms), to wait for next retry.");
+
+  public static final ConfigProperty<Integer> MAX_RETRY_NUMBERS = ConfigProperty
+      .key("hoodie.filesystem.action.retry.max_numbers")

Review comment:
       Can you help me understand why we need both a max retry number and a max interval (ms)? I thought either one would be good enough. 
   So, either 100*4 = 4 retries w/ 100 ms delay, 
   or 2000/100 = 20 retries w/ 100 ms delay. 

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##########
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.Random;
+
+public class RetryHelper<T> {
+  private static final Logger LOG = LogManager.getLogger(RetryHelper.class);
+  private HoodieWrapperFileSystem.CheckedFunction<T> func;
+  private int num;
+  private long maxIntervalTime;
+  private long initialIntervalTime = 100L;
+  private String taskInfo = "N/A";
+
+  public RetryHelper() {
+  }
+
+  public RetryHelper(String taskInfo) {
+    this.taskInfo = taskInfo;
+  }
+
+  public RetryHelper tryWith(HoodieWrapperFileSystem.CheckedFunction<T> func) {
+    this.func = func;
+    return this;
+  }
+
+  public RetryHelper tryNum(int num) {
+    this.num = num;
+    return this;
+  }
+
+  public RetryHelper tryTaskInfo(String taskInfo) {
+    this.taskInfo = taskInfo;
+    return this;
+  }
+
+  public RetryHelper tryMaxInterval(long time) {
+    maxIntervalTime = time;
+    return this;
+  }
+
+  public RetryHelper tryInitialInterval(long time) {
+    initialIntervalTime = time;
+    return this;
+  }
+
+  public T start() throws IOException {
+    int retries = 0;
+    boolean success = false;
+    RuntimeException exception = null;
+    T t = null;

Review comment:
       Can we please give these variables more descriptive names (e.g. `t`, `num`, `func`)? 

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java
##########
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The consistency guard relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+        groupName = ConfigGroups.Names.WRITE_CLIENT,
+        description = "The filesystem guard related config options, to help deal with runtime exception like s3 list/get/put/delete performance issues.")
+public class FileSystemGuardConfig  extends HoodieConfig {
+
+  public static final ConfigProperty<String> FILESYSTEM_RETRY_ENABLE = ConfigProperty
+      .key("hoodie.filesystem.action.retry.enabled")

Review comment:
       "enable" would suffice. we don't need a "d" in the end, to be in line with other configs

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##########
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.Random;
+
+public class RetryHelper<T> {
+  private static final Logger LOG = LogManager.getLogger(RetryHelper.class);
+  private HoodieWrapperFileSystem.CheckedFunction<T> func;
+  private int num;
+  private long maxIntervalTime;
+  private long initialIntervalTime = 100L;
+  private String taskInfo = "N/A";
+
+  public RetryHelper() {
+  }
+
+  public RetryHelper(String taskInfo) {
+    this.taskInfo = taskInfo;
+  }
+
+  public RetryHelper tryWith(HoodieWrapperFileSystem.CheckedFunction<T> func) {
+    this.func = func;
+    return this;
+  }
+
+  public RetryHelper tryNum(int num) {
+    this.num = num;
+    return this;
+  }
+
+  public RetryHelper tryTaskInfo(String taskInfo) {
+    this.taskInfo = taskInfo;
+    return this;
+  }
+
+  public RetryHelper tryMaxInterval(long time) {
+    maxIntervalTime = time;
+    return this;
+  }
+
+  public RetryHelper tryInitialInterval(long time) {
+    initialIntervalTime = time;
+    return this;
+  }
+
+  public T start() throws IOException {
+    int retries = 0;
+    boolean success = false;
+    RuntimeException exception = null;
+    T t = null;
+    do {
+      long waitTime = Math.min(getWaitTimeExp(retries), maxIntervalTime);
+      try {
+        t = func.get();
+        success = true;
+        break;
+      } catch (RuntimeException e) {
+        // deal with RuntimeExceptions such like AmazonS3Exception 503
+        exception = e;
+        LOG.warn("Catch RuntimeException " + taskInfo + ", will retry after " + waitTime + " ms.", e);
+        try {
+          Thread.sleep(waitTime);
+        } catch (InterruptedException ex) {
+            // ignore InterruptedException here
+        }
+        retries++;
+      }
+    } while (retries <= num);

Review comment:
       We can probably remove line 88 of this file (the `retries++;` inside the catch block) and do the `retries++` here in the `while` condition instead.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org