You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/10/08 02:50:35 UTC

[GitHub] [hudi] xushiyan commented on a change in pull request #3416: [HUDI-2362] Add external config file support

xushiyan commented on a change in pull request #3416:
URL: https://github.com/apache/hudi/pull/3416#discussion_r724653630



##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -43,70 +45,88 @@
 
   private static final Logger LOG = LogManager.getLogger(DFSPropertiesConfiguration.class);
 
+  private static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";
+
+  public static final String CONF_FILE_DIR_ENV_NAME = "HUDI_CONF_DIR";
+
+  // props read from hudi-defaults.conf
+  private static final TypedProperties HUDI_CONF_PROPS = loadGlobalProps();
+
   private final FileSystem fs;
 
-  private final Path rootFile;
+  private Path currentFilePath;
 
+  // props read from user defined configuration file or input stream
   private final TypedProperties props;
 
   // Keep track of files visited, to detect loops
-  private final Set<String> visitedFiles;
+  private final Set<String> visitedFilePaths;
 
-  public DFSPropertiesConfiguration(FileSystem fs, Path rootFile, TypedProperties defaults) {
+  public DFSPropertiesConfiguration(FileSystem fs, Path filePath) {
     this.fs = fs;
-    this.rootFile = rootFile;
-    this.props = defaults;
-    this.visitedFiles = new HashSet<>();
-    visitFile(rootFile);
-  }
-
-  public DFSPropertiesConfiguration(FileSystem fs, Path rootFile) {
-    this(fs, rootFile, new TypedProperties());
+    this.currentFilePath = filePath;
+    this.props = new TypedProperties();
+    this.visitedFilePaths = new HashSet<>();
+    addPropsFromFile(filePath);
   }
 
   public DFSPropertiesConfiguration() {
     this.fs = null;
-    this.rootFile = null;
+    this.currentFilePath = null;
     this.props = new TypedProperties();
-    this.visitedFiles = new HashSet<>();
+    this.visitedFilePaths = new HashSet<>();
   }
 
-  private String[] splitProperty(String line) {
-    int ind = line.indexOf('=');
-    String k = line.substring(0, ind).trim();
-    String v = line.substring(ind + 1).trim();
-    return new String[] {k, v};
+  /**
+   * Load global props from hudi-defaults.conf which is under CONF_FILE_DIR_ENV_NAME.
+   * @return Typed Properties
+   */
+  public static TypedProperties loadGlobalProps() {
+    DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration();
+    Path defaultConfPath = getDefaultConfPath();
+    if (defaultConfPath != null) {
+      conf.addPropsFromFile(defaultConfPath);
+    }
+    return conf.getConfig();
   }
 
-  private void visitFile(Path file) {
+  /**
+   * Add properties from external configuration files.
+   *
+   * @param filePath File path for configuration file
+   */
+  public void addPropsFromFile(Path filePath) {
     try {
-      if (visitedFiles.contains(file.getName())) {
-        throw new IllegalStateException("Loop detected; file " + file + " already referenced");
+      if (visitedFilePaths.contains(filePath.toString())) {
+        throw new IllegalStateException("Loop detected; file " + filePath + " already referenced");
       }
-      visitedFiles.add(file.getName());
-      BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file)));
-      addProperties(reader);
+      visitedFilePaths.add(filePath.toString());
+      currentFilePath = filePath;
+      FileSystem fileSystem = fs != null ? fs : filePath.getFileSystem(new Configuration());
+      BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(filePath)));

Review comment:
       use try with resources to close?

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -117,7 +137,49 @@ public void addProperties(BufferedReader reader) throws IOException {
     }
   }
 
+  public static TypedProperties getGlobalConfig() {
+    final TypedProperties globalProps = new TypedProperties();
+    globalProps.putAll(HUDI_CONF_PROPS);
+    return globalProps;
+  }
+
   public TypedProperties getConfig() {
-    return props;
+    return getConfig(false);
+  }
+
+  public TypedProperties getConfig(Boolean includeHudiConf) {
+    if (includeHudiConf) {
+      TypedProperties mergedProps = new TypedProperties();
+      mergedProps.putAll(HUDI_CONF_PROPS);
+      mergedProps.putAll(props);
+      return mergedProps;
+    } else {
+      return props;
+    }
+  }
+
+  private static Path getDefaultConfPath() {
+    String confDir = System.getenv(CONF_FILE_DIR_ENV_NAME);
+    if (confDir == null) {
+      LOG.warn("Cannot find " + CONF_FILE_DIR_ENV_NAME + ", please set it as the dir of " + DEFAULT_PROPERTIES_FILE);
+      return null;
+    }
+    return new Path("file://" + confDir + File.separator + DEFAULT_PROPERTIES_FILE);
+  }
+
+  private String[] splitProperty(String line) {
+    line = line.replaceAll("\\s+"," ");
+    String delimiter = line.contains("=") ? "=" : " ";
+    int ind = line.indexOf(delimiter);
+    String k = line.substring(0, ind).trim();
+    String v = line.substring(ind + 1).trim();
+    return new String[] {k, v};
+  }
+
+  private boolean isValidLine(String line) {
+    if (line.startsWith("#") || line.equals("")) {

Review comment:
       use ValidationUtils.checkArugment to check `line` to be  non-null?

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
##########
@@ -94,7 +94,7 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
 
      // Get options from the external table and append with the options in ddl.
      val originTableConfig =  HoodieOptionConfig.mappingTableConfigToSqlOption(
-       metaClient.getTableConfig.getProps.asScala.toMap)
+       metaClient.getTableConfig.getProps().asScala.toMap)

Review comment:
       did you meant to pass `true`?

##########
File path: hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java
##########
@@ -123,18 +123,11 @@ public static Schema getSourceSchema(org.apache.flink.configuration.Configuratio
    * Read config from properties file (`--props` option) and cmd line (`--hoodie-conf` option).
    */
   public static DFSPropertiesConfiguration readConfig(FileSystem fs, Path cfgPath, List<String> overriddenProps) {
-    DFSPropertiesConfiguration conf;
-    try {
-      conf = new DFSPropertiesConfiguration(cfgPath.getFileSystem(fs.getConf()), cfgPath);
-    } catch (Exception e) {
-      conf = new DFSPropertiesConfiguration();
-      LOG.warn("Unexpected error read props file at :" + cfgPath, e);
-    }
-
+    DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration(fs, cfgPath);
     try {
       if (!overriddenProps.isEmpty()) {
         LOG.info("Adding overridden properties to file properties.");
-        conf.addProperties(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));
+        conf.addPropsFromInputStream(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));

Review comment:
       same here better pass an InputStream to align with the method name.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java
##########
@@ -163,7 +167,17 @@ public String getString(String key) {
   }
 
   public Properties getProps() {
-    return props;
+    return getProps(false);
+  }
+
+  public Properties getProps(Boolean includeHudiConf) {

Review comment:
       also unboxed boolean?

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -43,70 +45,88 @@
 
   private static final Logger LOG = LogManager.getLogger(DFSPropertiesConfiguration.class);
 
+  private static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";
+
+  public static final String CONF_FILE_DIR_ENV_NAME = "HUDI_CONF_DIR";
+
+  // props read from hudi-defaults.conf
+  private static final TypedProperties HUDI_CONF_PROPS = loadGlobalProps();
+
   private final FileSystem fs;
 
-  private final Path rootFile;
+  private Path currentFilePath;
 
+  // props read from user defined configuration file or input stream
   private final TypedProperties props;
 
   // Keep track of files visited, to detect loops
-  private final Set<String> visitedFiles;
+  private final Set<String> visitedFilePaths;
 
-  public DFSPropertiesConfiguration(FileSystem fs, Path rootFile, TypedProperties defaults) {
+  public DFSPropertiesConfiguration(FileSystem fs, Path filePath) {
     this.fs = fs;
-    this.rootFile = rootFile;
-    this.props = defaults;
-    this.visitedFiles = new HashSet<>();
-    visitFile(rootFile);
-  }
-
-  public DFSPropertiesConfiguration(FileSystem fs, Path rootFile) {
-    this(fs, rootFile, new TypedProperties());
+    this.currentFilePath = filePath;
+    this.props = new TypedProperties();
+    this.visitedFilePaths = new HashSet<>();
+    addPropsFromFile(filePath);
   }
 
   public DFSPropertiesConfiguration() {
     this.fs = null;
-    this.rootFile = null;
+    this.currentFilePath = null;
     this.props = new TypedProperties();
-    this.visitedFiles = new HashSet<>();
+    this.visitedFilePaths = new HashSet<>();
   }
 
-  private String[] splitProperty(String line) {
-    int ind = line.indexOf('=');
-    String k = line.substring(0, ind).trim();
-    String v = line.substring(ind + 1).trim();
-    return new String[] {k, v};
+  /**
+   * Load global props from hudi-defaults.conf which is under CONF_FILE_DIR_ENV_NAME.
+   * @return Typed Properties
+   */
+  public static TypedProperties loadGlobalProps() {
+    DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration();
+    Path defaultConfPath = getDefaultConfPath();
+    if (defaultConfPath != null) {
+      conf.addPropsFromFile(defaultConfPath);
+    }
+    return conf.getConfig();
   }
 
-  private void visitFile(Path file) {
+  /**
+   * Add properties from external configuration files.
+   *
+   * @param filePath File path for configuration file
+   */
+  public void addPropsFromFile(Path filePath) {
     try {
-      if (visitedFiles.contains(file.getName())) {
-        throw new IllegalStateException("Loop detected; file " + file + " already referenced");
+      if (visitedFilePaths.contains(filePath.toString())) {
+        throw new IllegalStateException("Loop detected; file " + filePath + " already referenced");
       }
-      visitedFiles.add(file.getName());
-      BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file)));
-      addProperties(reader);
+      visitedFilePaths.add(filePath.toString());
+      currentFilePath = filePath;
+      FileSystem fileSystem = fs != null ? fs : filePath.getFileSystem(new Configuration());
+      BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(filePath)));
+      addPropsFromInputStream(reader);
     } catch (IOException ioe) {
-      LOG.error("Error reading in properies from dfs", ioe);
+      LOG.error("Error reading in properties from dfs", ioe);
       throw new IllegalArgumentException("Cannot read properties from dfs", ioe);
     }
   }
 
   /**
    * Add properties from input stream.
-   * 
+   *
    * @param reader Buffered Reader
    * @throws IOException
    */
-  public void addProperties(BufferedReader reader) throws IOException {
+  public void addPropsFromInputStream(BufferedReader reader) throws IOException {

Review comment:
       "FromInputStream" can be confusing as the argument is a `BufferedReader`. To use that name, you may pass a InputStream here and create and close the reader inside.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java
##########
@@ -117,7 +137,49 @@ public void addProperties(BufferedReader reader) throws IOException {
     }
   }
 
+  public static TypedProperties getGlobalConfig() {
+    final TypedProperties globalProps = new TypedProperties();
+    globalProps.putAll(HUDI_CONF_PROPS);
+    return globalProps;
+  }
+
   public TypedProperties getConfig() {
-    return props;
+    return getConfig(false);
+  }
+
+  public TypedProperties getConfig(Boolean includeHudiConf) {

Review comment:
       use primitive boolean instead of boxed?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org