You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/02/02 09:23:49 UTC
[hudi] 07/08: [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit c487ee428d5a639655a5a48b45ce4507d45c34a3
Author: Shiyan Xu <27...@users.noreply.github.com>
AuthorDate: Thu Feb 2 00:39:28 2023 -0600
[HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
---
.../hudi/gcp/bigquery/BigQuerySyncConfig.java | 38 ++++--------
.../gcp/bigquery/TestBigQuerySyncToolArgs.java | 70 ++++++++++++++++++++++
packaging/hudi-gcp-bundle/pom.xml | 8 ++-
3 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
index b46cd9a9f81..52b3d3b74e5 100644
--- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
+++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
@@ -20,14 +20,13 @@
package org.apache.hudi.gcp.bigquery;
import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.sync.common.HoodieSyncConfig;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParametersDelegate;
import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Properties;
/**
@@ -101,38 +100,27 @@ public class BigQuerySyncConfig extends HoodieSyncConfig implements Serializable
public String datasetName;
@Parameter(names = {"--dataset-location"}, description = "Location of the target dataset in BigQuery", required = true)
public String datasetLocation;
- @Parameter(names = {"--table-name"}, description = "Name of the target table in BigQuery", required = true)
- public String tableName;
@Parameter(names = {"--source-uri"}, description = "Name of the source uri gcs path of the table", required = true)
public String sourceUri;
@Parameter(names = {"--source-uri-prefix"}, description = "Name of the source uri gcs path prefix of the table", required = true)
public String sourceUriPrefix;
- @Parameter(names = {"--base-path"}, description = "Base path of the hoodie table to sync", required = true)
- public String basePath;
- @Parameter(names = {"--partitioned-by"}, description = "Comma-delimited partition fields. Default to non-partitioned.")
- public List<String> partitionFields = new ArrayList<>();
- @Parameter(names = {"--use-file-listing-from-metadata"}, description = "Fetch file listing from Hudi's metadata")
- public boolean useFileListingFromMetadata = false;
- @Parameter(names = {"--assume-date-partitioning"}, description = "Assume standard yyyy/mm/dd partitioning, this"
- + " exists to support backward compatibility. If you use hoodie 0.3.x, do not set this parameter")
- public boolean assumeDatePartitioning = false;
public boolean isHelp() {
return hoodieSyncConfigParams.isHelp();
}
- public Properties toProps() {
- final Properties props = hoodieSyncConfigParams.toProps();
- props.setProperty(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
- props.setProperty(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName);
- props.setProperty(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation);
- props.setProperty(BIGQUERY_SYNC_TABLE_NAME.key(), tableName);
- props.setProperty(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
- props.setProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix);
- props.setProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), basePath);
- props.setProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", partitionFields));
- props.setProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(useFileListingFromMetadata));
- props.setProperty(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), String.valueOf(assumeDatePartitioning));
+ public TypedProperties toProps() {
+ final TypedProperties props = hoodieSyncConfigParams.toProps();
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_TABLE_NAME.key(), hoodieSyncConfigParams.tableName);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(), sourceUriPrefix);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), hoodieSyncConfigParams.basePath);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",", hoodieSyncConfigParams.partitionFields));
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), hoodieSyncConfigParams.useFileListingFromMetadata);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(), hoodieSyncConfigParams.assumeDatePartitioning);
return props;
}
}
diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
new file mode 100644
index 00000000000..898358484d9
--- /dev/null
+++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.gcp.bigquery;
+
+import com.beust.jcommander.JCommander;
+import org.junit.jupiter.api.Test;
+
+import java.util.Properties;
+
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME;
+import static org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestBigQuerySyncToolArgs {
+
+ @Test
+ public void testArgsParse() {
+ BigQuerySyncConfig.BigQuerySyncConfigParams params = new BigQuerySyncConfig.BigQuerySyncConfigParams();
+ JCommander cmd = JCommander.newBuilder().addObject(params).build();
+ String[] args = {
+ "--project-id", "hudi-bq",
+ "--dataset-name", "foobar",
+ "--dataset-location", "us-west1",
+ "--table", "foobartable",
+ "--source-uri", "gs://foobartable/year=*",
+ "--source-uri-prefix", "gs://foobartable/",
+ "--base-path", "gs://foobartable",
+ "--partitioned-by", "year,month,day",
+ "--use-file-listing-from-metadata"
+ };
+ cmd.parse(args);
+
+ final Properties props = params.toProps();
+ assertEquals("hudi-bq", props.getProperty(BIGQUERY_SYNC_PROJECT_ID.key()));
+ assertEquals("foobar", props.getProperty(BIGQUERY_SYNC_DATASET_NAME.key()));
+ assertEquals("us-west1", props.getProperty(BIGQUERY_SYNC_DATASET_LOCATION.key()));
+ assertEquals("foobartable", props.getProperty(BIGQUERY_SYNC_TABLE_NAME.key()));
+ assertEquals("gs://foobartable/year=*", props.getProperty(BIGQUERY_SYNC_SOURCE_URI.key()));
+ assertEquals("gs://foobartable/", props.getProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key()));
+ assertEquals("gs://foobartable", props.getProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key()));
+ assertEquals("year,month,day", props.getProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key()));
+ assertEquals("true", props.getProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key()));
+ assertFalse(props.containsKey(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key()));
+ }
+}
diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml
index 2149d1d35ae..4bf60363106 100644
--- a/packaging/hudi-gcp-bundle/pom.xml
+++ b/packaging/hudi-gcp-bundle/pom.xml
@@ -95,9 +95,9 @@
<include>org.apache.hudi:hudi-common</include>
<include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.hudi:hudi-sync-common</include>
+ <include>org.apache.hudi:hudi-hive-sync</include>
<include>org.apache.hudi:hudi-gcp</include>
<include>org.apache.parquet:parquet-avro</include>
-
<include>com.google.cloud:google-cloud-bigquery</include>
<include>com.beust:jcommander</include>
<include>commons-io:commons-io</include>
@@ -164,6 +164,12 @@
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-hive-sync</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-gcp</artifactId>