You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@gobblin.apache.org by GitBox <gi...@apache.org> on 2020/07/17 19:21:10 UTC

[GitHub] [incubator-gobblin] yukuai518 commented on a change in pull request #3064: [GOBBLIN-1216] Embedded Hive Distcp

yukuai518 commented on a change in pull request #3064:
URL: https://github.com/apache/incubator-gobblin/pull/3064#discussion_r456629126



##########
File path: gobblin-data-management/src/test/java/org/apache/gobblin/runtime/embedded/EmbeddedGobblinDistcpTest.java
##########
@@ -83,6 +118,87 @@ public void test() throws Exception {
     Assert.assertTrue(new File(tmpTarget, fileName).exists());
   }
 
+  @Test
+  public void hiveTest() throws Exception {
+    // Hive Environment set-up
+    HiveConf hiveConf = new HiveConf();
+
+    try {
+      // Start a Hive session in this thread and register the UDF
+      SessionState.start(hiveConf);
+      SessionState.get().initTxnMgr(hiveConf);
+    } catch (HiveException he) {
+      throw new RuntimeException("Failed to start Hive session.", he);
+    }
+    try {
+      jdbcConnector = HiveJdbcConnector.newEmbeddedConnector(2);
+    } catch (SQLException se) {
+      throw new RuntimeException("Cannot initialize the jdbc-connector due to: ", se);
+    }

Review comment:
       Similar question: could these two try blocks be combined into one?

##########
File path: gobblin-data-management/src/test/java/org/apache/gobblin/runtime/embedded/EmbeddedGobblinDistcpTest.java
##########
@@ -21,37 +21,72 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.gobblin.configuration.ConfigurationKeys;
-import org.apache.gobblin.converter.GobblinMetricsPinotFlattenerConverter;
-import org.apache.gobblin.data.management.copy.CopyConfiguration;
-import org.apache.gobblin.data.management.copy.CopySource;
-import org.apache.gobblin.data.management.copy.SchemaCheckedCopySource;
-import org.apache.gobblin.runtime.api.JobExecutionResult;
-import org.apache.gobblin.util.PathUtils;
-import org.apache.gobblin.util.filesystem.DataFileVersionStrategy;
-
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import com.google.api.client.util.Charsets;
 import com.google.common.collect.Sets;
 import com.google.common.io.Files;
 import com.typesafe.config.Config;
 
+import org.apache.gobblin.configuration.ConfigurationKeys;
+import org.apache.gobblin.converter.GobblinMetricsPinotFlattenerConverter;
+import org.apache.gobblin.data.management.copy.CopyConfiguration;
+import org.apache.gobblin.data.management.copy.CopySource;
+import org.apache.gobblin.data.management.copy.SchemaCheckedCopySource;
+import org.apache.gobblin.runtime.api.JobExecutionResult;
+import org.apache.gobblin.util.HiveJdbcConnector;
+import org.apache.gobblin.util.PathUtils;
+import org.apache.gobblin.util.filesystem.DataFileVersionStrategy;
+
 
 public class EmbeddedGobblinDistcpTest {
+  private HiveJdbcConnector jdbcConnector;
+  private IMetaStoreClient metaStoreClient;
+  private static final String TEST_DB = "testdb";
+  private static final String TEST_TABLE = "test_table";
+  private static final String TARGET_PATH = "/tmp/target";
+  private static final String TARGET_DB = "target";
+
+  @BeforeClass
+  public void setup() throws Exception {
+    try {
+      HiveConf hiveConf = new HiveConf();
+      // Start a Hive session in this thread and register the UDF
+      SessionState.start(hiveConf);
+      SessionState.get().initTxnMgr(hiveConf);
+      metaStoreClient = new HiveMetaStoreClient(new HiveConf());
+    } catch (HiveException he) {
+      throw new RuntimeException("Failed to start Hive session.", he);
+    }
+    try {

Review comment:
       Why not combine the two try blocks into one, like this?
   
   try {
    ...
   } catch (HiveException e) {
   ...
   } catch (SQLException e) {
   ...
   } 

##########
File path: gobblin-data-management/src/test/java/org/apache/gobblin/runtime/embedded/EmbeddedGobblinDistcpTest.java
##########
@@ -21,37 +21,72 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.gobblin.configuration.ConfigurationKeys;
-import org.apache.gobblin.converter.GobblinMetricsPinotFlattenerConverter;
-import org.apache.gobblin.data.management.copy.CopyConfiguration;
-import org.apache.gobblin.data.management.copy.CopySource;
-import org.apache.gobblin.data.management.copy.SchemaCheckedCopySource;
-import org.apache.gobblin.runtime.api.JobExecutionResult;
-import org.apache.gobblin.util.PathUtils;
-import org.apache.gobblin.util.filesystem.DataFileVersionStrategy;
-
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import com.google.api.client.util.Charsets;
 import com.google.common.collect.Sets;
 import com.google.common.io.Files;
 import com.typesafe.config.Config;
 
+import org.apache.gobblin.configuration.ConfigurationKeys;
+import org.apache.gobblin.converter.GobblinMetricsPinotFlattenerConverter;
+import org.apache.gobblin.data.management.copy.CopyConfiguration;
+import org.apache.gobblin.data.management.copy.CopySource;
+import org.apache.gobblin.data.management.copy.SchemaCheckedCopySource;
+import org.apache.gobblin.runtime.api.JobExecutionResult;
+import org.apache.gobblin.util.HiveJdbcConnector;
+import org.apache.gobblin.util.PathUtils;
+import org.apache.gobblin.util.filesystem.DataFileVersionStrategy;
+
 
 public class EmbeddedGobblinDistcpTest {
+  private HiveJdbcConnector jdbcConnector;
+  private IMetaStoreClient metaStoreClient;
+  private static final String TEST_DB = "testdb";
+  private static final String TEST_TABLE = "test_table";
+  private static final String TARGET_PATH = "/tmp/target";
+  private static final String TARGET_DB = "target";
+
+  @BeforeClass
+  public void setup() throws Exception {
+    try {
+      HiveConf hiveConf = new HiveConf();
+      // Start a Hive session in this thread and register the UDF
+      SessionState.start(hiveConf);

Review comment:
       When do we close the session?

##########
File path: gobblin-data-management/src/test/java/org/apache/gobblin/runtime/embedded/EmbeddedGobblinDistcpTest.java
##########
@@ -83,6 +118,87 @@ public void test() throws Exception {
     Assert.assertTrue(new File(tmpTarget, fileName).exists());
   }
 
+  @Test
+  public void hiveTest() throws Exception {
+    // Hive Environment set-up
+    HiveConf hiveConf = new HiveConf();
+
+    try {
+      // Start a Hive session in this thread and register the UDF
+      SessionState.start(hiveConf);
+      SessionState.get().initTxnMgr(hiveConf);
+    } catch (HiveException he) {
+      throw new RuntimeException("Failed to start Hive session.", he);
+    }
+    try {
+      jdbcConnector = HiveJdbcConnector.newEmbeddedConnector(2);
+    } catch (SQLException se) {
+      throw new RuntimeException("Cannot initialize the jdbc-connector due to: ", se);
+    }

Review comment:
       Would you help me understand why we need a separate session here instead of using the session created in the setup? Maybe add some comments?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org