You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2020/05/12 06:33:02 UTC

[kylin] 01/02: KYLIN-4298 KYLIN-4299

This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch 3.0.x
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 90f0946476bbed70685fdee0326ea929e3c97acd
Author: Rongnengwei <wr...@163.com>
AuthorDate: Sun Jan 5 18:35:41 2020 +0800

    KYLIN-4298 KYLIN-4299
---
 .../org/apache/kylin/common/KylinConfigBase.java   | 33 ++++++++------
 .../src/main/resources/kylin-defaults.properties   |  4 +-
 metrics-reporter-hive/pom.xml                      |  5 +-
 .../kylin/metrics/lib/impl/hive/HiveProducer.java  | 38 ++++++++--------
 .../apache/kylin/source/hive/CLIHiveClient.java    | 34 +++++++-------
 .../source/hive/HiveMetaStoreClientFactory.java    | 53 ++++++++++++++++++++++
 6 files changed, 113 insertions(+), 54 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4c8d437..7953807 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -18,6 +18,20 @@
 
 package org.apache.kylin.common;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.text.StrSubstitutor;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.kylin.common.lock.DistributedLockFactory;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import java.io.File;
 import java.io.IOException;
 import java.io.Serializable;
@@ -33,21 +47,6 @@ import java.util.TimeZone;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.text.StrSubstitutor;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.kylin.common.lock.DistributedLockFactory;
-import org.apache.kylin.common.util.ClassUtil;
-import org.apache.kylin.common.util.CliCommandExecutor;
-import org.apache.kylin.common.util.HadoopUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
 /**
  * An abstract class to encapsulate access to a set of 'properties'.
  * Subclass can override methods in this class to extend the content of the 'properties',
@@ -1052,6 +1051,10 @@ public abstract class KylinConfigBase implements Serializable {
         return getOptional("kylin.source.hive.intermediate-table-prefix", "kylin_intermediate_");
     }
 
+    public String getHiveMetaDataType() {
+        return getOptional("kylin.source.hive.metadata-type", "hcatalog");
+    }
+
     // ============================================================================
     // SOURCE.KAFKA
     // ============================================================================
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index cc6ee38..128b3d1 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -104,7 +104,9 @@ kylin.source.hive.database-for-flat-table=default
 
 # Whether redistribute the intermediate flat table before building
 kylin.source.hive.redistribute-flat-table=true
-
+# Defines how Kylin accesses Hive metadata (hcatalog by default).
+# When Kylin is deployed on AWS EMR with Glue as the external metastore, use gluecatalog instead.
+kylin.source.hive.metadata-type=hcatalog
 
 ### STORAGE ###
 
diff --git a/metrics-reporter-hive/pom.xml b/metrics-reporter-hive/pom.xml
index d588b04..14d2c52 100644
--- a/metrics-reporter-hive/pom.xml
+++ b/metrics-reporter-hive/pom.xml
@@ -36,7 +36,10 @@
             <groupId>org.apache.kylin</groupId>
             <artifactId>kylin-core-metrics</artifactId>
         </dependency>
-
+        <dependency>
+            <groupId>org.apache.kylin</groupId>
+            <artifactId>kylin-source-hive</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.apache.hive.hcatalog</groupId>
             <artifactId>hive-hcatalog-core</artifactId>
diff --git a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
index 53a9385..7e27bdb 100644
--- a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
+++ b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
@@ -14,25 +14,24 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
-*/
+ */
 
 package org.apache.kylin.metrics.lib.impl.hive;
 
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.cli.CliSessionState;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -42,16 +41,17 @@ import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
 import org.apache.kylin.metrics.lib.Record;
 import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
 import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord.RecordKey;
+import org.apache.kylin.source.hive.HiveMetaStoreClientFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.cache.RemovalListener;
-import com.google.common.cache.RemovalNotification;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
 
 public class HiveProducer {
 
@@ -98,7 +98,7 @@ public class HiveProducer {
                 .build(new CacheLoader<Pair<String, String>, Pair<String, List<FieldSchema>>>() {
                     @Override
                     public Pair<String, List<FieldSchema>> load(Pair<String, String> tableName) throws Exception {
-                        HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(hiveConf);
+                        IMetaStoreClient metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
                         String tableLocation = metaStoreClient.getTable(tableName.getFirst(), tableName.getSecond())
                                 .getSd().getLocation();
                         logger.debug("Find table location for {} at {}", tableName.getSecond(), tableLocation);
@@ -307,7 +307,7 @@ public class HiveProducer {
     }
 
     public HiveProducerRecord parseToHiveProducerRecord(String tableName, Map<String, String> partitionKVs,
-            Map<String, Object> rawValue) throws Exception {
+                                                        Map<String, Object> rawValue) throws Exception {
         Pair<String, String> tableNameSplits = ActiveReservoirReporter.getTableNameSplits(tableName);
         List<FieldSchema> fields = tableFieldSchemaCache.get(tableNameSplits).getSecond();
         List<Object> columnValues = Lists.newArrayListWithExpectedSize(fields.size());
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
index 0592362..5cc15e4 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
@@ -6,26 +6,22 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
-*/
+ */
 
 package org.apache.kylin.source.hive;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
+import com.google.common.collect.Lists;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -33,7 +29,10 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.HiveCmdBuilder;
 import org.apache.kylin.common.util.Pair;
 
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
 
 /**
  * Hive meta API client for Kylin
@@ -42,7 +41,7 @@ import com.google.common.collect.Lists;
  */
 public class CLIHiveClient implements IHiveClient {
     protected HiveConf hiveConf = null;
-    protected HiveMetaStoreClient metaStoreClient = null;
+    protected IMetaStoreClient metaStoreClient = null;
 
     public CLIHiveClient() {
         hiveConf = new HiveConf(CLIHiveClient.class);
@@ -160,21 +159,20 @@ public class CLIHiveClient implements IHiveClient {
         return data;
     }
 
-    private HiveMetaStoreClient getMetaStoreClient() throws Exception {
+    private IMetaStoreClient getMetaStoreClient() throws Exception {
         if (metaStoreClient == null) {
-            metaStoreClient = new HiveMetaStoreClient(hiveConf);
+            metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
         }
         return metaStoreClient;
     }
 
     /**
      * COPIED FROM org.apache.hadoop.hive.ql.stats.StatsUtil for backward compatibility
-     * 
+     * <p>
      * Get basic stats of table
-     * @param table
-     *          - table
-     * @param statType
-     *          - type of stats
+     *
+     * @param table    - table
+     * @param statType - type of stats
      * @return value of stats
      */
     private long getBasicStatForTable(org.apache.hadoop.hive.ql.metadata.Table table, String statType) {
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
new file mode 100644
index 0000000..984623f
--- /dev/null
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.source.hive;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.kylin.common.KylinConfig;
+
+import java.io.IOException;
+
+public class HiveMetaStoreClientFactory {
+
+    /**
+     * Creates a Hive metastore client according to {@code kylin.source.hive.metadata-type}.
+     * {@code hcatalog} instantiates {@link HiveMetaStoreClient} directly; {@code gluecatalog} delegates to
+     * {@link HCatUtil#getHiveMetastoreClient(HiveConf)}, which per the original commit notes obtains
+     * com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory from the configuration file.
+     *
+     * @param hiveConf Hive configuration handed to the client being created
+     * @return the metastore client for the configured metadata type, never {@code null}
+     * @throws MetaException propagated from the underlying client creation
+     * @throws IOException propagated from the underlying client creation
+     * @throws IllegalArgumentException if the configured metadata type is neither hcatalog nor gluecatalog
+     */
+    public static IMetaStoreClient getHiveMetaStoreClient(HiveConf hiveConf) throws MetaException, IOException {
+        String metadataType = KylinConfig.getInstanceFromEnv().getHiveMetaDataType();
+        if ("hcatalog".equals(metadataType)) {
+            return new HiveMetaStoreClient(hiveConf);
+        } else if ("gluecatalog".equals(metadataType)) {
+            return HCatUtil.getHiveMetastoreClient(hiveConf);
+        }
+        throw new IllegalArgumentException("Unsupported kylin.source.hive.metadata-type: " + metadataType);
+    }
+}