You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2020/05/12 07:19:39 UTC

[kylin] branch 2.6.x updated: KYLIN-4298 KYLIN-4299

This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/2.6.x by this push:
     new a61fec6  KYLIN-4298 KYLIN-4299
a61fec6 is described below

commit a61fec60f94383d1ea4b38c9e64922b2f2c5221e
Author: Rongnengwei <wr...@163.com>
AuthorDate: Sun Jan 5 18:35:41 2020 +0800

    KYLIN-4298 KYLIN-4299
---
 .../org/apache/kylin/common/KylinConfigBase.java   |  4 ++
 .../src/main/resources/kylin-defaults.properties   |  4 +-
 metrics-reporter-hive/pom.xml                      |  5 +-
 .../kylin/metrics/lib/impl/hive/HiveProducer.java  | 39 ++++++++--------
 .../apache/kylin/source/hive/CLIHiveClient.java    | 37 ++++++++-------
 .../source/hive/HiveMetaStoreClientFactory.java    | 53 ++++++++++++++++++++++
 6 files changed, 101 insertions(+), 41 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 3e6bd5f..88218f3 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -977,6 +977,10 @@ abstract public class KylinConfigBase implements Serializable {
         }
     }
 
+    public String getHiveMetaDataType() { // value of kylin.source.hive.metadata-type; falls back to "hcatalog"
+        return getOptional("kylin.source.hive.metadata-type", "hcatalog");
+    }
+
     // ============================================================================
     // SOURCE.KAFKA
     // ============================================================================
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index 3491c15..6af19d4 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -101,7 +101,9 @@ kylin.source.hive.database-for-flat-table=default
 
 # Whether redistribute the intermediate flat table before building
 kylin.source.hive.redistribute-flat-table=true
-
+# Defines how Kylin accesses Hive metadata.
+# When Kylin is deployed on AWS EMR with Glue as the external metastore, use gluecatalog instead.
+kylin.source.hive.metadata-type=hcatalog
 
 ### STORAGE ###
 
diff --git a/metrics-reporter-hive/pom.xml b/metrics-reporter-hive/pom.xml
index 9ca49ff..aa0f0c1 100644
--- a/metrics-reporter-hive/pom.xml
+++ b/metrics-reporter-hive/pom.xml
@@ -36,7 +36,10 @@
             <groupId>org.apache.kylin</groupId>
             <artifactId>kylin-core-metrics</artifactId>
         </dependency>
-
+        <dependency>
+            <groupId>org.apache.kylin</groupId>
+            <artifactId>kylin-source-hive</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.apache.hive.hcatalog</groupId>
             <artifactId>hive-hcatalog-core</artifactId>
diff --git a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
index 72121a9..b833ee1 100644
--- a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
+++ b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
@@ -18,20 +18,19 @@
 
 package org.apache.kylin.metrics.lib.impl.hive;
 
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.cli.CliSessionState;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -41,16 +40,17 @@ import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
 import org.apache.kylin.metrics.lib.Record;
 import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
 import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord.RecordKey;
+import org.apache.kylin.source.hive.HiveMetaStoreClientFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.cache.RemovalListener;
-import com.google.common.cache.RemovalNotification;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
 
 public class HiveProducer {
 
@@ -62,13 +62,12 @@ public class HiveProducer {
     private final FileSystem fileSystem;
     private final LoadingCache<Pair<String, String>, Pair<String, List<FieldSchema>>> tableFieldSchemaCache;
     private final String CONTENT_FILE_NAME;
-    private FSDataOutputStream fout;
-    private long partId = 0;
-
     /**
      * Some cloud file system, like AWS S3, didn't support append action to exist file.
      */
     private final boolean supportAppend;
+    private FSDataOutputStream fout;
+    private long partId = 0;
 
     public HiveProducer(Properties props) throws Exception {
         this(props, new HiveConf());
@@ -90,7 +89,7 @@ public class HiveProducer {
         }).maximumSize(CACHE_MAX_SIZE).build(new CacheLoader<Pair<String, String>, Pair<String, List<FieldSchema>>>() {
             @Override
             public Pair<String, List<FieldSchema>> load(Pair<String, String> tableName) throws Exception {
-                HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(hiveConf);
+                IMetaStoreClient metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
                 String tableLocation = metaStoreClient.getTable(tableName.getFirst(), tableName.getSecond()).getSd().getLocation();
                 logger.debug("Find table location for {} at {}", tableName.getSecond(), tableLocation);
                 List<FieldSchema> fields = metaStoreClient.getFields(tableName.getFirst(), tableName.getSecond());
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
index bc9f17e..2491cc4 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
@@ -6,25 +6,22 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
-*/
+ */
 
 package org.apache.kylin.source.hive;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
+import com.google.common.collect.Lists;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -32,16 +29,18 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.HiveCmdBuilder;
 import org.apache.kylin.common.util.Pair;
 
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
 
 /**
  * Hive meta API client for Kylin
- * @author shaoshi
  *
+ * @author shaoshi
  */
 public class CLIHiveClient implements IHiveClient {
     protected HiveConf hiveConf = null;
-    protected HiveMetaStoreClient metaStoreClient = null;
+    protected IMetaStoreClient metaStoreClient = null;
 
     public CLIHiveClient() {
         hiveConf = new HiveConf(CLIHiveClient.class);
@@ -49,7 +48,8 @@ public class CLIHiveClient implements IHiveClient {
 
     /**
      * only used by Deploy Util
-     * @throws IOException 
+     *
+     * @throws IOException
      */
     @Override
     public void executeHQL(String hql) throws IOException {
@@ -129,21 +129,20 @@ public class CLIHiveClient implements IHiveClient {
         return getBasicStatForTable(new org.apache.hadoop.hive.ql.metadata.Table(table), StatsSetupConst.ROW_COUNT);
     }
 
-    private HiveMetaStoreClient getMetaStoreClient() throws Exception {
+    private IMetaStoreClient getMetaStoreClient() throws Exception {
         if (metaStoreClient == null) {
-            metaStoreClient = new HiveMetaStoreClient(hiveConf);
+            metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
         }
         return metaStoreClient;
     }
 
     /**
      * COPIED FROM org.apache.hadoop.hive.ql.stats.StatsUtil for backward compatibility
-     * 
+     * <p>
      * Get basic stats of table
-     * @param table
-     *          - table
-     * @param statType
-     *          - type of stats
+     *
+     * @param table    - table
+     * @param statType - type of stats
      * @return value of stats
      */
     private long getBasicStatForTable(org.apache.hadoop.hive.ql.metadata.Table table, String statType) {
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
new file mode 100644
index 0000000..984623f
--- /dev/null
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.source.hive;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.kylin.common.KylinConfig;
+
+import java.io.IOException;
+
+public class HiveMetaStoreClientFactory {
+
+    /**
+     * Creates a Hive metastore client according to {@code kylin.source.hive.metadata-type}:
+     * {@code hcatalog} instantiates {@link HiveMetaStoreClient} directly, while {@code gluecatalog} goes through
+     * {@link HCatUtil#getHiveMetastoreClient(HiveConf)}; any other value is rejected rather than yielding null.
+     *
+     * @param hiveConf Hive configuration handed to the created client
+     * @return the metastore client selected by the configured metadata type
+     * @throws MetaException if creating the client fails
+     * @throws IOException if creating the client fails
+     * @throws IllegalArgumentException if the configured metadata type is not supported
+     */
+    public static IMetaStoreClient getHiveMetaStoreClient(HiveConf hiveConf) throws MetaException, IOException {
+        String metadataType = KylinConfig.getInstanceFromEnv().getHiveMetaDataType();
+        if ("hcatalog".equals(metadataType)) {
+            return new HiveMetaStoreClient(hiveConf);
+        } else if ("gluecatalog".equals(metadataType)) {
+            return HCatUtil.getHiveMetastoreClient(hiveConf);
+        }
+        throw new IllegalArgumentException("Unsupported kylin.source.hive.metadata-type: " + metadataType);
+    }
+
+}