You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ni...@apache.org on 2020/05/12 07:19:39 UTC
[kylin] branch 2.6.x updated: KYLIN-4298 KYLIN-4299
This is an automated email from the ASF dual-hosted git repository.
nic pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push:
new a61fec6 KYLIN-4298 KYLIN-4299
a61fec6 is described below
commit a61fec60f94383d1ea4b38c9e64922b2f2c5221e
Author: Rongnengwei <wr...@163.com>
AuthorDate: Sun Jan 5 18:35:41 2020 +0800
KYLIN-4298 KYLIN-4299
---
.../org/apache/kylin/common/KylinConfigBase.java | 4 ++
.../src/main/resources/kylin-defaults.properties | 4 +-
metrics-reporter-hive/pom.xml | 5 +-
.../kylin/metrics/lib/impl/hive/HiveProducer.java | 39 ++++++++--------
.../apache/kylin/source/hive/CLIHiveClient.java | 37 ++++++++-------
.../source/hive/HiveMetaStoreClientFactory.java | 53 ++++++++++++++++++++++
6 files changed, 101 insertions(+), 41 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 3e6bd5f..88218f3 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -977,6 +977,10 @@ abstract public class KylinConfigBase implements Serializable {
}
}
+    /** Reads kylin.source.hive.metadata-type, passing "hcatalog" as the fallback value. */
+    public String getHiveMetaDataType() {
+        return this.getOptional("kylin.source.hive.metadata-type", "hcatalog");
+    }
// ============================================================================
// SOURCE.KAFKA
// ============================================================================
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index 3491c15..6af19d4 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -101,7 +101,9 @@ kylin.source.hive.database-for-flat-table=default
# Whether redistribute the intermediate flat table before building
kylin.source.hive.redistribute-flat-table=true
-
+# Defines how Kylin accesses Hive metadata
+# When Kylin is deployed on AWS EMR and Glue is used as the external metastore, use gluecatalog instead
+kylin.source.hive.metadata-type=hcatalog
### STORAGE ###
diff --git a/metrics-reporter-hive/pom.xml b/metrics-reporter-hive/pom.xml
index 9ca49ff..aa0f0c1 100644
--- a/metrics-reporter-hive/pom.xml
+++ b/metrics-reporter-hive/pom.xml
@@ -36,7 +36,10 @@
<groupId>org.apache.kylin</groupId>
<artifactId>kylin-core-metrics</artifactId>
</dependency>
-
+ <dependency>
+ <groupId>org.apache.kylin</groupId>
+ <artifactId>kylin-source-hive</artifactId>
+ </dependency>
<dependency>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
diff --git a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
index 72121a9..b833ee1 100644
--- a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
+++ b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
@@ -18,20 +18,19 @@
package org.apache.kylin.metrics.lib.impl.hive;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -41,16 +40,17 @@ import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
import org.apache.kylin.metrics.lib.Record;
import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord.RecordKey;
+import org.apache.kylin.source.hive.HiveMetaStoreClientFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.cache.RemovalListener;
-import com.google.common.cache.RemovalNotification;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
public class HiveProducer {
@@ -62,13 +62,12 @@ public class HiveProducer {
private final FileSystem fileSystem;
private final LoadingCache<Pair<String, String>, Pair<String, List<FieldSchema>>> tableFieldSchemaCache;
private final String CONTENT_FILE_NAME;
- private FSDataOutputStream fout;
- private long partId = 0;
-
/**
* Some cloud file system, like AWS S3, didn't support append action to exist file.
*/
private final boolean supportAppend;
+ private FSDataOutputStream fout;
+ private long partId = 0;
public HiveProducer(Properties props) throws Exception {
this(props, new HiveConf());
@@ -90,7 +89,7 @@ public class HiveProducer {
}).maximumSize(CACHE_MAX_SIZE).build(new CacheLoader<Pair<String, String>, Pair<String, List<FieldSchema>>>() {
@Override
public Pair<String, List<FieldSchema>> load(Pair<String, String> tableName) throws Exception {
- HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(hiveConf);
+ IMetaStoreClient metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
String tableLocation = metaStoreClient.getTable(tableName.getFirst(), tableName.getSecond()).getSd().getLocation();
logger.debug("Find table location for {} at {}", tableName.getSecond(), tableLocation);
List<FieldSchema> fields = metaStoreClient.getFields(tableName.getFirst(), tableName.getSecond());
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
index bc9f17e..2491cc4 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
@@ -6,25 +6,22 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
package org.apache.kylin.source.hive;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
+import com.google.common.collect.Lists;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
@@ -32,16 +29,18 @@ import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HiveCmdBuilder;
import org.apache.kylin.common.util.Pair;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
/**
* Hive meta API client for Kylin
- * @author shaoshi
*
+ * @author shaoshi
*/
public class CLIHiveClient implements IHiveClient {
protected HiveConf hiveConf = null;
- protected HiveMetaStoreClient metaStoreClient = null;
+ protected IMetaStoreClient metaStoreClient = null;
public CLIHiveClient() {
hiveConf = new HiveConf(CLIHiveClient.class);
@@ -49,7 +48,8 @@ public class CLIHiveClient implements IHiveClient {
/**
* only used by Deploy Util
- * @throws IOException
+ *
+ * @throws IOException
*/
@Override
public void executeHQL(String hql) throws IOException {
@@ -129,21 +129,20 @@ public class CLIHiveClient implements IHiveClient {
return getBasicStatForTable(new org.apache.hadoop.hive.ql.metadata.Table(table), StatsSetupConst.ROW_COUNT);
}
- private HiveMetaStoreClient getMetaStoreClient() throws Exception {
+ private IMetaStoreClient getMetaStoreClient() throws Exception {
if (metaStoreClient == null) {
- metaStoreClient = new HiveMetaStoreClient(hiveConf);
+ metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
}
return metaStoreClient;
}
/**
* COPIED FROM org.apache.hadoop.hive.ql.stats.StatsUtil for backward compatibility
- *
+ * <p>
* Get basic stats of table
- * @param table
- * - table
- * @param statType
- * - type of stats
+ *
+ * @param table - table
+ * @param statType - type of stats
* @return value of stats
*/
private long getBasicStatForTable(org.apache.hadoop.hive.ql.metadata.Table table, String statType) {
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
new file mode 100644
index 0000000..984623f
--- /dev/null
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.source.hive;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.kylin.common.KylinConfig;
+
+import java.io.IOException;
+
+public class HiveMetaStoreClientFactory {
+
+    /**
+     * Creates the metastore client selected by kylin.source.hive.metadata-type: "hcatalog" builds a
+     * HiveMetaStoreClient directly, "gluecatalog" delegates to HCatUtil.getHiveMetastoreClient(hiveConf)
+     * (intended for AWS Glue; the concrete client is then presumably chosen by the Hive configuration).
+     *
+     * @param hiveConf Hive configuration handed to the client being created
+     * @return the metastore client, never null
+     * @throws MetaException declared by the underlying client creation
+     * @throws IOException declared by the underlying client creation
+     * @throws IllegalArgumentException if the configured metadata type is neither of the two above
+     */
+    public static IMetaStoreClient getHiveMetaStoreClient(HiveConf hiveConf) throws MetaException, IOException {
+        String metadataType = KylinConfig.getInstanceFromEnv().getHiveMetaDataType();
+        if ("hcatalog".equals(metadataType)) {
+            return new HiveMetaStoreClient(hiveConf);
+        } else if ("gluecatalog".equals(metadataType)) {
+            return HCatUtil.getHiveMetastoreClient(hiveConf);
+        }
+        throw new IllegalArgumentException("Unsupported kylin.source.hive.metadata-type: " + metadataType);
+    }
+
+}