You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2020/01/06 01:57:10 UTC
[kylin] branch master updated: KYLIN-4298 KYLIN-4299
This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push:
new 7a97c73 KYLIN-4298 KYLIN-4299
7a97c73 is described below
commit 7a97c733a975ff9e3ef91624a1a3b5158f7533b6
Author: Rongnengwei <wr...@163.com>
AuthorDate: Sun Jan 5 18:35:41 2020 +0800
KYLIN-4298 KYLIN-4299
---
.../org/apache/kylin/common/KylinConfigBase.java | 33 ++++++++------
.../src/main/resources/kylin-defaults.properties | 4 +-
metrics-reporter-hive/pom.xml | 5 +-
.../kylin/metrics/lib/impl/hive/HiveProducer.java | 38 ++++++++--------
.../apache/kylin/source/hive/CLIHiveClient.java | 34 +++++++-------
.../source/hive/HiveMetaStoreClientFactory.java | 53 ++++++++++++++++++++++
6 files changed, 113 insertions(+), 54 deletions(-)
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 7cd45d3..44629e6 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -18,6 +18,20 @@
package org.apache.kylin.common;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.text.StrSubstitutor;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.kylin.common.lock.DistributedLockFactory;
+import org.apache.kylin.common.util.ClassUtil;
+import org.apache.kylin.common.util.CliCommandExecutor;
+import org.apache.kylin.common.util.HadoopUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
@@ -33,21 +47,6 @@ import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.text.StrSubstitutor;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.kylin.common.lock.DistributedLockFactory;
-import org.apache.kylin.common.util.ClassUtil;
-import org.apache.kylin.common.util.CliCommandExecutor;
-import org.apache.kylin.common.util.HadoopUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
/**
* An abstract class to encapsulate access to a set of 'properties'.
* Subclass can override methods in this class to extend the content of the 'properties',
@@ -1102,6 +1101,10 @@ public abstract class KylinConfigBase implements Serializable {
return getOptional("kylin.source.hive.intermediate-table-prefix", "kylin_intermediate_");
}
+ public String getHiveMetaDataType() { // value of "kylin.source.hive.metadata-type"; "hcatalog" is passed to getOptional as the default
+ return getOptional("kylin.source.hive.metadata-type", "hcatalog");
+ }
+
// ============================================================================
// SOURCE.KAFKA
// ============================================================================
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index ddf76cd..1dee90f 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -104,7 +104,9 @@ kylin.source.hive.database-for-flat-table=default
# Whether redistribute the intermediate flat table before building
kylin.source.hive.redistribute-flat-table=true
-
+# Defines how Kylin accesses Hive metadata
+# When Kylin is deployed on AWS EMR and Glue is used as the external metastore, use gluecatalog instead
+kylin.source.hive.metadata-type=hcatalog
### STORAGE ###
diff --git a/metrics-reporter-hive/pom.xml b/metrics-reporter-hive/pom.xml
index 0c79050..c3b0f33 100644
--- a/metrics-reporter-hive/pom.xml
+++ b/metrics-reporter-hive/pom.xml
@@ -36,7 +36,10 @@
<groupId>org.apache.kylin</groupId>
<artifactId>kylin-core-metrics</artifactId>
</dependency>
-
+ <dependency>
+ <groupId>org.apache.kylin</groupId>
+ <artifactId>kylin-source-hive</artifactId>
+ </dependency>
<dependency>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
diff --git a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
index 5082b6a..5ab6c9f 100644
--- a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
+++ b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java
@@ -14,25 +14,24 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
package org.apache.kylin.metrics.lib.impl.hive;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -42,16 +41,17 @@ import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
import org.apache.kylin.metrics.lib.Record;
import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord.RecordKey;
+import org.apache.kylin.source.hive.HiveMetaStoreClientFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.cache.RemovalListener;
-import com.google.common.cache.RemovalNotification;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
public class HiveProducer {
@@ -93,7 +93,7 @@ public class HiveProducer {
.build(new CacheLoader<Pair<String, String>, Pair<String, List<FieldSchema>>>() {
@Override
public Pair<String, List<FieldSchema>> load(Pair<String, String> tableName) throws Exception {
- HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(hiveConf);
+ IMetaStoreClient metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
String tableLocation = metaStoreClient.getTable(tableName.getFirst(), tableName.getSecond())
.getSd().getLocation();
List<FieldSchema> fields = metaStoreClient.getFields(tableName.getFirst(),
@@ -262,7 +262,7 @@ public class HiveProducer {
}
public HiveProducerRecord parseToHiveProducerRecord(String tableName, Map<String, String> partitionKVs,
- Map<String, Object> rawValue) throws Exception {
+ Map<String, Object> rawValue) throws Exception {
Pair<String, String> tableNameSplits = ActiveReservoirReporter.getTableNameSplits(tableName);
List<FieldSchema> fields = tableFieldSchemaCache.get(tableNameSplits).getSecond();
List<Object> columnValues = Lists.newArrayListWithExpectedSize(fields.size());
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
index 0592362..5cc15e4 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java
@@ -6,26 +6,22 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
package org.apache.kylin.source.hive;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
+import com.google.common.collect.Lists;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
@@ -33,7 +29,10 @@ import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HiveCmdBuilder;
import org.apache.kylin.common.util.Pair;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
/**
* Hive meta API client for Kylin
@@ -42,7 +41,7 @@ import com.google.common.collect.Lists;
*/
public class CLIHiveClient implements IHiveClient {
protected HiveConf hiveConf = null;
- protected HiveMetaStoreClient metaStoreClient = null;
+ protected IMetaStoreClient metaStoreClient = null;
public CLIHiveClient() {
hiveConf = new HiveConf(CLIHiveClient.class);
@@ -160,21 +159,20 @@ public class CLIHiveClient implements IHiveClient {
return data;
}
- private HiveMetaStoreClient getMetaStoreClient() throws Exception {
+ private IMetaStoreClient getMetaStoreClient() throws Exception {
if (metaStoreClient == null) {
- metaStoreClient = new HiveMetaStoreClient(hiveConf);
+ metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf);
}
return metaStoreClient;
}
/**
* COPIED FROM org.apache.hadoop.hive.ql.stats.StatsUtil for backward compatibility
- *
+ * <p>
* Get basic stats of table
- * @param table
- * - table
- * @param statType
- * - type of stats
+ *
+ * @param table - table
+ * @param statType - type of stats
* @return value of stats
*/
private long getBasicStatForTable(org.apache.hadoop.hive.ql.metadata.Table table, String statType) {
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
new file mode 100644
index 0000000..984623f
--- /dev/null
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.source.hive;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.kylin.common.KylinConfig;
+
+import java.io.IOException;
+
+public class HiveMetaStoreClientFactory {
+
+ /**
+ * Get hivemetastoreclient. At present, it supports hivecatalog and glue catalog. When it is configured as hcatalog,
+ * you can directly new hivemetastoreclient (hiveconf), which is more efficient.
+ * But if you need to use hcatutil.gethivemetastoreclient (hiveconf) to configure gluecatalog,
+ * you can get: com.amazon aws.glue.catalog.metastore.awsgluedatacataloghiveclientfactory according to the configuration file
+ *
+ * @param hiveConf
+ * @return metaStoreClient
+ * @throws MetaException
+ * @throws IOException
+ */
+ public static IMetaStoreClient getHiveMetaStoreClient(HiveConf hiveConf) throws MetaException, IOException {
+ IMetaStoreClient metaStoreClient = null;
+ if ("hcatalog".equals(KylinConfig.getInstanceFromEnv().getHiveMetaDataType())) {
+ metaStoreClient = new HiveMetaStoreClient(hiveConf);
+ } else if ("gluecatalog".equals(KylinConfig.getInstanceFromEnv().getHiveMetaDataType())) {
+ metaStoreClient = HCatUtil.getHiveMetastoreClient(hiveConf);
+ }
+ return metaStoreClient;
+ }
+
+}