You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2017/07/11 17:44:36 UTC
[2/3] hive git commit: HIVE-17070 remove .orig files from src (Eugene
Koifman, reviewed by Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/7fc72367/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
deleted file mode 100644
index da48a7c..0000000
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
+++ /dev/null
@@ -1,4717 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.conf;
-
-import com.google.common.base.Joiner;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
-import org.apache.hadoop.hive.conf.Validator.PatternSet;
-import org.apache.hadoop.hive.conf.Validator.RangeValidator;
-import org.apache.hadoop.hive.conf.Validator.RatioValidator;
-import org.apache.hadoop.hive.conf.Validator.SizeValidator;
-import org.apache.hadoop.hive.conf.Validator.StringSet;
-import org.apache.hadoop.hive.conf.Validator.TimeValidator;
-import org.apache.hadoop.hive.conf.Validator.WritableDirectoryValidator;
-import org.apache.hadoop.hive.shims.Utils;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.util.Shell;
-import org.apache.hive.common.HiveCompat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.security.auth.login.LoginException;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.net.URI;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Hive Configuration.
- */
-public class HiveConf extends Configuration {
- protected String hiveJar;
- protected Properties origProp;
- protected String auxJars;
- private static final Logger l4j = LoggerFactory.getLogger(HiveConf.class);
- private static boolean loadMetastoreConfig = false;
- private static boolean loadHiveServer2Config = false;
- private static URL hiveDefaultURL = null;
- private static URL hiveSiteURL = null;
- private static URL hivemetastoreSiteUrl = null;
- private static URL hiveServer2SiteUrl = null;
-
- private static byte[] confVarByteArray = null;
-
-
- private static final Map<String, ConfVars> vars = new HashMap<String, ConfVars>();
- private static final Map<String, ConfVars> metaConfs = new HashMap<String, ConfVars>();
- private final List<String> restrictList = new ArrayList<String>();
- private final Set<String> hiddenSet = new HashSet<String>();
-
- private Pattern modWhiteListPattern = null;
- private volatile boolean isSparkConfigUpdated = false;
- private static final int LOG_PREFIX_LENGTH = 64;
-
- public boolean getSparkConfigUpdated() {
- return isSparkConfigUpdated;
- }
-
- public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
- this.isSparkConfigUpdated = isSparkConfigUpdated;
- }
-
- public interface EncoderDecoder<K, V> {
- V encode(K key);
- K decode(V value);
- }
-
- public static class URLEncoderDecoder implements EncoderDecoder<String, String> {
- private static final String UTF_8 = "UTF-8";
- @Override
- public String encode(String key) {
- try {
- return URLEncoder.encode(key, UTF_8);
- } catch (UnsupportedEncodingException e) {
- return key;
- }
- }
-
- @Override
- public String decode(String value) {
- try {
- return URLDecoder.decode(value, UTF_8);
- } catch (UnsupportedEncodingException e) {
- return value;
- }
- }
- }
- public static class EncoderDecoderFactory {
- public static final URLEncoderDecoder URL_ENCODER_DECODER = new URLEncoderDecoder();
- }
-
- static {
- ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
- if (classLoader == null) {
- classLoader = HiveConf.class.getClassLoader();
- }
-
- hiveDefaultURL = classLoader.getResource("hive-default.xml");
-
- // Look for hive-site.xml on the CLASSPATH and log its location if found.
- hiveSiteURL = findConfigFile(classLoader, "hive-site.xml", true);
- hivemetastoreSiteUrl = findConfigFile(classLoader, "hivemetastore-site.xml", false);
- hiveServer2SiteUrl = findConfigFile(classLoader, "hiveserver2-site.xml", false);
-
- for (ConfVars confVar : ConfVars.values()) {
- vars.put(confVar.varname, confVar);
- }
-
- Set<String> llapDaemonConfVarsSetLocal = new LinkedHashSet<>();
- populateLlapDaemonVarsSet(llapDaemonConfVarsSetLocal);
- llapDaemonVarsSet = Collections.unmodifiableSet(llapDaemonConfVarsSetLocal);
- }
-
- private static URL findConfigFile(ClassLoader classLoader, String name, boolean doLog) {
- URL result = classLoader.getResource(name);
- if (result == null) {
- String confPath = System.getenv("HIVE_CONF_DIR");
- result = checkConfigFile(new File(confPath, name));
- if (result == null) {
- String homePath = System.getenv("HIVE_HOME");
- String nameInConf = "conf" + File.pathSeparator + name;
- result = checkConfigFile(new File(homePath, nameInConf));
- if (result == null) {
- URI jarUri = null;
- try {
- jarUri = HiveConf.class.getProtectionDomain().getCodeSource().getLocation().toURI();
- } catch (Throwable e) {
- if (l4j.isInfoEnabled()) {
- l4j.info("Cannot get jar URI", e);
- }
- System.err.println("Cannot get jar URI: " + e.getMessage());
- }
- result = checkConfigFile(new File(new File(jarUri).getParentFile(), nameInConf));
- }
- }
- }
- if (doLog && l4j.isInfoEnabled()) {
- l4j.info("Found configuration file " + result);
- }
- return result;
- }
-
- private static URL checkConfigFile(File f) {
- try {
- return (f.exists() && f.isFile()) ? f.toURI().toURL() : null;
- } catch (Throwable e) {
- if (l4j.isInfoEnabled()) {
- l4j.info("Error looking for config " + f, e);
- }
- System.err.println("Error looking for config " + f + ": " + e.getMessage());
- return null;
- }
- }
-
-
-
-
- @InterfaceAudience.Private
- public static final String PREFIX_LLAP = "llap.";
- @InterfaceAudience.Private
- public static final String PREFIX_HIVE_LLAP = "hive.llap.";
-
- /**
- * Metastore related options that the db is initialized against. When a conf
- * var in this is list is changed, the metastore instance for the CLI will
- * be recreated so that the change will take effect.
- */
- public static final HiveConf.ConfVars[] metaVars = {
- HiveConf.ConfVars.METASTOREWAREHOUSE,
- HiveConf.ConfVars.REPLDIR,
- HiveConf.ConfVars.METASTOREURIS,
- HiveConf.ConfVars.METASTORE_SERVER_PORT,
- HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
- HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES,
- HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY,
- HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT,
- HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME,
- HiveConf.ConfVars.METASTOREPWD,
- HiveConf.ConfVars.METASTORECONNECTURLHOOK,
- HiveConf.ConfVars.METASTORECONNECTURLKEY,
- HiveConf.ConfVars.METASTORESERVERMINTHREADS,
- HiveConf.ConfVars.METASTORESERVERMAXTHREADS,
- HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE,
- HiveConf.ConfVars.METASTORE_INT_ORIGINAL,
- HiveConf.ConfVars.METASTORE_INT_ARCHIVED,
- HiveConf.ConfVars.METASTORE_INT_EXTRACTED,
- HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE,
- HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL,
- HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL,
- HiveConf.ConfVars.METASTORE_TOKEN_SIGNATURE,
- HiveConf.ConfVars.METASTORE_CACHE_PINOBJTYPES,
- HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE,
- HiveConf.ConfVars.METASTORE_VALIDATE_TABLES,
- HiveConf.ConfVars.METASTORE_DATANUCLEUS_INIT_COL_INFO,
- HiveConf.ConfVars.METASTORE_VALIDATE_COLUMNS,
- HiveConf.ConfVars.METASTORE_VALIDATE_CONSTRAINTS,
- HiveConf.ConfVars.METASTORE_STORE_MANAGER_TYPE,
- HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL,
- HiveConf.ConfVars.METASTORE_TRANSACTION_ISOLATION,
- HiveConf.ConfVars.METASTORE_CACHE_LEVEL2,
- HiveConf.ConfVars.METASTORE_CACHE_LEVEL2_TYPE,
- HiveConf.ConfVars.METASTORE_IDENTIFIER_FACTORY,
- HiveConf.ConfVars.METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK,
- HiveConf.ConfVars.METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS,
- HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX,
- HiveConf.ConfVars.METASTORE_EVENT_LISTENERS,
- HiveConf.ConfVars.METASTORE_TRANSACTIONAL_EVENT_LISTENERS,
- HiveConf.ConfVars.METASTORE_EVENT_CLEAN_FREQ,
- HiveConf.ConfVars.METASTORE_EVENT_EXPIRY_DURATION,
- HiveConf.ConfVars.METASTORE_EVENT_MESSAGE_FACTORY,
- HiveConf.ConfVars.METASTORE_FILTER_HOOK,
- HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
- HiveConf.ConfVars.METASTORE_END_FUNCTION_LISTENERS,
- HiveConf.ConfVars.METASTORE_PART_INHERIT_TBL_PROPS,
- HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX,
- HiveConf.ConfVars.METASTORE_INIT_HOOKS,
- HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS,
- HiveConf.ConfVars.HMSHANDLERATTEMPTS,
- HiveConf.ConfVars.HMSHANDLERINTERVAL,
- HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF,
- HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN,
- HiveConf.ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS,
- HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES,
- HiveConf.ConfVars.USERS_IN_ADMIN_ROLE,
- HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
- HiveConf.ConfVars.HIVE_TXN_MANAGER,
- HiveConf.ConfVars.HIVE_TXN_TIMEOUT,
- HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES,
- HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE,
- HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH,
- HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX,
- HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER,
- HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL,
- HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL,
- HiveConf.ConfVars.METASTORE_FASTPATH,
- HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE,
- HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE,
- HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
- HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY,
- HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE,
- HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE,
- HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT,
- HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT,
- HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL,
- HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL,
- HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS,
- HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES,
- HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL,
- HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY,
- HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL,
- HiveConf.ConfVars.METASTORE_HBASE_FILE_METADATA_THREADS
- };
-
- /**
- * User configurable Metastore vars
- */
- public static final HiveConf.ConfVars[] metaConfVars = {
- HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL,
- HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL_DDL,
- HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT,
- HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN,
- HiveConf.ConfVars.METASTORE_CAPABILITY_CHECK
- };
-
- static {
- for (ConfVars confVar : metaConfVars) {
- metaConfs.put(confVar.varname, confVar);
- }
- }
-
- public static final String HIVE_LLAP_DAEMON_SERVICE_PRINCIPAL_NAME = "hive.llap.daemon.service.principal";
- public static final String HIVE_SERVER2_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME =
- "hive.server2.authentication.ldap.userMembershipKey";
-
- /**
- * dbVars are the parameters can be set per database. If these
- * parameters are set as a database property, when switching to that
- * database, the HiveConf variable will be changed. The change of these
- * parameters will effectively change the DFS and MapReduce clusters
- * for different databases.
- */
- public static final HiveConf.ConfVars[] dbVars = {
- HiveConf.ConfVars.HADOOPBIN,
- HiveConf.ConfVars.METASTOREWAREHOUSE,
- HiveConf.ConfVars.SCRATCHDIR
- };
-
- /**
- * Variables used by LLAP daemons.
- * TODO: Eventually auto-populate this based on prefixes. The conf variables
- * will need to be renamed for this.
- */
- private static final Set<String> llapDaemonVarsSet;
-
- private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal) {
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_ENABLED.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MODE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MIN_ALLOC.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MAX_ALLOC.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_ARENA_COUNT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_DIRECT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_USE_LRFU.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_LRFU_LAMBDA.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_USE_FILEID_PATH.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ORC_ENABLE_TIME_COUNTERS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_PRINCIPAL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_KEYTAB_FILE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_PRINCIPAL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_KEYTAB_FILE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_ZK_CONNECTION_STRING.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL_DENY.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL_DENY.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DELEGATION_TOKEN_LIFETIME.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_RPC_PORT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_WEB_AUTO_AUTH.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_NUM_HANDLERS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WORK_DIRS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_SHUFFLE_PORT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SHUFFLE_DIR_WATCHER_ENABLED.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_HEARTBEAT_INTERVAL_MS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_TIMEOUT_MS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_SLEEP_BETWEEN_RETRIES_MS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_PORT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_XMX_HEADROOM.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_REFRESH_INTERVAL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOW_PERMANENT_FNS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_DOWNLOAD_PERMANENT_FNS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WAIT_QUEUE_COMPARATOR_CLASS_NAME.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_ENABLE_PREEMPTION.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_PREEMPTION_METRICS_INTERVALS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_PORT.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_SSL.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname);
- }
-
- /**
- * Get a set containing configuration parameter names used by LLAP Server isntances
- * @return an unmodifiable set containing llap ConfVars
- */
- public static final Set<String> getLlapDaemonConfVars() {
- return llapDaemonVarsSet;
- }
-
-
- /**
- * ConfVars.
- *
- * These are the default configuration properties for Hive. Each HiveConf
- * object is initialized as follows:
- *
- * 1) Hadoop configuration properties are applied.
- * 2) ConfVar properties with non-null values are overlayed.
- * 3) hive-site.xml properties are overlayed.
- *
- * WARNING: think twice before adding any Hadoop configuration properties
- * with non-null values to this list as they will override any values defined
- * in the underlying Hadoop configuration.
- */
- public static enum ConfVars {
- // QL execution stuff
- SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
- PLAN("hive.exec.plan", "", ""),
- STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
- "Directory name that will be created inside table locations in order to support HDFS encryption. " +
- "This is replaces ${hive.exec.scratchdir} for query results with the exception of read-only tables. " +
- "In all cases ${hive.exec.scratchdir} is still used for other temporary files, such as job plans."),
- SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive",
- "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. " +
- "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, " +
- "with ${hive.scratch.dir.permission}."),
- REPLDIR("hive.repl.rootdir","/user/hive/repl/",
- "HDFS root dir for all replication dumps."),
- REPLCMENABLED("hive.repl.cm.enabled", false,
- "Turn on ChangeManager, so delete files will go to cmrootdir."),
- REPLCMDIR("hive.repl.cmrootdir","/user/hive/cmroot/",
- "Root dir for ChangeManager, used for deleted files."),
- REPLCMRETIAN("hive.repl.cm.retain","24h",
- new TimeValidator(TimeUnit.HOURS),
- "Time to retain removed files in cmrootdir."),
- REPLCMINTERVAL("hive.repl.cm.interval","3600s",
- new TimeValidator(TimeUnit.SECONDS),
- "Inteval for cmroot cleanup thread."),
- REPL_FUNCTIONS_ROOT_DIR("hive.repl.replica.functions.root.dir","/user/hive/repl/functions/",
- "Root directory on the replica warehouse where the repl sub-system will store jars from the primary warehouse"),
- LOCALSCRATCHDIR("hive.exec.local.scratchdir",
- "${system:java.io.tmpdir}" + File.separator + "${system:user.name}",
- "Local scratch space for Hive jobs"),
- DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir",
- "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources",
- "Temporary local directory for added resources in the remote file system."),
- SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700",
- "The permission for the user specific scratch directories that get created."),
- SUBMITVIACHILD("hive.exec.submitviachild", false, ""),
- SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true,
- "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" +
- "separate JVM (true recommended) or not. \n" +
- "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."),
- SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000,
- "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). \n" +
- "This prevents runaway scripts from filling logs partitions to capacity"),
- ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false,
- "When enabled, this option allows a user script to exit successfully without consuming \n" +
- "all the data from the standard input."),
- STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:",
- "Streaming jobs that log to standard error with this prefix can log counter or status information."),
- STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true,
- "Enable consumption of status and counter messages for streaming jobs."),
- COMPRESSRESULT("hive.exec.compress.output", false,
- "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" +
- "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"),
- COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false,
- "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. \n" +
- "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"),
- COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""),
- COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""),
- BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000),
- "size per reducer.The default is 256Mb, i.e if the input size is 1G, it will use 4 reducers."),
- MAXREDUCERS("hive.exec.reducers.max", 1009,
- "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" +
- "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."),
- PREEXECHOOKS("hive.exec.pre.hooks", "",
- "Comma-separated list of pre-execution hooks to be invoked for each statement. \n" +
- "A pre-execution hook is specified as the name of a Java class which implements the \n" +
- "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
- POSTEXECHOOKS("hive.exec.post.hooks", "",
- "Comma-separated list of post-execution hooks to be invoked for each statement. \n" +
- "A post-execution hook is specified as the name of a Java class which implements the \n" +
- "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
- ONFAILUREHOOKS("hive.exec.failure.hooks", "",
- "Comma-separated list of on-failure hooks to be invoked for each statement. \n" +
- "An on-failure hook is specified as the name of Java class which implements the \n" +
- "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
- QUERYREDACTORHOOKS("hive.exec.query.redactor.hooks", "",
- "Comma-separated list of hooks to be invoked for each query which can \n" +
- "tranform the query before it's placed in the job.xml file. Must be a Java class which \n" +
- "extends from the org.apache.hadoop.hive.ql.hooks.Redactor abstract class."),
- CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "",
- "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" +
- "A client stats publisher is specified as the name of a Java class which implements the \n" +
- "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."),
- ATSHOOKQUEUECAPACITY("hive.ats.hook.queue.capacity", 64,
- "Queue size for the ATS Hook executor. If the number of outstanding submissions \n" +
- "to the ATS executor exceed this amount, the Hive ATS Hook will not try to log queries to ATS."),
- EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"),
- EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8,
- "How many jobs at most can be executed in parallel"),
- HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true,
- "Whether speculative execution for reducers should be turned on. "),
- HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L,
- "The interval with which to poll the JobTracker for the counters the running job. \n" +
- "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."),
- DYNAMICPARTITIONING("hive.exec.dynamic.partition", true,
- "Whether or not to allow dynamic partitions in DML/DDL."),
- DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict",
- "In strict mode, the user must specify at least one static partition\n" +
- "in case the user accidentally overwrites all partitions.\n" +
- "In nonstrict mode all partitions are allowed to be dynamic."),
- DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000,
- "Maximum number of dynamic partitions allowed to be created in total."),
- DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100,
- "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."),
- MAXCREATEDFILES("hive.exec.max.created.files", 100000L,
- "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."),
- DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__",
- "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" +
- "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" +
- "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."),
- DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__", ""),
-
- // Whether to show a link to the most failed task + debugging tips
- SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true,
- "If a job fails, whether to provide a link in the CLI to the task with the\n" +
- "most failures, along with debugging hints if applicable."),
- JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true,
- "Whether or not stack traces parsed from the task logs of a sampled failed task \n" +
- "for each failed job should be stored in the SessionState"),
- JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""),
- TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""),
- OUTPUT_FILE_EXTENSION("hive.output.file.extension", null,
- "String used as a file extension for output files. \n" +
- "If not set, defaults to the codec extension for text files (e.g. \".gz\"), or no extension otherwise."),
-
- HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true),
- HIVE_IN_TEST_SHORT_LOGS("hive.in.test.short.logs", false,
- "internal usage only, used only in test mode. If set true, when requesting the " +
- "operation logs the short version (generated by LogDivertAppenderForTest) will be " +
- "returned"),
- HIVE_IN_TEST_REMOVE_LOGS("hive.in.test.remove.logs", true,
- "internal usage only, used only in test mode. If set false, the operation logs, and the " +
- "operation log directory will not be removed, so they can be found after the test runs."),
-
- HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez",
- true),
-
- LOCALMODEAUTO("hive.exec.mode.local.auto", false,
- "Let Hive determine whether to run in local mode automatically"),
- LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L,
- "When hive.exec.mode.local.auto is true, input bytes should less than this for local mode."),
- LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4,
- "When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode."),
-
- DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true,
- "Do not report an error if DROP TABLE/VIEW/Index/Function specifies a non-existent table/view/index/function"),
-
- HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"),
-
- HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100,
- "maximum number of lines for footer user can define for a table file"),
-
- HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true,
- "Make column names unique in the result set by qualifying column names with table alias if needed.\n" +
- "Table alias will be added to column names for queries of type \"select *\" or \n" +
- "if query explicitly uses table alias \"select r1.x..\"."),
-
- // Hadoop Configuration Properties
- // Properties with null values are ignored and exist only for the purpose of giving us
- // a symbolic name to reference in the Hive source code. Properties with non-null
- // values will override any values set in the underlying Hadoop configuration.
- HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true),
- YARNBIN("yarn.bin.path", findYarnBinary(), "", true),
- HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem",
- "The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20"),
- MAPREDMAXSPLITSIZE(FileInputFormat.SPLIT_MAXSIZE, 256000000L, "", true),
- MAPREDMINSPLITSIZE(FileInputFormat.SPLIT_MINSIZE, 1L, "", true),
- MAPREDMINSPLITSIZEPERNODE(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 1L, "", true),
- MAPREDMINSPLITSIZEPERRACK(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 1L, "", true),
- // The number of reduce tasks per job. Hadoop sets this value to 1 by default
- // By setting this property to -1, Hive will automatically determine the correct
- // number of reducers.
- HADOOPNUMREDUCERS("mapreduce.job.reduces", -1, "", true),
-
- // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here!
- METASTOREDBTYPE("hive.metastore.db.type", "DERBY", new StringSet("DERBY", "ORACLE", "MYSQL", "MSSQL", "POSTGRES"),
- "Type of database used by the metastore. Information schema & JDBCStorageHandler depend on it."),
- METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse",
- "location of default database for the warehouse"),
- METASTOREURIS("hive.metastore.uris", "",
- "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),
-
- METASTORE_CAPABILITY_CHECK("hive.metastore.client.capability.check", true,
- "Whether to check client capabilities for potentially breaking API usage."),
- METASTORE_FASTPATH("hive.metastore.fastpath", false,
- "Used to avoid all of the proxies and object copies in the metastore. Note, if this is " +
- "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " +
- "undefined and most likely undesired behavior will result"),
- METASTORE_FS_HANDLER_THREADS_COUNT("hive.metastore.fshandler.threads", 15,
- "Number of threads to be allocated for metastore handler for fs operations."),
- METASTORE_HBASE_CATALOG_CACHE_SIZE("hive.metastore.hbase.catalog.cache.size", 50000, "Maximum number of " +
- "objects we will place in the hbase metastore catalog cache. The objects will be divided up by " +
- "types that we need to cache."),
- METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.hbase.aggregate.stats.cache.size", 10000,
- "Maximum number of aggregate stats nodes that we will place in the hbase metastore aggregate stats cache."),
- METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.hbase.aggregate.stats.max.partitions", 10000,
- "Maximum number of partitions that are aggregated per cache node."),
- METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY("hive.metastore.hbase.aggregate.stats.false.positive.probability",
- (float) 0.01, "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
- METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.hbase.aggregate.stats.max.variance", (float) 0.1,
- "Maximum tolerable variance in number of partitions between a cached node and our request (default 10%)."),
- METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS),
- "Number of seconds for a cached node to be active in the cache before they become stale."),
- METASTORE_HBASE_CACHE_MAX_WRITER_WAIT("hive.metastore.hbase.cache.max.writer.wait", "5000ms", new TimeValidator(TimeUnit.MILLISECONDS),
- "Number of milliseconds a writer will wait to acquire the writelock before giving up."),
- METASTORE_HBASE_CACHE_MAX_READER_WAIT("hive.metastore.hbase.cache.max.reader.wait", "1000ms", new TimeValidator(TimeUnit.MILLISECONDS),
- "Number of milliseconds a reader will wait to acquire the readlock before giving up."),
- METASTORE_HBASE_CACHE_MAX_FULL("hive.metastore.hbase.cache.max.full", (float) 0.9,
- "Maximum cache full % after which the cache cleaner thread kicks in."),
- METASTORE_HBASE_CACHE_CLEAN_UNTIL("hive.metastore.hbase.cache.clean.until", (float) 0.8,
- "The cleaner thread cleans until cache reaches this % full size."),
- METASTORE_HBASE_CONNECTION_CLASS("hive.metastore.hbase.connection.class",
- "org.apache.hadoop.hive.metastore.hbase.VanillaHBaseConnection",
- "Class used to connect to HBase"),
- METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES("hive.metastore.hbase.aggr.stats.cache.entries",
- 10000, "How many stats objects to cache in memory"),
- METASTORE_HBASE_AGGR_STATS_MEMORY_TTL("hive.metastore.hbase.aggr.stats.memory.ttl", "60s",
- new TimeValidator(TimeUnit.SECONDS),
- "Number of seconds stats objects live in memory after they are read from HBase."),
- METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY(
- "hive.metastore.hbase.aggr.stats.invalidator.frequency", "5s",
- new TimeValidator(TimeUnit.SECONDS),
- "How often the stats cache scans its HBase entries and looks for expired entries"),
- METASTORE_HBASE_AGGR_STATS_HBASE_TTL("hive.metastore.hbase.aggr.stats.hbase.ttl", "604800s",
- new TimeValidator(TimeUnit.SECONDS),
- "Number of seconds stats entries live in HBase cache after they are created. They may be" +
- " invalidated by updates or partition drops before this. Default is one week."),
- METASTORE_HBASE_FILE_METADATA_THREADS("hive.metastore.hbase.file.metadata.threads", 1,
- "Number of threads to use to read file metadata in background to cache it."),
-
- METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3,
- "Number of retries while opening a connection to metastore"),
- METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1,
- "Number of retries upon failure of Thrift metastore calls"),
- METASTORE_SERVER_PORT("hive.metastore.port", 9083, "Hive metastore listener port"),
- METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", "1s",
- new TimeValidator(TimeUnit.SECONDS),
- "Number of seconds for the client to wait between consecutive connection attempts"),
- METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s",
- new TimeValidator(TimeUnit.SECONDS),
- "MetaStore Client socket timeout in seconds"),
- METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s",
- new TimeValidator(TimeUnit.SECONDS),
- "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" +
- "reconnects on the next MetaStore operation. A value of 0s means the connection\n" +
- "has an infinite lifetime."),
- METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine",
- "password to use against metastore database"),
- METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "",
- "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"),
- METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true,
- "Set this to true if multiple threads access metastore through JDO concurrently."),
- METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL",
- "jdbc:derby:;databaseName=metastore_db;create=true",
- "JDBC connect string for a JDBC metastore.\n" +
- "To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" +
- "For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."),
- METASTORE_DBACCESS_SSL_PROPS("hive.metastore.dbaccess.ssl.properties", "",
- "Comma-separated SSL properties for metastore to access database when JDO connection URL\n" +
- "enables SSL access. e.g. javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd."),
- HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10,
- "The number of times to retry a HMSHandler call if there were a connection error."),
- HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms",
- new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."),
- HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false,
- "Whether to force reloading of the HMSHandler configuration (including\n" +
- "the connection URL) before the next metastore query that accesses the\n" +
- "datastore. Once reloaded, this value is reset to false. Used for\n" +
- "testing only."),
- METASTORESERVERMAXMESSAGESIZE("hive.metastore.server.max.message.size", 100*1024*1024L,
- "Maximum message size in bytes a HMS will accept."),
- METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200,
- "Minimum number of worker threads in the Thrift server's pool."),
- METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 1000,
- "Maximum number of worker threads in the Thrift server's pool."),
- METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true,
- "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."),
-
- METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
- "_INTERMEDIATE_ORIGINAL",
- "Intermediate dir suffixes used for archiving. Not important what they\n" +
- "are, as long as collisions are avoided"),
- METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
- "_INTERMEDIATE_ARCHIVED", ""),
- METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
- "_INTERMEDIATE_EXTRACTED", ""),
- METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "",
- "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."),
- METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal",
- "hive-metastore/_HOST@EXAMPLE.COM",
- "The service principal for the metastore Thrift server. \n" +
- "The special string _HOST will be replaced automatically with the correct host name."),
- METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false,
- "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."),
- METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false,
- "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."),
- METASTORE_USE_THRIFT_COMPACT_PROTOCOL("hive.metastore.thrift.compact.protocol.enabled", false,
- "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" +
- "Setting it to true will break compatibility with older clients running TBinaryProtocol."),
- METASTORE_TOKEN_SIGNATURE("hive.metastore.token.signature", "",
- "The delegation token service name to match when selecting a token from the current user's tokens."),
- METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS("hive.cluster.delegation.token.store.class",
- "org.apache.hadoop.hive.thrift.MemoryTokenStore",
- "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."),
- METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR(
- "hive.cluster.delegation.token.store.zookeeper.connectString", "",
- "The ZooKeeper token store connect string. You can re-use the configuration value\n" +
- "set in hive.zookeeper.quorum, by leaving this parameter unset."),
- METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE(
- "hive.cluster.delegation.token.store.zookeeper.znode", "/hivedelegation",
- "The root path for token store data. Note that this is used by both HiveServer2 and\n" +
- "MetaStore to store delegation Token. One directory gets created for each of them.\n" +
- "The final directory names would have the servername appended to it (HIVESERVER2,\n" +
- "METASTORE)."),
- METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL(
- "hive.cluster.delegation.token.store.zookeeper.acl", "",
- "ACL for token store entries. Comma separated list of ACL entries. For example:\n" +
- "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa\n" +
- "Defaults to all permissions for the hiveserver2/metastore process user."),
- METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order",
- "List of comma separated metastore object types that should be pinned in the cache"),
- METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "HikariCP", new StringSet("BONECP", "DBCP",
- "HikariCP", "NONE"),
- "Specify connection pool library for datanucleus"),
- METASTORE_CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize", 10,
- "Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" +
- "2 connection pools (TxnHandler and ObjectStore). When configuring the max connection pool size, it is\n" +
- "recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" +
- "configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+
- "(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" +
- "(2 * physical_core_count + hard_disk_count)."),
- // Workaround for DN bug on Postgres:
- // http://www.datanucleus.org/servlet/forum/viewthread_thread,7985_offset
- METASTORE_DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo", "NONE",
- "initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."),
- METASTORE_VALIDATE_TABLES("datanucleus.schema.validateTables", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- METASTORE_VALIDATE_COLUMNS("datanucleus.schema.validateColumns", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- METASTORE_VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"),
- METASTORE_AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", false,
- "Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once. "
- + "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not "
- + "recommended for production use cases, run schematool command instead." ),
- METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", true,
- "Enforce metastore schema version consistency.\n" +
- "True: Verify that version information stored in metastore is compatible with one from Hive jars. Also disable automatic\n" +
- " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" +
- " proper metastore schema migration. (Default)\n" +
- "False: Warn if the version information stored in metastore doesn't match with one from Hive jars."),
- METASTORE_SCHEMA_VERIFICATION_RECORD_VERSION("hive.metastore.schema.verification.record.version", false,
- "When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" +
- " enabled the MS will be unusable."),
- METASTORE_SCHEMA_INFO_CLASS("hive.metastore.schema.info.class",
- "org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo",
- "Fully qualified class name for the metastore schema information class \n"
- + "which is used by schematool to fetch the schema information.\n"
- + " This class should implement the IMetaStoreSchemaInfo interface"),
- METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed",
- "Default transaction isolation level for identity generation."),
- METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false,
- "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"),
- METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""),
- METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1",
- "Name of the identifier factory to use when generating table/column names etc. \n" +
- "'datanucleus1' is used for backward compatibility with DataNucleus v1"),
- METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""),
- METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG",
- "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"),
- METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300,
- "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" +
- "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" +
- "but it may also cause higher memory requirement at the client side."),
- METASTORE_BATCH_RETRIEVE_OBJECTS_MAX(
- "hive.metastore.batch.retrieve.table.partition.max", 1000,
- "Maximum number of objects that metastore internally retrieves in one batch."),
-
- METASTORE_INIT_HOOKS("hive.metastore.init.hooks", "",
- "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" +
- "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."),
- METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "",
- "List of comma separated listeners for metastore events."),
- METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "",
- "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" +
- " interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. " +
- "Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."),
- METASTORE_TRANSACTIONAL_EVENT_LISTENERS("hive.metastore.transactional.event.listeners", "",
- "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" +
- " interface. Both the metastore event and corresponding listener method will be invoked in the same JDO transaction."),
- METASTORE_EVENT_DB_LISTENER_TTL("hive.metastore.event.db.listener.timetolive", "86400s",
- new TimeValidator(TimeUnit.SECONDS),
- "time after which events will be removed from the database listener queue"),
- METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false,
- "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" +
- "for operations like drop-partition (disallow the drop-partition if the user in\n" +
- "question doesn't have permissions to delete the corresponding directory\n" +
- "on the storage)."),
- METASTORE_AUTHORIZATION_EXTERNALTABLE_DROP_CHECK("hive.metastore.authorization.storage.check.externaltable.drop", true,
- "Should StorageBasedAuthorization check permission of the storage before dropping external table.\n" +
- "StorageBasedAuthorization already does this check for managed table. For external table however,\n" +
- "anyone who has read permission of the directory could drop external table, which is surprising.\n" +
- "The flag is set to true by default; set it to false to maintain backward compatibility."),
- METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", "0s",
- new TimeValidator(TimeUnit.SECONDS),
- "Frequency at which timer task runs to purge expired events in metastore."),
- METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", "0s",
- new TimeValidator(TimeUnit.SECONDS),
- "Duration after which events expire from events table"),
- METASTORE_EVENT_MESSAGE_FACTORY("hive.metastore.event.message.factory",
- "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory",
- "Factory class for making encoding and decoding messages in the events generated."),
- METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true,
- "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" +
- "the client's reported user and group permissions. Note that this property must be set on \n" +
- "both the client and server sides. Further note that it's best effort. \n" +
- "If client sets it to true and server sets it to false, client setting will be ignored."),
- METASTORE_PARTITION_NAME_WHITELIST_PATTERN("hive.metastore.partition.name.whitelist.pattern", "",
- "Partition names will be checked against this regex pattern and rejected if not matched."),
-
- METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false,
- "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" +
- "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" +
- "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" +
- "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" +
- "is also irrelevant."),
- METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true,
- "Whether the Hive metastore should try to use direct SQL queries instead of the\n" +
- "DataNucleus for certain read paths. This can improve metastore performance when\n" +
- "fetching many partitions or column statistics by orders of magnitude; however, it\n" +
- "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" +
- "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" +
- "work for all queries on your datastore. If all SQL queries fail (for example, your\n" +
- "metastore is backed by MongoDB), you might want to disable this to save the\n" +
- "try-and-fall-back cost."),
- METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE("hive.metastore.direct.sql.batch.size", 0,
- "Batch size for partition and other object retrieval from the underlying DB in direct\n" +
- "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" +
- "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" +
- "may impede performance. -1 means no batching, 0 means automatic batching."),
- METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true,
- "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" +
- "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" +
- "select query has incorrect syntax or something similar inside a transaction, the\n" +
- "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" +
- "should disable the usage of direct SQL inside transactions if that happens in your case."),
- METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH("hive.direct.sql.max.query.length", 100, "The maximum\n" +
- " size of a query string (in KB)."),
- METASTORE_DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("hive.direct.sql.max.elements.in.clause", 1000,
- "The maximum number of values in a IN clause. Once exceeded, it will be broken into\n" +
- " multiple OR separated IN clauses."),
- METASTORE_DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("hive.direct.sql.max.elements.values.clause",
- 1000, "The maximum number of values in a VALUES clause for INSERT statement."),
- METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false,
- "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " +
- "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " +
- "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " +
- "pruning is the correct behaviour"),
- METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES(
- "hive.metastore.disallow.incompatible.col.type.changes", true,
- "If true (the default), ALTER TABLE operations which change the type of a\n" +
- "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" +
- "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" +
- "datatypes can be converted from string to any type. The map is also serialized as\n" +
- "a string, which can be read as a string as well. However, with any binary\n" +
- "serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" +
- "when subsequently trying to access old partitions.\n" +
- "\n" +
- "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" +
- "not blocked.\n" +
- "\n" +
- "See HIVE-4409 for more details."),
- METASTORE_LIMIT_PARTITION_REQUEST("hive.metastore.limit.partition.request", -1,
- "This limits the number of partitions that can be requested from the metastore for a given table.\n" +
- "The default value \"-1\" means no limit."),
-
- NEWTABLEDEFAULTPARA("hive.table.parameters.default", "",
- "Default property values for newly created tables"),
- DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", "",
- "Table Properties to copy over when executing a Create Table Like."),
- METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore",
- "Name of the class that implements org.apache.hadoop.hive.metastore.RawStore interface. \n" +
- "This class is used for storage and retrieval of raw metadata objects such as table, database"),
- METASTORE_CACHED_RAW_STORE_IMPL("hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore",
- "Name of the wrapped RawStore class"),
- METASTORE_CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY(
- "hive.metastore.cached.rawstore.cache.update.frequency", "60", new TimeValidator(
- TimeUnit.SECONDS),
- "The time after which metastore cache is updated from metastore DB."),
- METASTORE_TXN_STORE_IMPL("hive.metastore.txn.store.impl",
- "org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler",
- "Name of class that implements org.apache.hadoop.hive.metastore.txn.TxnStore. This " +
- "class is used to store and retrieve transactions and locks"),
- METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver",
- "Driver class name for a JDBC metastore"),
- METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass",
- "org.datanucleus.api.jdo.JDOPersistenceManagerFactory",
- "class implementing the jdo persistence"),
- METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy",
- "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""),
- METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true,
- "Detaches all objects from session so that they can be used after transaction is committed"),
- METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true,
- "Reads outside of transactions"),
- METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP",
- "Username to use against metastore database"),
- METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "",
- "List of comma separated listeners for the end of metastore functions."),
- METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties", "",
- "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" +
- "* implies all the keys will get inherited."),
- METASTORE_FILTER_HOOK("hive.metastore.filter.hook", "org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl",
- "Metastore hook class for filtering the metadata read results. If hive.security.authorization.manager "
- + "is set to instance of HiveAuthorizerFactory, then this value is ignored."),
- FIRE_EVENTS_FOR_DML("hive.metastore.dml.events", false, "If true, the metastore will be asked" +
- " to fire events for DML operations"),
- METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS("hive.metastore.client.drop.partitions.using.expressions", true,
- "Choose whether dropping partitions with HCatClient pushes the partition-predicate to the metastore, " +
- "or drops partitions iteratively"),
-
- METASTORE_AGGREGATE_STATS_CACHE_ENABLED("hive.metastore.aggregate.stats.cache.enabled", true,
- "Whether aggregate stats caching is enabled or not."),
- METASTORE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.aggregate.stats.cache.size", 10000,
- "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."),
- METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.aggregate.stats.cache.max.partitions", 10000,
- "Maximum number of partitions that are aggregated per cache node."),
- METASTORE_AGGREGATE_STATS_CACHE_FPP("hive.metastore.aggregate.stats.cache.fpp", (float) 0.01,
- "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
- METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.aggregate.stats.cache.max.variance", (float) 0.01,
- "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."),
- METASTORE_AGGREGATE_STATS_CACHE_TTL("hive.metastore.aggregate.stats.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS),
- "Number of seconds for a cached node to be active in the cache before they become stale."),
- METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("hive.metastore.aggregate.stats.cache.max.writer.wait", "5000ms",
- new TimeValidator(TimeUnit.MILLISECONDS),
- "Number of milliseconds a writer will wait to acquire the writelock before giving up."),
- METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT("hive.metastore.aggregate.stats.cache.max.reader.wait", "1000ms",
- new TimeValidator(TimeUnit.MILLISECONDS),
- "Number of milliseconds a reader will wait to acquire the readlock before giving up."),
- METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL("hive.metastore.aggregate.stats.cache.max.full", (float) 0.9,
- "Maximum cache full % after which the cache cleaner thread kicks in."),
- METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL("hive.metastore.aggregate.stats.cache.clean.until", (float) 0.8,
- "The cleaner thread cleans until cache reaches this % full size."),
- METASTORE_METRICS("hive.metastore.metrics.enabled", false, "Enable metrics on the metastore."),
- METASTORE_INIT_METADATA_COUNT_ENABLED("hive.metastore.initial.metadata.count.enabled", true,
- "Enable a metadata count at metastore startup for metrics."),
-
- // Metastore SSL settings
- HIVE_METASTORE_USE_SSL("hive.metastore.use.SSL", false,
- "Set this to true for using SSL encryption in HMS server."),
- HIVE_METASTORE_SSL_KEYSTORE_PATH("hive.metastore.keystore.path", "",
- "Metastore SSL certificate keystore location."),
- HIVE_METASTORE_SSL_KEYSTORE_PASSWORD("hive.metastore.keystore.password", "",
- "Metastore SSL certificate keystore password."),
- HIVE_METASTORE_SSL_TRUSTSTORE_PATH("hive.metastore.truststore.path", "",
- "Metastore SSL certificate truststore location."),
- HIVE_METASTORE_SSL_TRUSTSTORE_PASSWORD("hive.metastore.truststore.password", "",
- "Metastore SSL certificate truststore password."),
-
- // Parameters for exporting metadata on table drop (requires the use of the
- // org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener)
- METADATA_EXPORT_LOCATION("hive.metadata.export.location", "",
- "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
- "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" +
- "metadata being exported to the current user's home directory on HDFS."),
- MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true,
- "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
- "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" +
- "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."),
-
- // CLI
- CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""),
- CLIPRINTCURRENTDB("hive.cli.print.current.db", false,
- "Whether to include the current database in the Hive prompt."),
- CLIPROMPT("hive.cli.prompt", "hive",
- "Command line prompt configuration value. Other hiveconf can be used in this configuration value. \n" +
- "Variable substitution will only be invoked at the Hive CLI startup."),
- CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1,
- "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" +
- "If the value of this property is -1, then Hive will use the auto-detected terminal width."),
-
- HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""),
-
- // Settings whose values are logged in the jobconf
-
- // Session identifier (empty by default, no description supplied)
- HIVESESSIONID("hive.session.id", "", ""),
- // Whether the session is running in silent mode (off by default)
- HIVESESSIONSILENT("hive.session.silent", false, ""),
-
- HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false,
- "Whether to log Hive query, query plan, runtime statistics etc."),
-
- HIVEQUERYSTRING("hive.query.string", "",
- "Query being executed (might be multiple per a session)"),
-
- HIVEQUERYID("hive.query.id", "",
- "ID for query being executed (might be multiple per a session)"),
-
- HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"),
-
- // Location of the Hive jar and of auxiliary plugin jars
- HIVEJAR("hive.jar.path", "",
- "The location of hive_cli.jar that is used when submitting jobs in a separate jvm."),
- HIVEAUXJARS("hive.aux.jars.path", "",
- "The location of the plugin jars that contain implementations of user defined functions and serdes."),
-
- // Reloadable auxiliary jars
- HIVERELOADABLEJARS("hive.reloadable.aux.jars.path", "",
- "The locations of the plugin jars, which can be comma-separated folders or jars. Jars can be renewed\n"
- + "by executing reload command. And these jars can be "
- + "used as the auxiliary classes like creating a UDF or SerDe."),
-
- // Files, jars and archives added to the session (internal parameters)
- HIVEADDEDFILES("hive.added.files.path", "", "This is an internal parameter."),
- HIVEADDEDJARS("hive.added.jars.path", "", "This is an internal parameter."),
- HIVEADDEDARCHIVES("hive.added.archives.path", "", "This is an internal parameter."),
-
- HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session. Internal usage only.", true), // NOTE(review): meaning of the trailing boolean is defined by a constructor outside this chunk
-
- // Script operator settings (used by the Transform/Map/Reduce clauses)
- HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", "0s",
- new TimeValidator(TimeUnit.SECONDS),
- "How long to run autoprogressor for the script/UDTF operators.\n" +
- "Set to 0 for forever."),
- HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false,
- "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" +
- "to avoid the task getting killed because of inactivity. Hive sends progress information when the script is \n" +
- "outputting to stderr. This option removes the need of periodically producing stderr messages, \n" +
- "but users should be cautious because this may prevent scripts stuck in infinite loops from being killed by TaskTracker."),
- HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID",
- "Name of the environment variable that holds the unique script operator ID in the user's \n" +
- "transform function (the custom mapper/reducer that the user has specified in the query)"),
- HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false,
- "Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)"),
- HIVESCRIPT_ENV_BLACKLIST("hive.script.operator.env.blacklist",
- "hive.txn.valid.txns,hive.script.operator.env.blacklist",
- "Comma separated list of keys from the configuration file not to convert to environment " +
- "variables when invoking the script operator"),
- HIVE_STRICT_CHECKS_LARGE_QUERY("hive.strict.checks.large.query", false, // the only strict check in this group that is off by default
- "Enabling strict large query checks disallows the following:\n" +
- " Orderby without limit.\n" +
- " No partition being picked up for a query against partitioned table.\n" +
- "Note that these checks currently do not consider data size, only the query pattern."),
- HIVE_STRICT_CHECKS_TYPE_SAFETY("hive.strict.checks.type.safety", true,
- "Enabling strict type safety checks disallows the following:\n" +
- " Comparing bigints and strings.\n" +
- " Comparing bigints and doubles."),
- HIVE_STRICT_CHECKS_CARTESIAN("hive.strict.checks.cartesian.product", true,
- "Enabling strict Cartesian join checks disallows the following:\n" +
- " Cartesian product (cross join)."),
- HIVE_STRICT_CHECKS_BUCKETING("hive.strict.checks.bucketing", true,
- "Enabling strict bucketing checks disallows the following:\n" +
- " Load into bucketed tables."),
-
- @Deprecated
- HIVEMAPREDMODE("hive.mapred.mode", null, // no default value; superseded by the hive.strict.checks.* settings declared above
- "Deprecated; use hive.strict.checks.* settings instead."),
- HIVEALIAS("hive.alias", "", ""), // NOTE(review): no description supplied
- HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"),
- HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"),
- HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000,
- "How many rows in the right-most join operand Hive should buffer before emitting the join result."),
- HIVEJOINCACHESIZE("hive.join.cache.size", 25000,
- "How many rows in the joining tables (except the streaming table) should be cached in memory."),
- HIVE_PUSH_RESIDUAL_INNER("hive.join.inner.residual", false,
- "Whether to push non-equi filter predicates within inner joins. This can improve efficiency in "
- + "the evaluation of certain joins, since we will not be emitting rows which are thrown away by "
- + "a Filter operator straight away. However, currently vectorization does not support them, thus "
- + "enabling it is only recommended when vectorization is disabled."),
-
- // CBO (cost-based optimizer) settings. Description fragments are concatenated verbatim, so each one carries its own separating space.
- HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
- HIVE_CBO_CNF_NODES_LIMIT("hive.cbo.cnf.maxnodes", -1, "When converting to conjunctive normal form (CNF), fail if " +
- "the expression exceeds this threshold; the threshold is expressed in terms of number of nodes (leaves and " +
- "interior nodes). -1 to not set up a threshold."),
- HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control calcite plan to hive operator conversion"),
- HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on "
- + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."),
- HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"),
- HIVE_CBO_COST_MODEL_NET("hive.cbo.costmodel.network", "150.0", "Default cost of transferring a byte over network;"
- + " expressed as multiple of CPU cost"),
- HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;"
- + " expressed as multiple of NETWORK cost"),
- HIVE_CBO_COST_MODEL_LFS_READ("hive.cbo.costmodel.local.fs.read", "4.0", "Default cost of reading a byte from local FS;"
- + " expressed as multiple of NETWORK cost"),
- HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;"
- + " expressed as multiple of Local FS write cost"),
- HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
- + " expressed as multiple of Local FS read cost"),
- HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true,
- "Toggle display of CBO warnings like missing column stats"),
- AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
- SEMIJOIN_CONVERSION("hive.optimize.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"),
- HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align " +
- "columns in operators such as Aggregate or Join so that we try to reduce the number of shuffling stages"),
-
- // Materialized views: query rewriting switch, plus default file format and SerDe
- HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting", false,
- "Whether to try to rewrite queries using the materialized views enabled for rewriting"),
- HIVE_MATERIALIZED_VIEW_FILE_FORMAT("hive.materializedview.fileformat", "ORC",
- new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"),
- "Default file format for CREATE MATERIALIZED VIEW statement"),
- HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde",
- "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "Default SerDe used for materialized views"),
-
- // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows and was
- // meant to be removed by Hive 0.13. Also, do not change the default (see SMB operator).
- HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""),
-
- HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true,
- "Whether Hive should use memory-optimized hash table for MapJoin.\n" +
- "Only works on Tez and Spark, because memory-optimized hashtable cannot be serialized."),
- HIVEMAPJOINOPTIMIZEDTABLEPROBEPERCENT("hive.mapjoin.optimized.hashtable.probe.percent",
- (float) 0.5, "Probing space percentage of the optimized hashtable"),
- HIVEUSEHYBRIDGRACEHASHJOIN("hive.mapjoin.hybridgrace.hashtable", true, "Whether to use hybrid " +
- "grace hash join as the join method for mapjoin. Tez only."),
- HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " +
- "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
- "This number should be power of 2."),
- HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace " +
- "hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
- HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For " +
- "hybrid grace hash join, the minimum number of partitions to create."),
- HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024,
- "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
- "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
- "joins unnecessary memory will be allocated and then trimmed."),
- HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " +
- "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."),
-
- HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000,
- "How many rows with the same key value should be cached in memory per smb joined table."),
- HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000,
- "Number of rows after which size of the grouping keys/aggregation classes is performed"),
- HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5,
- "Portion of total memory to be used by map-side group aggregation hash table"),
- HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3,
- "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"),
- HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9,
- "The max memory to be used by map-side group aggregation hash table.\n" +
- "If the memory usage is higher than this number, force to flush data"),
- HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5,
- "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" +
- "Set to 1 to make sure hash aggregation is never turned off."),
- HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
- "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" +
- "common group by keys, it will be optimized to generate single M/R job."),
- HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true,
- "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
- "the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" +
- "is that it limits the number of mappers to the number of files."),
- HIVE_GROUPBY_POSITION_ALIAS("hive.groupby.position.alias", false,
- "Whether to enable using Column Position Alias in Group By"),
- HIVE_ORDERBY_POSITION_ALIAS("hive.orderby.position.alias", true,
- "Whether to enable using Column Position Alias in Order By"),
- @Deprecated // superseded by the two position-alias settings declared just above
- HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false,
- "Whether to enable using Column Position Alias in Group By or Order By (deprecated).\n" +
- "Use " + HIVE_ORDERBY_POSITION_ALIAS.varname + " or " + HIVE_GROUPBY_POSITION_ALIAS.varname + " instead"), // description embeds the varname of each replacement setting
- HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30,
- "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" +
- "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" +
- "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" +
- "This can lead to explosion across map-reduce boundary if the cardinality of T is very high,\n" +
- "and map-side aggregation does not do a very good job. \n" +
- "\n" +
- "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" +
- "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" +
- "assumption that the original group by will reduce the data size."),
- HIVE_GROUPBY_LIMIT_EXTRASTEP("hive.groupby.limit.extrastep", true, "This parameter decides if Hive should \n" +
- "create new MR job for sorting final output"),
-
- // Max file count and max file size for a plain sequential copy (beyond either limit, distcp is used)
- HIVE_EXEC_COPYFILE_MAXNUMFILES("hive.exec.copyfile.maxnumfiles", 1L,
- "Maximum number of files Hive uses to do sequential HDFS copies between directories. " +
- "Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."),
- HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,
- "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories. " +
- "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."),
-
- // UDTF operator: automatic progress reporting
- HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false,
- "Whether Hive should automatically send progress information to TaskTracker \n" +
- "when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious \n" +
- "because this may prevent TaskTracker from killing tasks with infinite loops."),
-
- HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC", "parquet"),
- "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"),
- HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none", // NOTE(review): the description below says null, but the default and the validator use the value none - confirm the wording
- new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC", "parquet"),
- "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" +
- "created with format specified by hive.default.fileformat. Leaving this null will result in using hive.default.fileformat \n" +
- "for all tables."),
- HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "SequenceFile", new StringSet("TextFile", "SequenceFile", "RCfile", "Llap"),
- "Default file format for storing result of the query."),
- HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"),
-
- // Default SerDe for the RCFile format
- HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde",
- "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe",
- "The default SerDe Hive will use for the RCFile format"),
-
- HIVEDEFAULTSERDE("hive.default.serde",
- "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
- "The default SerDe Hive will use for storage formats that do not specify a SerDe."),
-
- SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema", // default is one comma-separated string of SerDe class names
- "org.apache.hadoop.hive.ql.io.orc.OrcSerde," +
- "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
- "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
- "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
- "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
- "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
- "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," +
- "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
- "SerDes retrieving schema from metastore. This is an internal parameter."),
-
- HIVEHISTORYFILELOC("hive.querylog.location",
- "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", // two placeholder strings joined by the platform file separator
- "Location of Hive run time structured log file"),
-
- HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true,
- "Whether to log the plan's progress every time a job's progress is checked.\n" +
- "These logs are written to the location specified by hive.querylog.location"),
-
- HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", "60000ms",
- new TimeValidator(TimeUnit.MILLISECONDS),
- "The interval to wait between logging the plan's progress.\n" +
- "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" +
- "the progress is logged regardless of this value.\n" +
- "The actual interval will be the ceiling of (this value divided by the value of\n" +
- "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval.\n" +
- "I.e. if it does not divide evenly by the value of hive.exec.counters.pull.interval it will be\n" +
- "logged less frequently than specified.\n" +
- "This only has an effect if hive.querylog.enable.plan.progress is set to true."),
-
- HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", // user-script I/O defaults: SerDe here, record reader and writer below
- "The default SerDe for transmitting input data to and reading output data from the user scripts. "),
- HIVESCRIPTRECORDREADER("hive.script.recordreader",
- "org.apache.hadoop.hive.ql.exec.TextRecordReader",
- "The default record reader for reading data from the user scripts. "),
- HIVESCRIPTRECORDWRITER("hive.script.recordwriter",
- "org.apache.hadoop.hive.ql.exec.TextRecordWriter",
- "The default record writer for writing data to the user scripts. "),
- HIVESCRIPTESCAPE("hive.transform.escape.input", false,
- "This adds an option to escape special chars (newlines, carriage returns and\n" +
- "tabs) when they are passed to the user script. This is useful if the Hive tables\n" +
- "can contain data that contains special characters."),
- HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000, // record length in bytes, per the description
- "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" +
- "The last record before the end of stream can have less than hive.binary.record.max.length bytes"),
-
- HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"),
-
- // Small-table file size threshold for map-join conversion
- HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L,
- "The threshold for the input file size of the small tables; if the file size is smaller \n" +
- "than this threshold, it will try to convert the common join into map join"),
-
-
- HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", true,
- "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."),
-
- HIVE_TRANSACTIONAL_TABLE_SCAN("hive.transactional.table.scan", false,
- "internal usage only -- do transaction (ACID) table scan.", true),
-
- HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY("hive.transactional.events.mem", 10000000,
- "Vectorized ACID readers can often load all the delete events from all the delete deltas\n"
- + "into memory to optimize for performance. To prevent out-of-memory errors, this is a rough heuristic\n"
- + "that limits the total number of delete events that can be loaded into memory at once.\n"
- + "Roughly it has been set to 10 million delete events per bucket (~160 MB).\n"),
-
- HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0,
- "A number used for percentage sampling. By changing this number, user will change the subsets of data sampled."),
-
- //
<TRUNCATED>