You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vi...@apache.org on 2018/07/19 19:55:27 UTC
[28/51] [partial] hive git commit: HIVE-20188 : Split server-specific
code outside of standalone metastore-common (Alexander Kolbasov reviewed by
Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
deleted file mode 100644
index c2bbba5..0000000
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ /dev/null
@@ -1,1686 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.metastore.conf;
-
-import com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.metastore.DefaultStorageSchemaReader;
-import org.apache.hadoop.hive.metastore.HiveAlterHandler;
-import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockCleanerTask;
-import org.apache.hadoop.hive.metastore.MetastoreTaskThread;
-import org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask;
-import org.apache.hadoop.hive.metastore.events.EventCleanerTask;
-import org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager;
-import org.apache.hadoop.hive.metastore.txn.AcidCompactionHistoryService;
-import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService;
-import org.apache.hadoop.hive.metastore.txn.AcidOpenTxnsCounterService;
-import org.apache.hadoop.hive.metastore.txn.AcidWriteSetService;
-import org.apache.hadoop.hive.metastore.utils.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URL;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * A set of definitions of config values used by the Metastore. One of the key aims of this
- * class is to provide backwards compatibility with existing Hive configuration keys while
- * allowing the metastore to have its own, Hive independent keys. For this reason access to the
- * underlying Configuration object should always be done via the static methods provided here
- * rather than directly via {@link Configuration#get(String)} and
- * {@link Configuration#set(String, String)}. All the methods of this class will handle checking
- * both the MetastoreConf key and the Hive key. The algorithm is, on reads, to check first the
- * MetastoreConf key, then the Hive key, then return the default if neither is set. On write
- * only the Metastore key is set.
- *
- * This class does not extend Configuration. Rather it provides static methods for operating on
- * a Configuration object. This allows it to work on HiveConf objects, which otherwise would not
- * be the case.
- */
-public class MetastoreConf {
-
- private static final Logger LOG = LoggerFactory.getLogger(MetastoreConf.class);
- private static final Pattern TIME_UNIT_SUFFIX = Pattern.compile("([0-9]+)([a-zA-Z]+)"); // group 1: digits, group 2: alphabetic suffix
-
- private static final Map<String, ConfVars> metaConfs = new HashMap<>(); // filled by the static initializer from metaConfVars
- private static URL hiveDefaultURL = null; // the four URL fields start as null; where they are assigned is not visible in this chunk
- private static URL hiveSiteURL = null;
- private static URL hiveMetastoreSiteURL = null;
- private static URL metastoreSiteURL = null;
- private static AtomicBoolean beenDumped = new AtomicBoolean(); // starts false; NOTE(review): presumably guards a one-time config dump - confirm
-
- private static Map<String, ConfVars> keyToVars; // not initialized at declaration; NOTE(review): populated outside this chunk - confirm
-
- @VisibleForTesting
- static final String TEST_ENV_WORKAROUND = "metastore.testing.env.workaround.dont.ever.set.this."; // ends with '.'; NOTE(review): presumably a key prefix - confirm at use site
-
- public static enum StatsUpdateMode { // 'static' is redundant here: a nested enum is implicitly static
- NONE, EXISTING, ALL // NOTE(review): no use of these modes is visible in this chunk
- }
-
- private static class TimeValue { // immutable pair: a numeric amount plus the TimeUnit it is expressed in
- final long val; // amount, counted in 'unit'
- final TimeUnit unit; // unit that 'val' is counted in
-
- private TimeValue(long val, TimeUnit unit) { // stores both arguments as given; no validation
- this.val = val;
- this.unit = unit;
- }
-
- @Override
- public String toString() { // renders val followed by a short unit suffix: ns, us, ms, s, m, h or d
- switch (unit) { // a null unit would throw NullPointerException here (switch on an enum reference)
- case NANOSECONDS: return Long.toString(val) + "ns";
- case MICROSECONDS: return Long.toString(val) + "us";
- case MILLISECONDS: return Long.toString(val) + "ms";
- case SECONDS: return Long.toString(val) + "s";
- case MINUTES: return Long.toString(val) + "m";
- case HOURS: return Long.toString(val) + "h";
- case DAYS: return Long.toString(val) + "d";
- }
- throw new RuntimeException("Unknown time unit " + unit); // reached only when none of the cases above matched
- }
- }
-
- /**
- * Metastore related options that the db is initialized against. When a conf
- * var in this list is changed, the metastore instance for the CLI will
- * be recreated so that the change will take effect.
- * TODO - I suspect the vast majority of these don't need to be here. But it requires testing
- * before just pulling them out.
- */
- public static final MetastoreConf.ConfVars[] metaVars = { // public array: 'final' fixes only the reference, elements stay writable by callers
- ConfVars.WAREHOUSE,
- ConfVars.REPLDIR,
- ConfVars.THRIFT_URIS,
- ConfVars.SERVER_PORT,
- ConfVars.THRIFT_CONNECTION_RETRIES,
- ConfVars.THRIFT_FAILURE_RETRIES,
- ConfVars.CLIENT_CONNECT_RETRY_DELAY,
- ConfVars.CLIENT_SOCKET_TIMEOUT,
- ConfVars.CLIENT_SOCKET_LIFETIME,
- ConfVars.PWD,
- ConfVars.CONNECT_URL_HOOK,
- ConfVars.CONNECT_URL_KEY,
- ConfVars.SERVER_MIN_THREADS,
- ConfVars.SERVER_MAX_THREADS,
- ConfVars.TCP_KEEP_ALIVE,
- ConfVars.KERBEROS_KEYTAB_FILE,
- ConfVars.KERBEROS_PRINCIPAL,
- ConfVars.USE_THRIFT_SASL,
- ConfVars.TOKEN_SIGNATURE,
- ConfVars.CACHE_PINOBJTYPES,
- ConfVars.CONNECTION_POOLING_TYPE,
- ConfVars.VALIDATE_TABLES,
- ConfVars.DATANUCLEUS_INIT_COL_INFO,
- ConfVars.VALIDATE_COLUMNS,
- ConfVars.VALIDATE_CONSTRAINTS,
- ConfVars.STORE_MANAGER_TYPE,
- ConfVars.AUTO_CREATE_ALL,
- ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION,
- ConfVars.DATANUCLEUS_CACHE_LEVEL2,
- ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE,
- ConfVars.IDENTIFIER_FACTORY,
- ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK,
- ConfVars.AUTHORIZATION_STORAGE_AUTH_CHECKS,
- ConfVars.BATCH_RETRIEVE_MAX,
- ConfVars.EVENT_LISTENERS,
- ConfVars.TRANSACTIONAL_EVENT_LISTENERS,
- ConfVars.EVENT_CLEAN_FREQ,
- ConfVars.EVENT_EXPIRY_DURATION,
- ConfVars.EVENT_MESSAGE_FACTORY,
- ConfVars.FILTER_HOOK,
- ConfVars.RAW_STORE_IMPL,
- ConfVars.END_FUNCTION_LISTENERS,
- ConfVars.PART_INHERIT_TBL_PROPS,
- ConfVars.BATCH_RETRIEVE_OBJECTS_MAX,
- ConfVars.INIT_HOOKS,
- ConfVars.PRE_EVENT_LISTENERS,
- ConfVars.HMS_HANDLER_ATTEMPTS,
- ConfVars.HMS_HANDLER_INTERVAL,
- ConfVars.HMS_HANDLER_FORCE_RELOAD_CONF,
- ConfVars.PARTITION_NAME_WHITELIST_PATTERN,
- ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS,
- ConfVars.USERS_IN_ADMIN_ROLE,
- ConfVars.HIVE_TXN_MANAGER,
- ConfVars.TXN_TIMEOUT,
- ConfVars.TXN_MAX_OPEN_BATCH,
- ConfVars.TXN_RETRYABLE_SQLEX_REGEX,
- ConfVars.STATS_NDV_TUNER,
- ConfVars.STATS_NDV_DENSITY_FUNCTION,
- ConfVars.AGGREGATE_STATS_CACHE_ENABLED,
- ConfVars.AGGREGATE_STATS_CACHE_SIZE,
- ConfVars.AGGREGATE_STATS_CACHE_MAX_PARTITIONS,
- ConfVars.AGGREGATE_STATS_CACHE_FPP,
- ConfVars.AGGREGATE_STATS_CACHE_MAX_VARIANCE,
- ConfVars.AGGREGATE_STATS_CACHE_TTL,
- ConfVars.AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
- ConfVars.AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
- ConfVars.AGGREGATE_STATS_CACHE_MAX_FULL,
- ConfVars.AGGREGATE_STATS_CACHE_CLEAN_UNTIL,
- ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES,
- ConfVars.FILE_METADATA_THREADS
- };
-
- /**
- * User configurable Metastore vars. The static initializer indexes each entry in metaConfs, which getMetaConf(String) reads.
- */
- private static final MetastoreConf.ConfVars[] metaConfVars = {
- ConfVars.TRY_DIRECT_SQL,
- ConfVars.TRY_DIRECT_SQL_DDL,
- ConfVars.CLIENT_SOCKET_TIMEOUT,
- ConfVars.PARTITION_NAME_WHITELIST_PATTERN,
- ConfVars.CAPABILITY_CHECK,
- ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES
- };
-
- static { // builds the metaConfs lookup table once, at class initialization
- for (ConfVars confVar : metaConfVars) {
- metaConfs.put(confVar.varname, confVar); // register under varname ...
- metaConfs.put(confVar.hiveName, confVar); // ... and under hiveName, so a lookup by either name yields the same ConfVars
- }
- }
-
- /**
- * Variables that we should never print the value of for security reasons. Each one is listed under both its varname and its hiveName.
- */
- private static final Set<String> unprintables = StringUtils.asSet(
- ConfVars.PWD.varname,
- ConfVars.PWD.hiveName,
- ConfVars.SSL_KEYSTORE_PASSWORD.varname,
- ConfVars.SSL_KEYSTORE_PASSWORD.hiveName,
- ConfVars.SSL_TRUSTSTORE_PASSWORD.varname,
- ConfVars.SSL_TRUSTSTORE_PASSWORD.hiveName
- );
-
- public static ConfVars getMetaConf(String name) { // looks up a user-configurable var by its varname or hiveName
- return metaConfs.get(name); // plain HashMap.get: yields null when name was not registered by the static initializer
- }
-
- public enum ConfVars {
- // alpha order, PLEASE!
- ADDED_JARS("metastore.added.jars.path", "hive.added.jars.path", "",
- "This an internal parameter."),
- AGGREGATE_STATS_CACHE_CLEAN_UNTIL("metastore.aggregate.stats.cache.clean.until",
- "hive.metastore.aggregate.stats.cache.clean.until", 0.8,
- "The cleaner thread cleans until cache reaches this % full size."),
- AGGREGATE_STATS_CACHE_ENABLED("metastore.aggregate.stats.cache.enabled",
- "hive.metastore.aggregate.stats.cache.enabled", true,
- "Whether aggregate stats caching is enabled or not."),
- AGGREGATE_STATS_CACHE_FPP("metastore.aggregate.stats.cache.fpp",
- "hive.metastore.aggregate.stats.cache.fpp", 0.01,
- "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
- AGGREGATE_STATS_CACHE_MAX_FULL("metastore.aggregate.stats.cache.max.full",
- "hive.metastore.aggregate.stats.cache.max.full", 0.9,
- "Maximum cache full % after which the cache cleaner thread kicks in."),
- AGGREGATE_STATS_CACHE_MAX_PARTITIONS("metastore.aggregate.stats.cache.max.partitions",
- "hive.metastore.aggregate.stats.cache.max.partitions", 10000,
- "Maximum number of partitions that are aggregated per cache node."),
- AGGREGATE_STATS_CACHE_MAX_READER_WAIT("metastore.aggregate.stats.cache.max.reader.wait",
- "hive.metastore.aggregate.stats.cache.max.reader.wait", 1000, TimeUnit.MILLISECONDS,
- "Number of milliseconds a reader will wait to acquire the readlock before giving up."),
- AGGREGATE_STATS_CACHE_MAX_VARIANCE("metastore.aggregate.stats.cache.max.variance",
- "hive.metastore.aggregate.stats.cache.max.variance", 0.01,
- "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."),
- AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("metastore.aggregate.stats.cache.max.writer.wait",
- "hive.metastore.aggregate.stats.cache.max.writer.wait", 5000, TimeUnit.MILLISECONDS,
- "Number of milliseconds a writer will wait to acquire the writelock before giving up."),
- AGGREGATE_STATS_CACHE_SIZE("metastore.aggregate.stats.cache.size",
- "hive.metastore.aggregate.stats.cache.size", 10000,
- "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."),
- AGGREGATE_STATS_CACHE_TTL("metastore.aggregate.stats.cache.ttl",
- "hive.metastore.aggregate.stats.cache.ttl", 600, TimeUnit.SECONDS,
- "Number of seconds for a cached node to be active in the cache before they become stale."),
- ALTER_HANDLER("metastore.alter.handler", "hive.metastore.alter.impl",
- HiveAlterHandler.class.getName(),
- "Alter handler. For now defaults to the Hive one. Really need a better default option"),
- ASYNC_LOG_ENABLED("metastore.async.log.enabled", "hive.async.log.enabled", true,
- "Whether to enable Log4j2's asynchronous logging. Asynchronous logging can give\n" +
- " significant performance improvement as logging will be handled in separate thread\n" +
- " that uses LMAX disruptor queue for buffering log messages.\n" +
- " Refer https://logging.apache.org/log4j/2.x/manual/async.html for benefits and\n" +
- " drawbacks."),
- AUTHORIZATION_STORAGE_AUTH_CHECKS("metastore.authorization.storage.checks",
- "hive.metastore.authorization.storage.checks", false,
- "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" +
- "for operations like drop-partition (disallow the drop-partition if the user in\n" +
- "question doesn't have permissions to delete the corresponding directory\n" +
- "on the storage)."),
- AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", "datanucleus.schema.autoCreateAll", false,
- "Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once."
- + "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not "
- + "recommended for production use cases, run schematool command instead." ),
- BATCH_RETRIEVE_MAX("metastore.batch.retrieve.max", "hive.metastore.batch.retrieve.max", 300,
- "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" +
- "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" +
- "but it may also cause higher memory requirement at the client side."),
- BATCH_RETRIEVE_OBJECTS_MAX("metastore.batch.retrieve.table.partition.max",
- "hive.metastore.batch.retrieve.table.partition.max", 1000,
- "Maximum number of objects that metastore internally retrieves in one batch."),
- CACHE_PINOBJTYPES("metastore.cache.pinobjtypes", "hive.metastore.cache.pinobjtypes",
- "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order",
- "List of comma separated metastore object types that should be pinned in the cache"),
- CACHED_RAW_STORE_IMPL("metastore.cached.rawstore.impl",
- "hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore",
- "Name of the wrapped RawStore class"),
- CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY("metastore.cached.rawstore.cache.update.frequency",
- "hive.metastore.cached.rawstore.cache.update.frequency", 60, TimeUnit.SECONDS,
- "The time after which metastore cache is updated from metastore DB."),
- CACHED_RAW_STORE_CACHED_OBJECTS_WHITELIST("metastore.cached.rawstore.cached.object.whitelist",
- "hive.metastore.cached.rawstore.cached.object.whitelist", ".*", "Comma separated list of regular expressions \n " +
- "to select the tables (and its partitions, stats etc) that will be cached by CachedStore. \n" +
- "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.blacklist. \n" +
- "Example: .*, db1.*, db2\\.tbl.*. The last item can potentially override patterns specified before."),
- CACHED_RAW_STORE_CACHED_OBJECTS_BLACKLIST("metastore.cached.rawstore.cached.object.blacklist",
- "hive.metastore.cached.rawstore.cached.object.blacklist", "", "Comma separated list of regular expressions \n " +
- "to filter out the tables (and its partitions, stats etc) that will be cached by CachedStore. \n" +
- "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.whitelist. \n" +
- "Example: db2.*, db3\\.tbl1, db3\\..*. The last item can potentially override patterns specified before. \n" +
- "The blacklist also overrides the whitelist."),
- CACHED_RAW_STORE_MAX_CACHE_MEMORY("metastore.cached.rawstore.max.cache.memory",
- "hive.metastore.cached.rawstore.max.cache.memory", "1Gb", new SizeValidator(),
- "The maximum memory in bytes that the cached objects can use. "
- + "Memory used is calculated based on estimated size of tables and partitions in the cache. "
- + "Setting it to a negative value disables memory estimation."),
- CAPABILITY_CHECK("metastore.client.capability.check",
- "hive.metastore.client.capability.check", true,
- "Whether to check client capabilities for potentially breaking API usage."),
- CATALOG_DEFAULT("metastore.catalog.default", "metastore.catalog.default", "hive",
- "The default catalog to use when a catalog is not specified. Default is 'hive' (the " +
- "default catalog)."),
- CATALOGS_TO_CACHE("metastore.cached.rawstore.catalogs", "metastore.cached.rawstore.catalogs",
- "hive", "Comma separated list of catalogs to cache in the CachedStore. Default is 'hive' " +
- "(the default catalog). Empty string means all catalogs will be cached."),
- CLIENT_CONNECT_RETRY_DELAY("metastore.client.connect.retry.delay",
- "hive.metastore.client.connect.retry.delay", 1, TimeUnit.SECONDS,
- "Number of seconds for the client to wait between consecutive connection attempts"),
- CLIENT_KERBEROS_PRINCIPAL("metastore.client.kerberos.principal",
- "hive.metastore.client.kerberos.principal",
- "", // E.g. "hive-metastore/_HOST@EXAMPLE.COM".
- "The Kerberos principal associated with the HA cluster of hcat_servers."),
- CLIENT_SOCKET_LIFETIME("metastore.client.socket.lifetime",
- "hive.metastore.client.socket.lifetime", 0, TimeUnit.SECONDS,
- "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" +
- "reconnects on the next MetaStore operation. A value of 0s means the connection\n" +
- "has an infinite lifetime."),
- CLIENT_SOCKET_TIMEOUT("metastore.client.socket.timeout", "hive.metastore.client.socket.timeout", 600,
- TimeUnit.SECONDS, "MetaStore Client socket timeout in seconds"),
- COMPACTOR_HISTORY_REAPER_INTERVAL("metastore.compactor.history.reaper.interval",
- "hive.compactor.history.reaper.interval", 2, TimeUnit.MINUTES,
- "Determines how often compaction history reaper runs"),
- COMPACTOR_HISTORY_RETENTION_ATTEMPTED("metastore.compactor.history.retention.attempted",
- "hive.compactor.history.retention.attempted", 2,
- new RangeValidator(0, 100), "Determines how many attempted compaction records will be " +
- "retained in compaction history for a given table/partition."),
- COMPACTOR_HISTORY_RETENTION_FAILED("metastore.compactor.history.retention.failed",
- "hive.compactor.history.retention.failed", 3,
- new RangeValidator(0, 100), "Determines how many failed compaction records will be " +
- "retained in compaction history for a given table/partition."),
- COMPACTOR_HISTORY_RETENTION_SUCCEEDED("metastore.compactor.history.retention.succeeded",
- "hive.compactor.history.retention.succeeded", 3,
- new RangeValidator(0, 100), "Determines how many successful compaction records will be " +
- "retained in compaction history for a given table/partition."),
- COMPACTOR_INITIATOR_FAILED_THRESHOLD("metastore.compactor.initiator.failed.compacts.threshold",
- "hive.compactor.initiator.failed.compacts.threshold", 2,
- new RangeValidator(1, 20), "Number of consecutive compaction failures (per table/partition) " +
- "after which automatic compactions will not be scheduled any more. Note that this must be less " +
- "than hive.compactor.history.retention.failed."),
- COMPACTOR_INITIATOR_ON("metastore.compactor.initiator.on", "hive.compactor.initiator.on", false,
- "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" +
- "Set this to true on one instance of the Thrift metastore service as part of turning\n" +
- "on Hive transactions. For a complete list of parameters required for turning on\n" +
- "transactions, see hive.txn.manager."),
- COMPACTOR_WORKER_THREADS("metastore.compactor.worker.threads",
- "hive.compactor.worker.threads", 0,
- "How many compactor worker threads to run on this metastore instance. Set this to a\n" +
- "positive number on one or more instances of the Thrift metastore service as part of\n" +
- "turning on Hive transactions. For a complete list of parameters required for turning\n" +
- "on transactions, see hive.txn.manager.\n" +
- "Worker threads spawn MapReduce jobs to do compactions. They do not do the compactions\n" +
- "themselves. Increasing the number of worker threads will decrease the time it takes\n" +
- "tables or partitions to be compacted once they are determined to need compaction.\n" +
- "It will also increase the background load on the Hadoop cluster as more MapReduce jobs\n" +
- "will be running in the background."),
- CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName",
- "javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver",
- "Driver class name for a JDBC metastore"),
- CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize",
- "datanucleus.connectionPool.maxPoolSize", 10,
- "Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" +
- "2 connection pools (TxnHandler and ObjectStore). When configuring the max connection pool size, it is\n" +
- "recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" +
- "configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+
- "(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" +
- "(2 * physical_core_count + hard_disk_count)."),
- CONNECT_URL_HOOK("metastore.ds.connection.url.hook",
- "hive.metastore.ds.connection.url.hook", "",
- "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"),
- CONNECT_URL_KEY("javax.jdo.option.ConnectionURL",
- "javax.jdo.option.ConnectionURL",
- "jdbc:derby:;databaseName=metastore_db;create=true",
- "JDBC connect string for a JDBC metastore.\n" +
- "To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" +
- "For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."),
- CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType",
- "datanucleus.connectionPoolingType", "HikariCP", new StringSetValidator("BONECP", "DBCP",
- "HikariCP", "NONE"),
- "Specify connection pool library for datanucleus"),
- CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName",
- "javax.jdo.option.ConnectionUserName", "APP",
- "Username to use against metastore database"),
- CREATE_TABLES_AS_ACID("metastore.create.as.acid", "hive.create.as.acid", false,
- "Whether the eligible tables should be created as full ACID by default. Does \n" +
- "not apply to external tables, the ones using storage handlers, etc."),
- COUNT_OPEN_TXNS_INTERVAL("metastore.count.open.txns.interval", "hive.count.open.txns.interval",
- 1, TimeUnit.SECONDS, "Time in seconds between checks to count open transactions."),
- DATANUCLEUS_AUTOSTART("datanucleus.autoStartMechanismMode",
- "datanucleus.autoStartMechanismMode", "ignored", new StringSetValidator("ignored"),
- "Autostart mechanism for datanucleus. Currently ignored is the only option supported."),
- DATANUCLEUS_CACHE_LEVEL2("datanucleus.cache.level2", "datanucleus.cache.level2", false,
- "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"),
- DATANUCLEUS_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type",
- "datanucleus.cache.level2.type", "none", ""),
- DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo",
- "datanucleus.rdbms.initializeColumnInfo", "NONE",
- "initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."),
- DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck",
- "datanucleus.plugin.pluginRegistryBundleCheck", "LOG",
- "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"),
- DATANUCLEUS_TRANSACTION_ISOLATION("datanucleus.transactionIsolation",
- "datanucleus.transactionIsolation", "read-committed",
- "Default transaction isolation level for identity generation."),
- DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy",
- "datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""),
- DBACCESS_SSL_PROPS("metastore.dbaccess.ssl.properties", "hive.metastore.dbaccess.ssl.properties", "",
- "Comma-separated SSL properties for metastore to access database when JDO connection URL\n" +
- "enables SSL access. e.g. javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd."),
- DEFAULTPARTITIONNAME("metastore.default.partition.name",
- "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__",
- "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" +
- "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" +
- "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."),
- DELEGATION_KEY_UPDATE_INTERVAL("metastore.cluster.delegation.key.update-interval",
- "hive.cluster.delegation.key.update-interval", 1, TimeUnit.DAYS, ""),
- DELEGATION_TOKEN_GC_INTERVAL("metastore.cluster.delegation.token.gc-interval",
- "hive.cluster.delegation.token.gc-interval", 1, TimeUnit.HOURS, ""),
- DELEGATION_TOKEN_MAX_LIFETIME("metastore.cluster.delegation.token.max-lifetime",
- "hive.cluster.delegation.token.max-lifetime", 7, TimeUnit.DAYS, ""),
- DELEGATION_TOKEN_RENEW_INTERVAL("metastore.cluster.delegation.token.renew-interval",
- "hive.cluster.delegation.token.renew-interval", 1, TimeUnit.DAYS, ""),
- DELEGATION_TOKEN_STORE_CLS("metastore.cluster.delegation.token.store.class",
- "hive.cluster.delegation.token.store.class", MetastoreDelegationTokenManager.class.getName(),
- "Class to store delegation tokens"),
- DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit",
- "javax.jdo.option.DetachAllOnCommit", true,
- "Detaches all objects from session so that they can be used after transaction is committed"),
- DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("metastore.direct.sql.max.elements.in.clause",
- "hive.direct.sql.max.elements.in.clause", 1000,
- "The maximum number of values in a IN clause. Once exceeded, it will be broken into\n" +
- " multiple OR separated IN clauses."),
- DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause",
- "hive.direct.sql.max.elements.values.clause",
- 1000, "The maximum number of values in a VALUES clause for INSERT statement."),
- DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length",
- "hive.direct.sql.max.query.length", 100, "The maximum\n" +
- " size of a query string (in KB)."),
- DIRECT_SQL_PARTITION_BATCH_SIZE("metastore.direct.sql.batch.size",
- "hive.metastore.direct.sql.batch.size", 0,
- "Batch size for partition and other object retrieval from the underlying DB in direct\n" +
- "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" +
- "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" +
- "may impede performance. -1 means no batching, 0 means automatic batching."),
- DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES("metastore.disallow.incompatible.col.type.changes",
- "hive.metastore.disallow.incompatible.col.type.changes", true,
- "If true, ALTER TABLE operations which change the type of a\n" +
- "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" +
- "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" +
- "datatypes can be converted from string to any type. The map is also serialized as\n" +
- "a string, which can be read as a string as well. However, with any binary\n" +
- "serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" +
- "when subsequently trying to access old partitions.\n" +
- "\n" +
- "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" +
- "not blocked.\n" +
- "\n" +
- "See HIVE-4409 for more details."),
- DUMP_CONFIG_ON_CREATION("metastore.dump.config.on.creation", "metastore.dump.config.on.creation", true,
- "If true, a printout of the config file (minus sensitive values) will be dumped to the " +
- "log whenever newMetastoreConf() is called. Can produce a lot of logs"),
- END_FUNCTION_LISTENERS("metastore.end.function.listeners",
- "hive.metastore.end.function.listeners", "",
- "List of comma separated listeners for the end of metastore functions."),
- EVENT_CLEAN_FREQ("metastore.event.clean.freq", "hive.metastore.event.clean.freq", 0,
- TimeUnit.SECONDS, "Frequency at which timer task runs to purge expired events in metastore."),
- EVENT_EXPIRY_DURATION("metastore.event.expiry.duration", "hive.metastore.event.expiry.duration",
- 0, TimeUnit.SECONDS, "Duration after which events expire from events table"),
- EVENT_LISTENERS("metastore.event.listeners", "hive.metastore.event.listeners", "",
- "A comma separated list of Java classes that implement the org.apache.riven.MetaStoreEventListener" +
- " interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. " +
- "Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."),
- EVENT_MESSAGE_FACTORY("metastore.event.message.factory",
- "hive.metastore.event.message.factory",
- "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory",
- "Factory class for making encoding and decoding messages in the events generated."),
- EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive",
- "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS,
- "time after which events will be removed from the database listener queue"),
- EVENT_DB_NOTIFICATION_API_AUTH("metastore.metastore.event.db.notification.api.auth",
- "hive.metastore.event.db.notification.api.auth", true,
- "Should metastore do authorization against database notification related APIs such as get_next_notification.\n" +
- "If set to true, then only the superusers in proxy settings have the permission"),
- EXECUTE_SET_UGI("metastore.execute.setugi", "hive.metastore.execute.setugi", true,
- "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" +
- "the client's reported user and group permissions. Note that this property must be set on \n" +
- "both the client and server sides. Further note that its best effort. \n" +
- "If client sets its to true and server sets it to false, client setting will be ignored."),
- EXPRESSION_PROXY_CLASS("metastore.expression.proxy", "hive.metastore.expression.proxy",
- "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore",
- "Class to use to process expressions in partition pruning."),
- FILE_METADATA_THREADS("metastore.file.metadata.threads",
- "hive.metastore.hbase.file.metadata.threads", 1,
- "Number of threads to use to read file metadata in background to cache it."),
- FILTER_HOOK("metastore.filter.hook", "hive.metastore.filter.hook",
- org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl.class.getName(),
- "Metastore hook class for filtering the metadata read results. If hive.security.authorization.manager"
- + "is set to instance of HiveAuthorizerFactory, then this value is ignored."),
- FS_HANDLER_CLS("metastore.fs.handler.class", "hive.metastore.fs.handler.class",
- "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""),
- FS_HANDLER_THREADS_COUNT("metastore.fshandler.threads", "hive.metastore.fshandler.threads", 15,
- "Number of threads to be allocated for metastore handler for fs operations."),
- HMS_HANDLER_ATTEMPTS("metastore.hmshandler.retry.attempts", "hive.hmshandler.retry.attempts", 10,
- "The number of times to retry a HMSHandler call if there were a connection error."),
- HMS_HANDLER_FORCE_RELOAD_CONF("metastore.hmshandler.force.reload.conf",
- "hive.hmshandler.force.reload.conf", false, // off by default; the description marks it as testing-only
- "Whether to force reloading of the HMSHandler configuration (including\n" +
- "the connection URL) before the next metastore query that accesses the\n" +
- "datastore. Once reloaded, this value is reset to false. Used for\n" +
- "testing only."),
- HMS_HANDLER_INTERVAL("metastore.hmshandler.retry.interval", "hive.hmshandler.retry.interval",
- 2000, TimeUnit.MILLISECONDS, "The time between HMSHandler retry attempts on failure."),
- IDENTIFIER_FACTORY("datanucleus.identifierFactory",
- "datanucleus.identifierFactory", "datanucleus1",
- "Name of the identifier factory to use when generating table/column names etc. \n" +
- "'datanucleus1' is used for backward compatibility with DataNucleus v1"),
- INIT_HOOKS("metastore.init.hooks", "hive.metastore.init.hooks", "",
- "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" +
- "An init hook is specified as the name of Java class which extends org.apache.riven.MetaStoreInitListener."),
- INIT_METADATA_COUNT_ENABLED("metastore.initial.metadata.count.enabled",
- "hive.metastore.initial.metadata.count.enabled", true,
- "Enable a metadata count at metastore startup for metrics."),
- INTEGER_JDO_PUSHDOWN("metastore.integral.jdo.pushdown",
- "hive.metastore.integral.jdo.pushdown", false,
- "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" +
- "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" +
- "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" +
- "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" +
- "is also irrelevant."),
- KERBEROS_KEYTAB_FILE("metastore.kerberos.keytab.file",
- "hive.metastore.kerberos.keytab.file", "",
- "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."),
- KERBEROS_PRINCIPAL("metastore.kerberos.principal", "hive.metastore.kerberos.principal",
- "hive-metastore/_HOST@EXAMPLE.COM",
- "The service principal for the metastore Thrift server. \n" +
- "The special string _HOST will be replaced automatically with the correct host name."),
- LIMIT_PARTITION_REQUEST("metastore.limit.partition.request",
- "hive.metastore.limit.partition.request", -1, // -1 means "no limit", as the description states
- "This limits the number of partitions (whole partition objects) that can be requested " +
- "from the metastore for a given table. MetaStore API methods using this are: \n" +
- "get_partitions, \n" +
- "get_partitions_with_auth, \n" +
- "get_partitions_by_filter, \n" +
- "get_partitions_by_expr.\n" +
- "The default value \"-1\" means no limit."),
- LOG4J_FILE("metastore.log4j.file", "hive.log4j.file", "",
- "Hive log4j configuration file.\n" +
- "If the property is not set, then logging will be initialized using metastore-log4j2.properties found on the classpath.\n" +
- "If the property is set, the value must be a valid URI (java.net.URI, e.g. \"file:///tmp/my-logging.xml\"), \n" +
- "which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."),
- MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass",
- "javax.jdo.PersistenceManagerFactoryClass",
- "org.datanucleus.api.jdo.JDOPersistenceManagerFactory",
- "class implementing the jdo persistence"),
- MATERIALIZATIONS_INVALIDATION_CACHE_IMPL("metastore.materializations.invalidation.impl",
- "hive.metastore.materializations.invalidation.impl", "DEFAULT",
- new StringSetValidator("DEFAULT", "DISABLE"),
- "The implementation that we should use for the materializations invalidation cache. \n" +
- " DEFAULT: Default implementation for invalidation cache\n" +
- " DISABLE: Disable invalidation cache (debugging purposes)"),
- MATERIALIZATIONS_INVALIDATION_CACHE_CLEAN_FREQUENCY("metastore.materializations.invalidation.clean.frequency",
- "hive.metastore.materializations.invalidation.clean.frequency", // default below: 3600 seconds
- 3600, TimeUnit.SECONDS, "Frequency at which timer task runs to remove unnecessary transaction entries from " +
- "materializations invalidation cache."),
- MATERIALIZATIONS_INVALIDATION_CACHE_EXPIRY_DURATION("metastore.materializations.invalidation.max.duration",
- "hive.metastore.materializations.invalidation.max.duration", // default below: 86400 seconds
- 86400, TimeUnit.SECONDS, "Maximum duration for query producing a materialization. After this time, transaction " +
- "entries that are not relevant for materializations can be removed from invalidation cache."),
-
- RUNTIME_STATS_CLEAN_FREQUENCY("runtime.stats.clean.frequency", "hive.metastore.runtime.stats.clean.frequency", 3600,
- TimeUnit.SECONDS, "Frequency at which timer task runs to remove outdated runtime stat entries."),
- RUNTIME_STATS_MAX_AGE("runtime.stats.max.age", "hive.metastore.runtime.stats.max.age", 86400 * 3, TimeUnit.SECONDS,
- "Stat entries which are older than this are removed."),
-
- // Parameters for exporting metadata on table drop (requires the use of the
- // org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener)
- METADATA_EXPORT_LOCATION("metastore.metadata.export.location", "hive.metadata.export.location",
- "",
- "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
- "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" +
- "metadata being exported to the current user's home directory on HDFS."),
- MOVE_EXPORTED_METADATA_TO_TRASH("metastore.metadata.move.exported.metadata.to.trash",
- "hive.metadata.move.exported.metadata.to.trash", true,
- "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
- "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" +
- "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."),
- METRICS_ENABLED("metastore.metrics.enabled", "hive.metastore.metrics.enabled", false,
- "Enable metrics on the metastore."),
- METRICS_HADOOP2_COMPONENT_NAME("metastore.metrics.hadoop2.component", "hive.service.metrics.hadoop2.component", "hivemetastore",
- "Component name to provide to Hadoop2 Metrics system."),
- METRICS_JSON_FILE_INTERVAL("metastore.metrics.file.frequency",
- "hive.service.metrics.file.frequency", 1, TimeUnit.MINUTES,
- "For json metric reporter, the frequency of updating JSON metrics file."),
- METRICS_JSON_FILE_LOCATION("metastore.metrics.file.location",
- "hive.service.metrics.file.location", "/tmp/report.json",
- "For metric class json metric reporter, the location of local JSON metrics file. " +
- "This file will get overwritten at every interval."),
- METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx",
- new StringSetValidator("json", "jmx", "console", "hadoop"),
- "A comma separated list of metrics reporters to start"),
- MULTITHREADED("javax.jdo.option.Multithreaded", "javax.jdo.option.Multithreaded", true,
- "Set this to true if multiple threads access metastore through JDO concurrently."),
- MAX_OPEN_TXNS("metastore.max.open.txns", "hive.max.open.txns", 100000,
- "Maximum number of open transactions. If \n" +
- "current open transactions reach this limit, future open transaction requests will be \n" +
- "rejected, until this number goes below the limit."),
- NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead",
- "javax.jdo.option.NonTransactionalRead", true,
- "Reads outside of transactions"),
- NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES("metastore.notification.sequence.lock.max.retries",
- "hive.notification.sequence.lock.max.retries", 5,
- "Number of retries required to acquire a lock when getting the next notification sequential ID for entries "
- + "in the NOTIFICATION_LOG table."),
- NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL(
- "metastore.notification.sequence.lock.retry.sleep.interval",
- "hive.notification.sequence.lock.retry.sleep.interval", 500, TimeUnit.MILLISECONDS,
- "Sleep interval between retries to acquire a notification lock as described part of property "
- + NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES.name()),
- ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("metastore.orm.retrieveMapNullsAsEmptyStrings",
- "hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false,
- "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " +
- "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " +
- "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " +
- "pruning is the correct behaviour"),
- PARTITION_NAME_WHITELIST_PATTERN("metastore.partition.name.whitelist.pattern",
- "hive.metastore.partition.name.whitelist.pattern", "",
- "Partition names will be checked against this regex pattern and rejected if not matched."),
- PART_INHERIT_TBL_PROPS("metastore.partition.inherit.table.properties",
- "hive.metastore.partition.inherit.table.properties", "",
- "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" +
- "* implies all the keys will get inherited."),
- PRE_EVENT_LISTENERS("metastore.pre.event.listeners", "hive.metastore.pre.event.listeners", "",
- "List of comma separated listeners for metastore events."),
- PWD("javax.jdo.option.ConnectionPassword", "javax.jdo.option.ConnectionPassword", "mine",
- "password to use against metastore database"),
- RAW_STORE_IMPL("metastore.rawstore.impl", "hive.metastore.rawstore.impl",
- "org.apache.hadoop.hive.metastore.ObjectStore",
- "Name of the class that implements org.apache.riven.rawstore interface. \n" +
- "This class is used to store and retrieval of raw metadata objects such as table, database"),
- REPLCMDIR("metastore.repl.cmrootdir", "hive.repl.cmrootdir", "/user/hive/cmroot/",
- "Root dir for ChangeManager, used for deleted files."),
- REPLCMRETIAN("metastore.repl.cm.retain", "hive.repl.cm.retain", 24, TimeUnit.HOURS,
- "Time to retain removed files in cmrootdir."),
- REPLCMINTERVAL("metastore.repl.cm.interval", "hive.repl.cm.interval", 3600, TimeUnit.SECONDS, // default: 3600 seconds
- "Interval for cmroot cleanup thread."),
- REPLCMENABLED("metastore.repl.cm.enabled", "hive.repl.cm.enabled", false,
- "Turn on ChangeManager, so delete files will go to cmrootdir."),
- REPLDIR("metastore.repl.rootdir", "hive.repl.rootdir", "/user/hive/repl/",
- "HDFS root dir for all replication dumps."),
- REPL_COPYFILE_MAXNUMFILES("metastore.repl.copyfile.maxnumfiles",
- "hive.exec.copyfile.maxnumfiles", 1L, // long default: a single file
- "Maximum number of files Hive uses to do sequential HDFS copies between directories. " +
- "Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."),
- REPL_COPYFILE_MAXSIZE("metastore.repl.copyfile.maxsize",
- "hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/, // long arithmetic; value is in bytes
- "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories. " +
- "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."),
- SCHEMA_INFO_CLASS("metastore.schema.info.class", "hive.metastore.schema.info.class",
- "org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo",
- "Fully qualified class name for the metastore schema information class \n"
- + "which is used by schematool to fetch the schema information.\n"
- + " This class should implement the IMetaStoreSchemaInfo interface"),
- SCHEMA_VERIFICATION("metastore.schema.verification", "hive.metastore.schema.verification", true, // strict by default
- "Enforce metastore schema version consistency.\n" +
- "True: Verify that version information stored in metastore is compatible with one from Hive jars. Also disable automatic\n" +
- " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" +
- " proper metastore schema migration. (Default)\n" +
- "False: Warn if the version information stored in metastore doesn't match with one from Hive jars."),
- SCHEMA_VERIFICATION_RECORD_VERSION("metastore.schema.verification.record.version",
- "hive.metastore.schema.verification.record.version", false,
- "When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" +
- " enabled the MS will be unusable."),
- SERDES_USING_METASTORE_FOR_SCHEMA("metastore.serdes.using.metastore.for.schema",
- "hive.serdes.using.metastore.for.schema",
- "org.apache.hadoop.hive.ql.io.orc.OrcSerde," +
- "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
- "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
- "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
- "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
- "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
- "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," +
- "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
- "SerDes retrieving schema from metastore. This is an internal parameter."),
- SERVER_MAX_MESSAGE_SIZE("metastore.server.max.message.size",
- "hive.metastore.server.max.message.size", 100*1024*1024L,
- "Maximum message size in bytes a HMS will accept."),
- SERVER_MAX_THREADS("metastore.server.max.threads",
- "hive.metastore.server.max.threads", 1000,
- "Maximum number of worker threads in the Thrift server's pool."),
- SERVER_MIN_THREADS("metastore.server.min.threads", "hive.metastore.server.min.threads", 200,
- "Minimum number of worker threads in the Thrift server's pool."),
- SERVER_PORT("metastore.thrift.port", "hive.metastore.port", 9083,
- "Hive metastore listener port"),
- SSL_KEYSTORE_PASSWORD("metastore.keystore.password", "hive.metastore.keystore.password", "",
- "Metastore SSL certificate keystore password."),
- SSL_KEYSTORE_PATH("metastore.keystore.path", "hive.metastore.keystore.path", "",
- "Metastore SSL certificate keystore location."),
- SSL_PROTOCOL_BLACKLIST("metastore.ssl.protocol.blacklist", "hive.ssl.protocol.blacklist",
- "SSLv2,SSLv3", "SSL Versions to disable for all Hive Servers"),
- SSL_TRUSTSTORE_PATH("metastore.truststore.path", "hive.metastore.truststore.path", "",
- "Metastore SSL certificate truststore location."),
- SSL_TRUSTSTORE_PASSWORD("metastore.truststore.password", "hive.metastore.truststore.password", "",
- "Metastore SSL certificate truststore password."),
- STATS_AUTO_GATHER("metastore.stats.autogather", "hive.stats.autogather", true,
- "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."),
- STATS_FETCH_BITVECTOR("metastore.stats.fetch.bitvector", "hive.stats.fetch.bitvector", false,
- "Whether we fetch bitvector when we compute ndv. Users can turn it off if they want to use old schema"),
- STATS_NDV_TUNER("metastore.stats.ndv.tuner", "hive.metastore.stats.ndv.tuner", 0.0,
- "Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. \n" +
- "The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" +
- "Its value should be between 0.0 (i.e., choose lower bound) and 1.0 (i.e., choose higher bound)"),
- STATS_NDV_DENSITY_FUNCTION("metastore.stats.ndv.densityfunction",
- "hive.metastore.stats.ndv.densityfunction", false,
- "Whether to use density function to estimate the NDV for the whole table based on the NDV of partitions"),
- STATS_DEFAULT_AGGREGATOR("metastore.stats.default.aggregator", "hive.stats.default.aggregator",
- "",
- "The Java class (implementing the StatsAggregator interface) that is used by default if hive.stats.dbclass is custom type."),
- STATS_DEFAULT_PUBLISHER("metastore.stats.default.publisher", "hive.stats.default.publisher", "",
- "The Java class (implementing the StatsPublisher interface) that is used by default if hive.stats.dbclass is custom type."),
- STATS_AUTO_UPDATE("metastore.stats.auto.analyze", "hive.metastore.stats.auto.analyze", "none",
- new EnumValidator(StatsUpdateMode.values()),
- "Whether to update stats in the background; none - no, all - for all tables, existing - only existing, out of date, stats."),
- STATS_AUTO_UPDATE_NOOP_WAIT("metastore.stats.auto.analyze.noop.wait",
- "hive.metastore.stats.auto.analyze.noop.wait", 5L, TimeUnit.MINUTES,
- new TimeValidator(TimeUnit.MINUTES),
- "How long to sleep if there were no stats needing update during an update iteration.\n" +
- "This is a setting to throttle table/partition checks when nothing is being changed; not\n" +
- "the analyze queries themselves."),
- STATS_AUTO_UPDATE_WORKER_COUNT("metastore.stats.auto.analyze.worker.count",
- "hive.metastore.stats.auto.analyze.worker.count", 1,
- "Number of parallel analyze commands to run for background stats update."),
- STORAGE_SCHEMA_READER_IMPL("metastore.storage.schema.reader.impl", "metastore.storage.schema.reader.impl",
- DefaultStorageSchemaReader.class.getName(),
- "The class to use to read schemas from storage. It must implement " +
- "org.apache.hadoop.hive.metastore.StorageSchemaReader"),
- STORE_MANAGER_TYPE("datanucleus.storeManagerType", "datanucleus.storeManagerType", "rdbms", "metadata store type"),
- STRICT_MANAGED_TABLES("metastore.strict.managed.tables", "hive.strict.managed.tables", false,
- "Whether strict managed tables mode is enabled. With this mode enabled, " +
- "only transactional tables (both full and insert-only) are allowed to be created as managed tables"),
- SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES("metastore.support.special.characters.tablename",
- "hive.support.special.characters.tablename", true,
- "This flag should be set to true to enable support for special characters in table names.\n"
- + "When it is set to false, only [a-zA-Z_0-9]+ are supported.\n"
- + "The only supported special character right now is '/'. This flag applies only to quoted table names.\n"
- + "The default value is true."),
- TASK_THREADS_ALWAYS("metastore.task.threads.always", "metastore.task.threads.always",
- EventCleanerTask.class.getName() + "," + RuntimeStatsCleanerTask.class.getName() + "," +
- "org.apache.hadoop.hive.metastore.repl.DumpDirCleanerTask" + "," +
- "org.apache.hadoop.hive.metastore.HiveProtoEventsCleanerTask",
- "Comma separated list of tasks that will be started in separate threads. These will " +
- "always be started, regardless of whether the metastore is running in embedded mode " +
- "or in server mode. They must implement " + MetastoreTaskThread.class.getName()),
- TASK_THREADS_REMOTE_ONLY("metastore.task.threads.remote", "metastore.task.threads.remote",
- AcidHouseKeeperService.class.getName() + "," + // default value is a comma-joined list of class names
- AcidOpenTxnsCounterService.class.getName() + "," +
- AcidCompactionHistoryService.class.getName() + "," +
- AcidWriteSetService.class.getName() + "," +
- MaterializationsRebuildLockCleanerTask.class.getName(),
- "Comma separated list of tasks that will be started in separate threads. These will be" +
- " started only when the metastore is running as a separate service. They must " +
- "implement " + MetastoreTaskThread.class.getName()),
- TCP_KEEP_ALIVE("metastore.server.tcp.keepalive",
- "hive.metastore.server.tcp.keepalive", true,
- "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."),
- THREAD_POOL_SIZE("metastore.thread.pool.size", "no.such", 10,
- "Number of threads in the thread pool. These will be used to execute all background " +
- "processes."),
- THRIFT_CONNECTION_RETRIES("metastore.connect.retries", "hive.metastore.connect.retries", 3,
- "Number of retries while opening a connection to metastore"),
- THRIFT_FAILURE_RETRIES("metastore.failure.retries", "hive.metastore.failure.retries", 1,
- "Number of retries upon failure of Thrift metastore calls"),
- THRIFT_URIS("metastore.thrift.uris", "hive.metastore.uris", "",
- "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),
- THRIFT_URI_SELECTION("metastore.thrift.uri.selection", "hive.metastore.uri.selection", "RANDOM",
- new StringSetValidator("RANDOM", "SEQUENTIAL"),
- "Determines the selection mechanism used by metastore client to connect to remote " +
- "metastore. SEQUENTIAL implies that the first valid metastore from the URIs specified " +
- "as part of hive.metastore.uris will be picked. RANDOM implies that the metastore " +
- "will be picked randomly"),
- TIMEDOUT_TXN_REAPER_START("metastore.timedout.txn.reaper.start",
- "hive.timedout.txn.reaper.start", 100, TimeUnit.SECONDS,
- "Time delay of 1st reaper run after metastore start"),
- TIMEDOUT_TXN_REAPER_INTERVAL("metastore.timedout.txn.reaper.interval",
- "hive.timedout.txn.reaper.interval", 180, TimeUnit.SECONDS,
- "Time interval describing how often the reaper runs"),
- TOKEN_SIGNATURE("metastore.token.signature", "hive.metastore.token.signature", "",
- "The delegation token service name to match when selecting a token from the current user's tokens."),
- TRANSACTIONAL_EVENT_LISTENERS("metastore.transactional.event.listeners",
- "hive.metastore.transactional.event.listeners", "",
- "A comma separated list of Java classes that implement the org.apache.riven.MetaStoreEventListener" +
- " interface. Both the metastore event and corresponding listener method will be invoked in the same JDO transaction."),
- TRY_DIRECT_SQL("metastore.try.direct.sql", "hive.metastore.try.direct.sql", true,
- "Whether the metastore should try to use direct SQL queries instead of the\n" +
- "DataNucleus for certain read paths. This can improve metastore performance when\n" +
- "fetching many partitions or column statistics by orders of magnitude; however, it\n" +
- "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" +
- "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" +
- "work for all queries on your datastore. If all SQL queries fail (for example, your\n" +
- "metastore is backed by MongoDB), you might want to disable this to save the\n" +
- "try-and-fall-back cost."),
- TRY_DIRECT_SQL_DDL("metastore.try.direct.sql.ddl", "hive.metastore.try.direct.sql.ddl", true,
- "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" +
- "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" +
- "select query has incorrect syntax or something similar inside a transaction, the\n" +
- "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" +
- "should disable the usage of direct SQL inside transactions if that happens in your case."),
- TXN_MAX_OPEN_BATCH("metastore.txn.max.open.batch", "hive.txn.max.open.batch", 1000,
- "Maximum number of transactions that can be fetched in one call to open_txns().\n" +
- "This controls how many transactions streaming agents such as Flume or Storm open\n" +
- "simultaneously. The streaming agent then writes that number of entries into a single\n" +
- "file (per Flume agent or Storm bolt). Thus increasing this value decreases the number\n" +
- "of delta files created by streaming agents. But it also increases the number of open\n" +
- "transactions that Hive has to track at any given time, which may negatively affect\n" +
- "read performance."),
- TXN_RETRYABLE_SQLEX_REGEX("metastore.txn.retryable.sqlex.regex",
- "hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" +
- "of regular expression patterns for SQL state, error code, and error message of\n" +
- "retryable SQLExceptions, that's suitable for the metastore DB.\n" +
- "For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" +
- "The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" +
- "ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""),
- TXN_STORE_IMPL("metastore.txn.store.impl", "hive.metastore.txn.store.impl",
- "org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler",
- "Name of class that implements org.apache.riven.txn.TxnStore. This " +
- "class is used to store and retrieve transactions and locks"),
- TXN_TIMEOUT("metastore.txn.timeout", "hive.txn.timeout", 300, TimeUnit.SECONDS,
- "time after which transactions are declared aborted if the client has not sent a heartbeat."),
- URI_RESOLVER("metastore.uri.resolver", "hive.metastore.uri.resolver", "",
- "If set, fully qualified class name of resolver for hive metastore uri's"),
- USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false,
- "Comma separated list of users who are in admin role for bootstrapping.\n" +
- "More users can be added in ADMIN role later."),
- USE_SSL("metastore.use.SSL", "hive.metastore.use.SSL", false,
- "Set this to true for using SSL encryption in HMS server."),
- USE_THRIFT_SASL("metastore.sasl.enabled", "hive.metastore.sasl.enabled", false,
- "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."),
- USE_THRIFT_FRAMED_TRANSPORT("metastore.thrift.framed.transport.enabled",
- "hive.metastore.thrift.framed.transport.enabled", false,
- "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."),
- USE_THRIFT_COMPACT_PROTOCOL("metastore.thrift.compact.protocol.enabled",
- "hive.metastore.thrift.compact.protocol.enabled", false,
- "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" +
- "Setting it to true will break compatibility with older clients running TBinaryProtocol."),
- VALIDATE_COLUMNS("datanucleus.schema.validateColumns", "datanucleus.schema.validateColumns", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints",
- "datanucleus.schema.validateConstraints", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- VALIDATE_TABLES("datanucleus.schema.validateTables",
- "datanucleus.schema.validateTables", false,
- "validates existing schema against code. turn this on if you want to verify existing schema"),
- WAREHOUSE("metastore.warehouse.dir", "hive.metastore.warehouse.dir", "/user/hive/warehouse",
- "location of default database for the warehouse"),
- WAREHOUSE_EXTERNAL("metastore.warehouse.external.dir",
- "hive.metastore.warehouse.external.dir", "",
- "Default location for external tables created in the warehouse. " +
- "If not set or null, then the normal warehouse location will be used as the default location."),
- WRITE_SET_REAPER_INTERVAL("metastore.writeset.reaper.interval",
- "hive.writeset.reaper.interval", 60, TimeUnit.SECONDS,
- "Frequency of WriteSet reaper runs"),
- WM_DEFAULT_POOL_SIZE("metastore.wm.default.pool.size",
- "hive.metastore.wm.default.pool.size", 4,
- "The size of a default pool to create when creating an empty resource plan;\n" +
- "If not positive, no default pool will be created."),
- RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size",
- "metastore.rawstore.batch.size", -1,
- "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" +
- "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" +
- "too large. Use this parameter to break the query into multiple batches. -1 means no batching."),
-
- // Hive values we have copied and use as is
- // These two are used to indicate that we are running tests
- HIVE_IN_TEST("hive.in.test", "hive.in.test", false, "internal usage only, true in test mode"),
- HIVE_IN_TEZ_TEST("hive.in.tez.test", "hive.in.tez.test", false,
- "internal use only, true when in testing tez"),
- // We need to track this as some listeners pass it through our config and we need to honor
- // the system properties.
- HIVE_AUTHORIZATION_MANAGER("hive.security.authorization.manager",
- "hive.security.authorization.manager",
- "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory",
- "The Hive client authorization manager class name. The user defined authorization class should implement \n" +
- "interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider."),
- HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager",
- "hive.security.metastore.authenticator.manager",
- "org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator",
- "authenticator manager class name to be used in the metastore for authentication. \n" +
- "The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider."),
- HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads",
- "hive.security.metastore.authorization.auth.reads", true,
- "If this is true, metastore authorizer authorizes read actions on database, table"),
- // The metastore shouldn't care what txn manager Hive is running, but in various tests it
- // needs to set these values. We should do the work to detangle this.
- HIVE_TXN_MANAGER("hive.txn.manager", "hive.txn.manager",
- "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager",
- "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" +
- "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" +
- "hive.compactor.worker.threads, hive.support.concurrency (true),\n" +
- "and hive.exec.dynamic.partition.mode (nonstrict).\n" +
- "The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides\n" +
- "no transactions."),
- // Metastore always support concurrency, but certain ACID tests depend on this being set. We
- // need to do the work to detangle this
- HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", "hive.support.concurrency", false,
- "Whether Hive supports concurrency control or not. \n" +
- "A ZooKeeper instance must be up and running when using zookeeper Hive lock manager "),
-
- // Deprecated Hive values that we are keeping for backwards compatibility.
- @Deprecated
- HIVE_CODAHALE_METRICS_REPORTER_CLASSES("hive.service.metrics.codahale.reporter.classes",
- "hive.service.metrics.codahale.reporter.classes", "",
- "Use METRICS_REPORTERS instead. Comma separated list of reporter implementation classes " +
- "for metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics. Overrides "
- + "HIVE_METRICS_REPORTER conf if present. This will be overridden by " +
- "METRICS_REPORTERS if it is present"),
- @Deprecated
- HIVE_METRICS_REPORTER("hive.service.metrics.reporter", "hive.service.metrics.reporter", "", // superseded by METRICS_REPORTERS
- "Reporter implementations for metric class "
- + "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; " +
- "Deprecated, use METRICS_REPORTERS instead. This configuration will be"
- + " overridden by HIVE_CODAHALE_METRICS_REPORTER_CLASSES and METRICS_REPORTERS if " +
- "present. Comma separated list of JMX, CONSOLE, JSON_FILE, HADOOP2"),
-
- // These are all values that we put here just for testing
- STR_TEST_ENTRY("test.str", "hive.test.str", "defaultval", "comment"),
- STR_SET_ENTRY("test.str.set", "hive.test.str.set", "a", new StringSetValidator("a", "b", "c"), ""),
- STR_LIST_ENTRY("test.str.list", "hive.test.str.list", "a,b,c",
- "no comment"),
- LONG_TEST_ENTRY("test.long", "hive.test.long", 42, "comment"),
- DOUBLE_TEST_ENTRY("test.double", "hive.test.double", 3.141592654, "comment"),
- TIME_TEST_ENTRY("test.time", "hive.test.time", 1, TimeUnit.SECONDS, "comment"),
- TIME_VALIDATOR_ENTRY_INCLUSIVE("test.time.validator.inclusive", "hive.test.time.validator.inclusive", 1,
- TimeUnit.SECONDS,
- new TimeValidator(TimeUnit.MILLISECONDS, 500L, true, 1500L, true), "comment"),
- TIME_VALIDATOR_ENTRY_EXCLUSIVE("test.time.validator.exclusive", "hive.test.time.validator.exclusive", 1,
- TimeUnit.SECONDS,
- new TimeValidator(TimeUnit.MILLISECONDS, 500L, false, 1500L, false), "comment"),
- BOOLEAN_TEST_ENTRY("test.bool", "hive.test.bool", true, "comment"),
- CLASS_TEST_ENTRY("test.class", "hive.test.class", "", "comment");
-
- // Metastore configuration key; this is what getVarname() and toString() return.
- private final String varname;
- // Corresponding hive.* configuration key, returned by getHiveName().
- private final String hiveName;
- // Default value. Depending on the constructor used this is a String, a boxed Long, Boolean or
- // Double, or a TimeValue.
- private final Object defaultVal;
- // Validator consulted by validate(); null means validate() does nothing.
- private final Validator validator;
- // Value reported by isCaseSensitive(); false unless set through the constructor that takes it.
- private final boolean caseSensitive;
- // Human readable description of the configuration value.
- private final String description;
-
- /**
-  * Create a string valued entry with no validator that is not case sensitive.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, String defaultVal, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = null;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a string valued entry that is checked by the supplied validator and is not case
-  * sensitive.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param validator validator used by {@link #validate(String)}
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, String defaultVal, Validator validator,
-     String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = validator;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a string valued entry with no validator and an explicit case sensitivity flag.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param caseSensitive value to be reported by {@link #isCaseSensitive()}
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, String defaultVal, boolean caseSensitive,
-     String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = null;
-   this.caseSensitive = caseSensitive;
- }
-
- /**
-  * Create a long valued entry with no validator. The default is stored boxed as a Long.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, long defaultVal, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = null;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a long valued entry that is checked by the supplied validator. The default is stored
-  * boxed as a Long.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param validator validator used by {@link #validate(String)}
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, long defaultVal, Validator validator,
-     String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = validator;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a boolean valued entry with no validator. The default is stored boxed as a Boolean.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, boolean defaultVal, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = null;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a double valued entry with no validator. The default is stored boxed as a Double.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default value
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, double defaultVal, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = defaultVal;
-   this.validator = null;
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a time valued entry. The default is stored as a TimeValue built from the amount and
-  * unit, and a TimeValidator for the same unit is installed as the validator.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default amount of time
-  * @param unit unit that the default amount is expressed in
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = new TimeValue(defaultVal, unit);
-   this.validator = new TimeValidator(unit);
-   this.caseSensitive = false;
- }
-
- /**
-  * Create a time valued entry that is checked by the supplied validator. The default is stored
-  * as a TimeValue built from the amount and unit.
-  * @param varname metastore configuration key
-  * @param hiveName hive configuration key
-  * @param defaultVal default amount of time
-  * @param unit unit that the default amount is expressed in
-  * @param validator validator used by {@link #validate(String)}
-  * @param description description of the value
-  */
- ConfVars(String varname, String hiveName, long defaultVal, TimeUnit unit,
-     Validator validator, String description) {
-   this.varname = varname;
-   this.hiveName = hiveName;
-   this.description = description;
-   this.defaultVal = new TimeValue(defaultVal, unit);
-   this.validator = validator;
-   this.caseSensitive = false;
- }
-
- /**
-  * Check a candidate value against this entry's validator. Entries without a validator accept
-  * anything.
-  * @param value value to check
-  * @throws IllegalArgumentException declared for the validator to signal a rejected value
-  */
- public void validate(String value) throws IllegalArgumentException {
-   if (validator == null) {
-     // Nothing to check against.
-     return;
-   }
-   validator.validate(value);
- }
-
- /**
-  * @return the case sensitivity flag this entry was constructed with
-  */
- public boolean isCaseSensitive() {
-   return this.caseSensitive;
- }
-
- /**
-  * Get the underlying metastore variable name. If you are calling this, you're probably doing
-  * it wrong; use one of the getVar methods instead. Only use this if you are 100% sure you
-  * know what you're doing. MetastoreConf goes to a lot of trouble to make sure it checks both
-  * the Hive and the Metastore values for config keys, and calling
-  * {@link Configuration#get(String)} with this name directly undermines that.
-  * @return variable name
-  */
- public String getVarname() {
-   return this.varname;
- }
-
- /**
-  * Get the Hive name of this variable. Use this when you need to set a system property and are
-  * going to instantiate the configuration via HiveConf: HiveConf only looks for values it
-  * knows, so it will miss all of the metastore.* ones. Do not use this to explicitly set or get
-  * the underlying config value unless you are 100% sure you know what you're doing.
-  * MetastoreConf goes to a lot of trouble to make sure it checks both the Hive and the
-  * Metastore values for config keys, and calling {@link Configuration#get(String)} with this
-  * name directly undermines that.
-  * @return hive.* configuration key
-  */
- public String getHiveName() {
-   return this.hiveName;
- }
-
- /**
-  * @return the default value this entry was constructed with; its runtime type depends on which
-  * constructor was used (String, Long, Boolean, Double or TimeValue)
-  */
- public Object getDefaultVal() {
-   return this.defaultVal;
- }
-
- /**
-  * @return the description this entry was constructed with
-  */
- public String getDescription() {
-   return this.description;
- }
-
- /**
-  * Render this entry as its metastore variable name. Useful when the name is needed for a LOG
-  * message or for {@link System#setProperty(String, String)}. Beware that it should only be
-  * used with setProperty if the configuration is going to be created via
-  * {@link MetastoreConf#newMetastoreConf()}; if it will be created with HiveConf, use
-  * {@link #getHiveName()} instead.
-  * @return metastore.* configuration key
-  */
- @Override
- public String toString() {
-   return this.varname;
- }
- }
-
- // Configuration entries grouped as the DataNucleus and JDO related settings.
- // NOTE(review): presumably these are the values handed to the DataNucleus/JDO persistence
- // layer - confirm against the callers. The array itself is public and mutable.
- public static final ConfVars[] dataNucleusAndJdoConfs = {
- ConfVars.AUTO_CREATE_ALL,
- ConfVars.CONNECTION_DRIVER,
- ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS,
- ConfVars.CONNECTION_POOLING_TYPE,
- ConfVars.CONNECT_URL_KEY,
- ConfVars.CONNECTION_USER_NAME,
- ConfVars.DATANUCLEUS_AUTOSTART,
- ConfVars.DATANUCLEUS_CACHE_LEVEL2,
- ConfVars.DATANUCLEUS_CACHE_LEVEL2_TYPE,
- ConfVars.DATANUCLEUS_INIT_COL_INFO,
- ConfVars.DATANUCLEUS_PLUGIN_REGISTRY_BUNDLE_CHECK,
- ConfVars.DATANUCLEUS_TRANSACTION_ISOLATION,
- ConfVars.DATANUCLEUS_USE_LEGACY_VALUE_STRATEGY,
- ConfVars.DETACH_ALL_ON_COMMIT,
- ConfVars.IDENTIFIER_FACTORY,
- ConfVars.MANAGER_FACTORY_CLASS,
- ConfVars.MULTITHREADED,
- ConfVars.NON_TRANSACTIONAL_READ,
- ConfVars.PWD,
- ConfVars.STORE_MANAGER_TYPE,
- ConfVars.VALIDATE_COLUMNS,
- ConfVars.VALIDATE_CONSTRAINTS,
- ConfVars.VALIDATE_TABLES
- };
-
- // Make sure no one calls this
- private MetastoreConf() {
- throw new RuntimeException("You should never be creating one of these!");
- }
-
- /**
-  * Record the location of hive-site.xml. When this is set to a non-null value before
-  * {@link #newMetastoreConf(Configuration)} runs, that method uses it instead of searching for
-  * the file.
-  * @param location URL of the hive-site.xml to use
-  */
- public static void setHiveSiteLocation(URL location) {
-   MetastoreConf.hiveSiteURL = location;
- }
-
- public static Configuration newMetastoreConf() {
- return newMetastoreConf(new Configuration());
- }
-
- /**
-  * Populate the passed in configuration with Hive and metastore settings.
-  * <p>
-  * Config files are added in the order hive-site.xml, hivemetastore-site.xml,
-  * metastore-site.xml (each only if it can be located) so that the newer files override the
-  * older ones. After that, system properties named after a metastore variable and all system
-  * properties starting with "hive." are copied in. If schema verification is on, auto creation
-  * of the schema is switched off. As a side effect the static URLs remembering where the
-  * files were found are updated.
-  * @param conf configuration to populate; it is modified in place
-  * @return the same configuration object that was passed in
-  */
- public static Configuration newMetastoreConf(Configuration conf) {
-
-   ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
-   ClassLoader classLoader =
-       contextLoader != null ? contextLoader : MetastoreConf.class.getClassLoader();
-
-   // hive-default.xml is deliberately not added to the resources, since no config values should
-   // be read from it. Its location is only remembered in case anyone calls
-   // getHiveDefaultLocation() later.
-   hiveDefaultURL = classLoader.getResource("hive-default.xml");
-
-   // hive-site.xml goes in first so that the metastore specific files override it if they
-   // exist.
-   if (hiveSiteURL == null) {
-     /*
-      * this 'if' is pretty lame - QTestUtil.QTestUtil() uses hiveSiteURL to load a specific
-      * hive-site.xml from data/conf/<subdir> so this makes it follow the same logic - otherwise
-      * HiveConf and MetastoreConf may load different hive-site.xml ( For example,
-      * HiveConf uses data/conf/spark/hive-site.xml and MetastoreConf data/conf/hive-site.xml)
-      */
-     hiveSiteURL = findConfigFile(classLoader, "hive-site.xml");
-   }
-   if (hiveSiteURL != null) {
-     conf.addResource(hiveSiteURL);
-   }
-
-   // Next hivemetastore-site.xml, still ahead of our own file so that the newer overrides the
-   // older.
-   hiveMetastoreSiteURL = findConfigFile(classLoader, "hivemetastore-site.xml");
-   if (hiveMetastoreSiteURL != null) {
-     conf.addResource(hiveMetastoreSiteURL);
-   }
-
-   // Finally our own conf file.
-   metastoreSiteURL = findConfigFile(classLoader, "metastore-site.xml");
-   if (metastoreSiteURL != null) {
-     conf.addResource(metastoreSiteURL);
-   }
-
-   // A system property named after one of our conf values overrides whatever the files said.
-   for (ConfVars var : ConfVars.values()) {
-     String sysValue = System.getProperty(var.varname);
-     if (sysValue == null) {
-       continue;
-     }
-     LOG.debug("Setting conf value " + var.varname + " using value " + sysValue);
-     conf.set(var.varname, sysValue);
-   }
-
-   // Also copy every system property that starts with "hive.". This way Hive values can be
-   // pulled from the environment without needing to know all of the Hive config values.
-   for (String propName : System.getProperties().stringPropertyNames()) {
-     if (!propName.startsWith("hive.")) {
-       continue;
-     }
-     String v = System.getProperty(propName);
-     LOG.debug("Picking up system property " + propName + " with value " + v);
-     conf.set(propName, v);
-   }
-
-   // If we are going to validate the schema, make sure we don't create it
-   if (getBoolVar(conf, ConfVars.SCHEMA_VERIFICATION)) {
-     setBoolVar(conf, ConfVars.AUTO_CREATE_ALL, false);
-   }
-
-   // Only the first call gets the chance to dump the config; the flag is flipped regardless of
-   // whether the dump actually happens.
-   if (!beenDumped.getAndSet(true)) {
-     if (getBoolVar(conf, ConfVars.DUMP_CONFIG_ON_CREATION) && LOG.isDebugEnabled()) {
-       LOG.debug(dumpConfig(conf));
-     }
-   }
-   return conf;
- }
-
- /**
-  * Locate a configuration file. The locations are tried in this order and the first hit wins:
-  * the classpath, METASTORE_CONF_DIR, METASTORE_HOME/conf, HIVE_CONF_DIR, HIVE_HOME/conf, and
-  * finally a conf directory next to the jar this class was loaded from.
-  * @param classLoader class loader used for the classpath lookup
-  * @param name name of the configuration file
-  * @return URL of the file, or null if it could not be found anywhere
-  */
- private static URL findConfigFile(ClassLoader classLoader, String name) {
-   // First, look in the classpath
-   URL result = classLoader.getResource(name);
-   if (result == null) {
-     // Nope, so look to see if our conf dir has been explicitly set
-     result = seeIfConfAtThisLocation("METASTORE_CONF_DIR", name, false);
-   }
-   if (result == null) {
-     // Nope, so look to see if our home dir has been explicitly set
-     result = seeIfConfAtThisLocation("METASTORE_HOME", name, true);
-   }
-   if (result == null) {
-     // Nope, so look to see if Hive's conf dir has been explicitly set
-     result = seeIfConfAtThisLocation("HIVE_CONF_DIR", name, false);
-   }
-   if (result == null) {
-     // Nope, so look to see if Hive's home dir has been explicitly set
-     result = seeIfConfAtThisLocation("HIVE_HOME", name, true);
-   }
-   if (result == null) {
-     // Nope, so fall back to a conf directory located relative to our jar.
-     result = findConfigFileNearJar(name);
-   }
-   // Report exactly one outcome; previously "Found configuration file null" was logged even
-   // when the search had failed.
-   if (result == null) {
-     LOG.info("Unable to find config file " + name);
-   } else {
-     LOG.info("Found configuration file " + result);
-   }
-   return result;
- }
-
- /**
-  * Look for a configuration file in the conf directory beside the jar containing this class.
-  * @param name name of the configuration file
-  * @return URL of the file, or null if the jar location cannot be determined or the file is
-  * not there
-  */
- private static URL findConfigFileNearJar(String name) {
-   File jarDir;
-   try {
-     URI jarUri = MetastoreConf.class.getProtectionDomain().getCodeSource().getLocation().toURI();
-     // new File(URI) rejects non-file URIs, so it has to stay inside the try block.
-     jarDir = new File(jarUri).getParentFile();
-   } catch (Throwable e) {
-     // Best effort only: without a usable jar location there is nothing more to try.
-     LOG.warn("Cannot get jar URI", e);
-     return null;
-   }
-   if (jarDir == null) {
-     return null;
-   }
-   // The directory is checked directly. seeIfConfAtThisLocation() expects the name of an
-   // environment variable, so handing it a path (as was done before) could never match.
-   return checkConfigFile(new File(jarDir, "conf" + File.separatorChar + name));
- }
-
- /**
-  * Check whether a configuration file exists under the directory named by an environment
-  * variable.
-  * @param envVar name of the environment variable holding the directory
-  * @param name name of the configuration file
-  * @param inConfDir if true the file is looked for in the "conf" subdirectory of the directory,
-  * otherwise directly in it
-  * @return URL of the file, or null if the variable is not set or the file is not there
-  */
- private static URL seeIfConfAtThisLocation(String envVar, String name, boolean inConfDir) {
-   String location = System.getenv(envVar);
-   if (location == null) {
-     // Workaround for testing since tests can't set the env vars.
-     location = System.getProperty(TEST_ENV_WORKAROUND + envVar);
-   }
-   if (location == null) {
-     return null;
-   }
-   String relativeName = name;
-   if (inConfDir) {
-     relativeName = "conf" + File.separatorChar + name;
-   }
-   return checkConfigFile(new File(location, relativeName));
- }
-
- /**
-  * Convert a candidate path into a URL if it points at an existing regular file.
-  * @param f candidate configuration file
-  * @return URL of the file, or null if it does not exist, is not a regular file, or checking it
-  * failed (the failure is logged as a warning)
-  */
- private static URL checkConfigFile(File f) {
-   URL location = null;
-   try {
-     if (f.exists() && f.isFile()) {
-       location = f.toURI().toURL();
-     }
-   } catch (Throwable e) {
-     LOG.warn("Error looking for config " + f, e);
-   }
-   return location;
- }
-
- // In all of the getters, we try the metastore value name first. If it is not set we try the
- // Hive value name.
-
- /**
-  * Read a string valued variable. The metastore key is consulted first, then the Hive key.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return the configured value, or the variable's default if neither key is set
-  */
- public static String getVar(Configuration conf, ConfVars var) {
-   assert var.defaultVal.getClass() == String.class;
-   String metastoreValue = conf.get(var.varname);
-   if (metastoreValue != null) {
-     return metastoreValue;
-   }
-   return conf.get(var.hiveName, (String)var.defaultVal);
- }
-
- /**
-  * Read a string valued variable with a caller supplied fallback. The metastore key is
-  * consulted first, then the Hive key.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @param defaultVal value to return if neither key is set
-  * @return the configured value, or the passed in default if neither key is set
-  */
- public static String getVar(Configuration conf, ConfVars var, String defaultVal) {
-   assert var.defaultVal.getClass() == String.class;
-   String metastoreValue = conf.get(var.varname);
-   if (metastoreValue != null) {
-     return metastoreValue;
-   }
-   return conf.get(var.hiveName, defaultVal);
- }
-
- /**
-  * Read a variable whose value is a comma separated list. The metastore key is consulted
-  * first, then the Hive key, then the variable's default.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return the pieces obtained by splitting the value on commas, or an empty collection if no
-  * value could be determined at all
-  */
- public static Collection<String> getStringCollection(Configuration conf, ConfVars var) {
-   assert var.defaultVal.getClass() == String.class;
-   String joined = conf.get(var.varname);
-   if (joined == null) {
-     joined = conf.get(var.hiveName, (String)var.defaultVal);
-   }
-   if (joined != null) {
-     return StringUtils.asSet(joined.split(","));
-   }
-   return Collections.emptySet();
- }
-
- /**
-  * Store a string value under the variable's metastore key.
-  * @param conf configuration to set it in
-  * @param var variable to set
-  * @param val value to set it to
-  */
- public static void setVar(Configuration conf, ConfVars var, String val) {
-   assert var.defaultVal.getClass() == String.class;
-   final String key = var.varname;
-   conf.set(key, val);
- }
-
- /**
-  * Get the variable as an int. Note that all integer valued variables are stored as longs, thus
-  * this downcasts from a long to an int. With assertions enabled a value outside the int range
-  * (in either direction) trips an assertion; with assertions disabled it is silently truncated
-  * by the cast.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return value, or default value if value not in config file
-  */
- public static int getIntVar(Configuration conf, ConfVars var) {
-   long val = getLongVar(conf, var);
-   // Guard both ends of the range; the lower bound used to go unchecked.
-   assert val >= Integer.MIN_VALUE && val <= Integer.MAX_VALUE;
-   return (int)val;
- }
-
- /**
-  * Read a long valued variable. The metastore key is consulted first and parsed as a long; if
-  * it is not set the Hive key is read, falling back to the variable's default.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return the configured value, or the variable's default if neither key is set
-  */
- public static long getLongVar(Configuration conf, ConfVars var) {
-   assert var.defaultVal.getClass() == Long.class;
-   String metastoreValue = conf.get(var.varname);
-   if (metastoreValue != null) {
-     return Long.parseLong(metastoreValue);
-   }
-   return conf.getLong(var.hiveName, (Long)var.defaultVal);
- }
-
- /**
-  * Store a long value under the variable's metastore key.
-  * @param conf configuration to set it in
-  * @param var variable to set
-  * @param val value to set it to
-  */
- public static void setLongVar(Configuration conf, ConfVars var, long val) {
-   assert var.defaultVal.getClass() == Long.class;
-   final String key = var.varname;
-   conf.setLong(key, val);
- }
-
- /**
-  * Read a boolean valued variable. The metastore key is consulted first and parsed as a
-  * boolean; if it is not set the Hive key is read, falling back to the variable's default.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return the configured value, or the variable's default if neither key is set
-  */
- public static boolean getBoolVar(Configuration conf, ConfVars var) {
-   assert var.defaultVal.getClass() == Boolean.class;
-   String metastoreValue = conf.get(var.varname);
-   if (metastoreValue != null) {
-     return Boolean.parseBoolean(metastoreValue);
-   }
-   return conf.getBoolean(var.hiveName, (Boolean)var.defaultVal);
- }
-
- /**
-  * Store a boolean value under the variable's metastore key.
-  * @param conf configuration to set it in
-  * @param var variable to set
-  * @param val value to set it to
-  */
- public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
-   assert var.defaultVal.getClass() == Boolean.class;
-   final String key = var.varname;
-   conf.setBoolean(key, val);
- }
-
- /**
-  * Read a double valued variable. The metastore key is consulted first and parsed as a double;
-  * if it is not set the Hive key is read, falling back to the variable's default.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @return the configured value, or the variable's default if neither key is set
-  */
- public static double getDoubleVar(Configuration conf, ConfVars var) {
-   assert var.defaultVal.getClass() == Double.class;
-   String metastoreValue = conf.get(var.varname);
-   if (metastoreValue != null) {
-     return Double.parseDouble(metastoreValue);
-   }
-   return conf.getDouble(var.hiveName, (Double)var.defaultVal);
- }
-
- /**
-  * Store a double value under the variable's metastore key.
-  * @param conf configuration to set it in
-  * @param var variable to set
-  * @param val value to set it to
-  */
- public static void setDoubleVar(Configuration conf, ConfVars var, double val) {
-   assert var.defaultVal.getClass() == Double.class;
-   final String key = var.varname;
-   conf.setDouble(key, val);
- }
-
- public static long getSizeVar(Configuration conf, ConfVars var) {
- return SizeValidator.toSizeBytes(getVar(conf, var));
- }
-
- /**
-  * Resolve a class named by a configuration value. If the metastore key is set it is used for
-  * the lookup, otherwise the Hive key is.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @param defaultValue default class to return if the value isn't set
-  * @param xface interface that class must implement
-  * @param <I> interface that class implements
-  * @return the class object (not an instance of it)
-  */
- public static <I> Class<? extends I> getClass(Configuration conf, ConfVars var,
-     Class<? extends I> defaultValue,
-     Class<I> xface) {
-   assert var.defaultVal.getClass() == String.class;
-   // Only the presence of the metastore key matters here; the lookup itself is delegated.
-   String key = conf.get(var.varname) == null ? var.hiveName : var.varname;
-   return conf.getClass(key, defaultValue, xface);
- }
-
- /**
-  * Store a class under the variable's metastore key.
-  * @param conf configuration to set it in
-  * @param var variable to set
-  * @param theClass the class to set it to
-  * @param xface interface that the class implements. It is only passed along because the
-  * underlying {@link Configuration#setClass(String, Class, Class)} requires it.
-  * @param <I> interface the class implements.
-  */
- public static <I> void setClass(Configuration conf, ConfVars var, Class<? extends I> theClass,
-     Class<I> xface) {
-   assert var.defaultVal.getClass() == String.class;
-   final String key = var.varname;
-   conf.setClass(key, theClass, xface);
- }
-
-
-
- /**
-  * Read a variable holding a period of time. The metastore key is consulted first, then the
-  * Hive key. A configured value is converted with convertTimeStr, using the unit of the
-  * variable's default; otherwise the default itself is converted to the requested unit.
-  * @param conf configuration to retrieve it from
-  * @param var variable to retrieve
-  * @param outUnit unit to return the value in
-  * @return the configured value, or the variable's default if neither key is set, expressed in
-  * outUnit
-  */
- public static long getTimeVar(Configuration conf, ConfVars var, TimeUnit outUnit) {
-   assert var.defaultVal.getClass() == TimeValue.class;
-   String configured = conf.get(var.varname);
-   if (configured == null) {
-     // Look for it under the old Hive name
-     configured = conf.get(var.hiveName);
-   }
-
-   TimeValue fallback = (TimeValue)var.defaultVal;
-   if (configured == null) {
-     return outUnit.convert(fallback.val, fallback.unit);
-   }
-   return convertTimeStr(configured, fallback.unit, outUnit);
- }
-
- /**
- * Set the variable as a string
- * @param conf configuration file to se
<TRUNCATED>