Posted to commits@impala.apache.org by kw...@apache.org on 2016/09/30 02:14:47 UTC
[30/61] [partial] incubator-impala git commit: IMPALA-3786: Replace "cloudera" with "apache" (part 1)
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java b/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java
deleted file mode 100644
index 8affb11..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/DatabaseNotFoundException.java
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-
-/**
- * Thrown when a database cannot be found in the catalog.
- */
-public class DatabaseNotFoundException extends CatalogException {
- // Dummy serial ID to satisfy Eclipse
- private static final long serialVersionUID = -2203080667446640542L;
-
- public DatabaseNotFoundException(String s) { super(s); }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Db.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Db.java b/fe/src/main/java/com/cloudera/impala/catalog/Db.java
deleted file mode 100644
index a9150fe..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/Db.java
+++ /dev/null
@@ -1,495 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.thrift.protocol.TCompactProtocol;
-import org.apache.thrift.TException;
-import org.apache.thrift.TSerializer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.cloudera.impala.catalog.Function;
-import com.cloudera.impala.common.ImpalaException;
-import com.cloudera.impala.common.ImpalaRuntimeException;
-import com.cloudera.impala.common.JniUtil;
-import com.cloudera.impala.thrift.TCatalogObjectType;
-import com.cloudera.impala.thrift.TDatabase;
-import com.cloudera.impala.thrift.TFunction;
-import com.cloudera.impala.thrift.TFunctionBinaryType;
-import com.cloudera.impala.thrift.TFunctionCategory;
-import com.cloudera.impala.util.PatternMatcher;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-/**
- * Internal representation of db-related metadata. Owned by Catalog instance.
- * Not thread safe.
- *
- * Tables are stored in a map from the table name to the table object. They may
- * be loaded 'eagerly' at construction or 'lazily' on first reference.
- * Tables are accessed via getTable which may trigger a metadata read in two cases:
- * * if the table has never been loaded
- * * if the table loading failed on the previous attempt
- *
- * Native user-added functions are persisted to the parameters map of the hive metastore
- * db object corresponding to this instance. The map's key is the function signature and
- * its value is the base64 representation of the thrift-serialized function object.
- *
- */
-public class Db implements CatalogObject {
- private static final Logger LOG = LoggerFactory.getLogger(Db.class);
- private final Catalog parentCatalog_;
- private final TDatabase thriftDb_;
- private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION;
-
- public static final String FUNCTION_INDEX_PREFIX = "impala_registered_function_";
-
- // Hive metastore imposes a limit of 4000 bytes on the key and value strings
- // in the DB parameters map. We need to ensure that this limit isn't crossed
- // while serializing functions to the metastore.
- private static final int HIVE_METASTORE_DB_PARAM_LIMIT_BYTES = 4000;
-
- // Table metadata cache.
- private final CatalogObjectCache<Table> tableCache_;
-
- // All of the registered user functions. The key is the user-facing name (e.g. "myUdf"),
- // and the values are all the overloaded variants (e.g. myUdf(double), myUdf(string)).
- // This includes both UDFs and UDAs. Updates are made thread safe by synchronizing
- // on this map. When a new Db object is initialized, this map is populated with the
- // UDFs/UDAs already persisted, if any, in the metastore DB. Functions are sorted in a
- // canonical order defined by FunctionResolutionOrder.
- private final HashMap<String, List<Function>> functions_;
-
- // If true, this database is an Impala system database.
- // (e.g. can't drop it, can't add tables to it, etc).
- private boolean isSystemDb_ = false;
-
- public Db(String name, Catalog catalog,
- org.apache.hadoop.hive.metastore.api.Database msDb) {
- thriftDb_ = new TDatabase(name.toLowerCase());
- parentCatalog_ = catalog;
- thriftDb_.setMetastore_db(msDb);
- tableCache_ = new CatalogObjectCache<Table>();
- functions_ = new HashMap<String, List<Function>>();
- }
-
- public void setIsSystemDb(boolean b) { isSystemDb_ = b; }
-
- /**
- * Creates a Db object with no tables based on the given TDatabase thrift struct.
- */
- public static Db fromTDatabase(TDatabase db, Catalog parentCatalog) {
- return new Db(db.getDb_name(), parentCatalog, db.getMetastore_db());
- }
-
- /**
- * Updates the hms parameters map by adding the input <k,v> pair.
- */
- private void putToHmsParameters(String k, String v) {
- org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db;
- Preconditions.checkNotNull(msDb);
- Map<String, String> hmsParams = msDb.getParameters();
- if (hmsParams == null) hmsParams = Maps.newHashMap();
- hmsParams.put(k,v);
- msDb.setParameters(hmsParams);
- }
-
- /**
- * Updates the hms parameters map by removing the <k,v> pair corresponding to
- * input key <k>. Returns true if the parameters map contained a pair for key
- * <k> and it was removed, false otherwise.
- */
- private boolean removeFromHmsParameters(String k) {
- org.apache.hadoop.hive.metastore.api.Database msDb = thriftDb_.metastore_db;
- Preconditions.checkNotNull(msDb);
- if (msDb.getParameters() == null) return false;
- return msDb.getParameters().remove(k) != null;
- }
-
- public boolean isSystemDb() { return isSystemDb_; }
- public TDatabase toThrift() { return thriftDb_; }
- @Override
- public String getName() { return thriftDb_.getDb_name(); }
- @Override
- public TCatalogObjectType getCatalogObjectType() {
- return TCatalogObjectType.DATABASE;
- }
-
- /**
- * Adds a table to the table cache.
- */
- public void addTable(Table table) {
- tableCache_.add(table);
- }
-
- /**
- * Gets all table names in the table cache.
- */
- public List<String> getAllTableNames() {
- return Lists.newArrayList(tableCache_.keySet());
- }
-
- public boolean containsTable(String tableName) {
- return tableCache_.contains(tableName.toLowerCase());
- }
-
- /**
- * Returns the Table with the given name if present in the table cache or null if the
- * table does not exist in the cache.
- */
- public Table getTable(String tblName) {
- return tableCache_.get(tblName);
- }
-
- /**
- * Removes the table name and any cached metadata from the Table cache.
- */
- public Table removeTable(String tableName) {
- return tableCache_.remove(tableName.toLowerCase());
- }
-
- /**
- * Comparator that sorts function overloads. We want overloads to be always considered
- * in a canonical order so that overload resolution in the case of multiple valid
- * overloads does not depend on the order in which functions are added to the Db. The
- * order is based on the PrimitiveType enum because this was the order used implicitly
- * for builtin operators and functions in earlier versions of Impala.
- */
- private static class FunctionResolutionOrder implements Comparator<Function> {
- @Override
- public int compare(Function f1, Function f2) {
- int numSharedArgs = Math.min(f1.getNumArgs(), f2.getNumArgs());
- for (int i = 0; i < numSharedArgs; ++i) {
- int cmp = typeCompare(f1.getArgs()[i], f2.getArgs()[i]);
- if (cmp < 0) {
- return -1;
- } else if (cmp > 0) {
- return 1;
- }
- }
- // Put alternative with fewer args first.
- if (f1.getNumArgs() < f2.getNumArgs()) {
- return -1;
- } else if (f1.getNumArgs() > f2.getNumArgs()) {
- return 1;
- }
- return 0;
- }
-
- private int typeCompare(Type t1, Type t2) {
- Preconditions.checkState(!t1.isComplexType());
- Preconditions.checkState(!t2.isComplexType());
- return Integer.compare(t1.getPrimitiveType().ordinal(),
- t2.getPrimitiveType().ordinal());
- }
- }
-
- private static final FunctionResolutionOrder FUNCTION_RESOLUTION_ORDER =
- new FunctionResolutionOrder();
-
- /**
- * Returns the metastore.api.Database object this Database was created from.
- * Returns null for databases not backed by a Hive database, such as builtins_db.
- */
- public org.apache.hadoop.hive.metastore.api.Database getMetaStoreDb() {
- return thriftDb_.getMetastore_db();
- }
-
- /**
- * Returns the number of functions in this database.
- */
- public int numFunctions() {
- synchronized (functions_) {
- return functions_.size();
- }
- }
-
- /**
- * See comment in Catalog.
- */
- public boolean containsFunction(String name) {
- synchronized (functions_) {
- return functions_.get(name) != null;
- }
- }
-
- /**
- * See comment in Catalog.
- */
- public Function getFunction(Function desc, Function.CompareMode mode) {
- synchronized (functions_) {
- List<Function> fns = functions_.get(desc.functionName());
- if (fns == null) return null;
-
- // First check for identical
- for (Function f: fns) {
- if (f.compare(desc, Function.CompareMode.IS_IDENTICAL)) return f;
- }
- if (mode == Function.CompareMode.IS_IDENTICAL) return null;
-
- // Next check for indistinguishable
- for (Function f: fns) {
- if (f.compare(desc, Function.CompareMode.IS_INDISTINGUISHABLE)) return f;
- }
- if (mode == Function.CompareMode.IS_INDISTINGUISHABLE) return null;
-
- // Next check for strict supertypes
- for (Function f: fns) {
- if (f.compare(desc, Function.CompareMode.IS_SUPERTYPE_OF)) return f;
- }
- if (mode == Function.CompareMode.IS_SUPERTYPE_OF) return null;
-
- // Finally check for non-strict supertypes
- for (Function f: fns) {
- if (f.compare(desc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF)) return f;
- }
- }
- return null;
- }
-
- public Function getFunction(String signatureString) {
- synchronized (functions_) {
- for (List<Function> fns: functions_.values()) {
- for (Function f: fns) {
- if (f.signatureString().equals(signatureString)) return f;
- }
- }
- }
- return null;
- }
-
- /**
- * Adds the user defined function fn to metastore DB params. fn is
- * serialized to thrift using TCompactProtocol and then base64-encoded
- * to be compatible with the HMS' representation of params.
- */
- private boolean addFunctionToDbParams(Function fn) {
- Preconditions.checkState(
- fn.getBinaryType() != TFunctionBinaryType.BUILTIN &&
- fn.getBinaryType() != TFunctionBinaryType.JAVA);
- try {
- TSerializer serializer =
- new TSerializer(new TCompactProtocol.Factory());
- byte[] serializedFn = serializer.serialize(fn.toThrift());
- String base64Fn = Base64.encodeBase64String(serializedFn);
- String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString();
- if (base64Fn.length() > HIVE_METASTORE_DB_PARAM_LIMIT_BYTES) {
- throw new ImpalaRuntimeException(
- "Serialized function size exceeded HMS 4K byte limit");
- }
- putToHmsParameters(fnKey, base64Fn);
- } catch (ImpalaException | TException e) {
- LOG.error("Error adding function " + fn.getName() + " to DB params", e);
- return false;
- }
- return true;
- }
-
- public boolean addFunction(Function fn) {
- // We use the db parameters map to persist native and IR functions.
- boolean addToDbParams =
- (fn.getBinaryType() == TFunctionBinaryType.NATIVE ||
- fn.getBinaryType() == TFunctionBinaryType.IR);
- return addFunction(fn, addToDbParams);
- }
-
- /**
- * Registers the function fn to this database. If addToDbParams is true,
- * fn is added to the metastore DB params. Returns false if the function
- * fn already exists or when a failure is encountered while adding it to
- * the metastore DB params and true otherwise.
- */
- public boolean addFunction(Function fn, boolean addToDbParams) {
- Preconditions.checkState(fn.dbName().equals(getName()));
- synchronized (functions_) {
- if (getFunction(fn, Function.CompareMode.IS_INDISTINGUISHABLE) != null) {
- return false;
- }
- List<Function> fns = functions_.get(fn.functionName());
- if (fns == null) {
- fns = Lists.newArrayList();
- functions_.put(fn.functionName(), fns);
- }
- if (addToDbParams && !addFunctionToDbParams(fn)) return false;
- fns.add(fn);
- Collections.sort(fns, FUNCTION_RESOLUTION_ORDER);
- return true;
- }
- }
-
- /**
- * See comment in Catalog.
- */
- public Function removeFunction(Function desc) {
- synchronized (functions_) {
- Function fn = getFunction(desc, Function.CompareMode.IS_INDISTINGUISHABLE);
- if (fn == null) return null;
- List<Function> fns = functions_.get(desc.functionName());
- Preconditions.checkNotNull(fns);
- fns.remove(fn);
- if (fns.isEmpty()) functions_.remove(desc.functionName());
- if (fn.getBinaryType() == TFunctionBinaryType.JAVA) return fn;
- // Remove the function from the metastore database parameters
- String fnKey = FUNCTION_INDEX_PREFIX + fn.signatureString();
- boolean removeFn = removeFromHmsParameters(fnKey);
- Preconditions.checkState(removeFn);
- return fn;
- }
- }
-
- /**
- * Removes a Function with the matching signature string. Returns the removed Function
- * if a Function was removed as a result of this call, null otherwise.
- * TODO: Move away from using signature strings and instead use Function IDs.
- */
- public Function removeFunction(String signatureStr) {
- synchronized (functions_) {
- Function targetFn = getFunction(signatureStr);
- if (targetFn != null) return removeFunction(targetFn);
- }
- return null;
- }
-
- /**
- * Add a builtin with the specified name and signatures to this db.
- * This defaults to not using a Prepare/Close function.
- */
- public void addScalarBuiltin(String fnName, String symbol, boolean userVisible,
- boolean varArgs, Type retType, Type ... args) {
- addScalarBuiltin(fnName, symbol, userVisible, null, null, varArgs, retType, args);
- }
-
- /**
- * Add a builtin with the specified name and signatures to this db.
- */
- public void addScalarBuiltin(String fnName, String symbol, boolean userVisible,
- String prepareFnSymbol, String closeFnSymbol, boolean varArgs, Type retType,
- Type ... args) {
- Preconditions.checkState(isSystemDb());
- addBuiltin(ScalarFunction.createBuiltin(
- fnName, Lists.newArrayList(args), varArgs, retType,
- symbol, prepareFnSymbol, closeFnSymbol, userVisible));
- }
-
- /**
- * Adds a builtin to this database. The function must not already exist.
- */
- public void addBuiltin(Function fn) {
- Preconditions.checkState(isSystemDb());
- Preconditions.checkState(fn != null);
- Preconditions.checkState(getFunction(fn, Function.CompareMode.IS_IDENTICAL) == null);
- addFunction(fn, false);
- }
-
- /**
- * Returns a map of functionNames to list of (overloaded) functions with that name.
- * This is not thread safe so a higher level lock must be taken while iterating
- * over the returned functions.
- */
- protected HashMap<String, List<Function>> getAllFunctions() {
- return functions_;
- }
-
- /**
- * Returns a list of transient functions in this Db.
- */
- protected List<Function> getTransientFunctions() {
- List<Function> result = Lists.newArrayList();
- synchronized (functions_) {
- for (String fnKey: functions_.keySet()) {
- for (Function fn: functions_.get(fnKey)) {
- if (fn.userVisible() && !fn.isPersistent()) {
- result.add(fn);
- }
- }
- }
- }
- return result;
- }
-
- /**
- * Returns all functions that match the pattern of 'matcher'.
- */
- public List<Function> getFunctions(TFunctionCategory category,
- PatternMatcher matcher) {
- Preconditions.checkNotNull(matcher);
- List<Function> result = Lists.newArrayList();
- synchronized (functions_) {
- for (Map.Entry<String, List<Function>> fns: functions_.entrySet()) {
- if (!matcher.matches(fns.getKey())) continue;
- for (Function fn: fns.getValue()) {
- if ((category == null || Function.categoryMatch(fn, category))
- && fn.userVisible()) {
- result.add(fn);
- }
- }
- }
- }
- return result;
- }
-
- /**
- * Returns all functions with the given name
- */
- public List<Function> getFunctions(String name) {
- List<Function> result = Lists.newArrayList();
- Preconditions.checkNotNull(name);
- synchronized (functions_) {
- if (!functions_.containsKey(name)) return result;
- for (Function fn: functions_.get(name)) {
- if (fn.userVisible()) result.add(fn);
- }
- }
- return result;
- }
-
- /**
- * Returns all functions with the given name and category.
- */
- public List<Function> getFunctions(TFunctionCategory category, String name) {
- List<Function> result = Lists.newArrayList();
- Preconditions.checkNotNull(category);
- Preconditions.checkNotNull(name);
- synchronized (functions_) {
- if (!functions_.containsKey(name)) return result;
- for (Function fn: functions_.get(name)) {
- if (fn.userVisible() && Function.categoryMatch(fn, category)) {
- result.add(fn);
- }
- }
- }
- return result;
- }
-
- @Override
- public long getCatalogVersion() { return catalogVersion_; }
- @Override
- public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; }
- public Catalog getParentCatalog() { return parentCatalog_; }
-
- @Override
- public boolean isLoaded() { return true; }
-}
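
As an aside on the removed Db.addFunctionToDbParams() above: native/IR functions are persisted by serializing the function to thrift, base64-encoding the bytes, and storing the result in the HMS database parameters map under FUNCTION_INDEX_PREFIX plus the signature string, subject to the 4000-byte HMS limit. A minimal standalone sketch of that encoding step follows; the class name and the string payload are hypothetical stand-ins for the real TFunction bytes, not part of this commit.

  import java.util.HashMap;
  import java.util.Map;
  import org.apache.commons.codec.binary.Base64;

  public class HmsFunctionParamSketch {
    // Mirror of Db.HIVE_METASTORE_DB_PARAM_LIMIT_BYTES and FUNCTION_INDEX_PREFIX.
    private static final int PARAM_LIMIT_BYTES = 4000;
    private static final String PREFIX = "impala_registered_function_";

    public static void main(String[] args) throws Exception {
      // Stand-in payload for the bytes a TSerializer(TCompactProtocol.Factory())
      // would produce from fn.toThrift(); the real code serializes a TFunction.
      byte[] serializedFn = "my_udf(INT, INT) -> BIGINT".getBytes("UTF-8");
      String base64Fn = Base64.encodeBase64String(serializedFn);
      if (base64Fn.length() > PARAM_LIMIT_BYTES) {
        throw new RuntimeException("Serialized function size exceeded HMS 4K byte limit");
      }
      // Key: prefix + signature string, mirroring addFunctionToDbParams().
      Map<String, String> hmsParams = new HashMap<String, String>();
      hmsParams.put(PREFIX + "my_udf(INT, INT)", base64Fn);
      System.out.println(hmsParams);
    }
  }
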
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/Function.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Function.java b/fe/src/main/java/com/cloudera/impala/catalog/Function.java
deleted file mode 100644
index 406e958..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/Function.java
+++ /dev/null
@@ -1,488 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import java.util.List;
-
-import com.cloudera.impala.analysis.FunctionName;
-import com.cloudera.impala.analysis.HdfsUri;
-import com.cloudera.impala.common.AnalysisException;
-import com.cloudera.impala.common.InternalException;
-import com.cloudera.impala.service.FeSupport;
-import com.cloudera.impala.thrift.TAggregateFunction;
-import com.cloudera.impala.thrift.TCatalogObjectType;
-import com.cloudera.impala.thrift.TColumnType;
-import com.cloudera.impala.thrift.TFunction;
-import com.cloudera.impala.thrift.TFunctionBinaryType;
-import com.cloudera.impala.thrift.TFunctionCategory;
-import com.cloudera.impala.thrift.TScalarFunction;
-import com.cloudera.impala.thrift.TSymbolLookupParams;
-import com.cloudera.impala.thrift.TSymbolLookupResult;
-import com.cloudera.impala.thrift.TSymbolType;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-
-/**
- * Base class for all functions.
- * Each function can be of the following 4 types.
- * - Native/IR stored in db params (persisted, visible to Impala)
- * - Hive UDFs stored in the HMS (visible to Hive + Impala)
- * - Java UDFs which are not persisted (visible to Impala but not Hive)
- * - Builtin functions, which are recreated after every restart of the
- * catalog. (persisted, visible to Impala)
- */
-public class Function implements CatalogObject {
- // Enum for how to compare function signatures.
- // For decimal types, the type in the function can be a wildcard, i.e. decimal(*,*).
- // The wildcard can *only* exist as the function type; the caller will always be a
- // fully specified decimal.
- // For the purposes of function type resolution, decimal(*,*) will match exactly
- // with any fully specified decimal (i.e. fn(decimal(*,*)) matches identically for
- // the call to fn(decimal(1,0))).
- public enum CompareMode {
- // Two signatures are identical if the number of arguments and their types match
- // exactly and either both signatures are varargs or neither.
- IS_IDENTICAL,
-
- // Two signatures are indistinguishable if there is no way to tell them apart
- // when matching a particular instantiation. That is, their fixed arguments
- // match exactly and the remaining varargs have the same type.
- // e.g. fn(int, int, int) and fn(int...)
- // Argument types that are NULL are ignored when doing this comparison.
- // e.g. fn(NULL, int) is indistinguishable from fn(int, int)
- IS_INDISTINGUISHABLE,
-
- // X is a supertype of Y if Y.arg[i] can be strictly implicitly cast to X.arg[i]. If
- // X has varargs, the remaining arguments of Y must be strictly implicitly castable
- // to the var arg type. The key property this provides is that X can be used in place
- // of Y. e.g. fn(int, double, string...) is a supertype of fn(tinyint, float, string,
- // string)
- IS_SUPERTYPE_OF,
-
- // Nonstrict supertypes broaden the definition of supertype to accept implicit casts
- // of arguments that may result in loss of precision - e.g. decimal to float.
- IS_NONSTRICT_SUPERTYPE_OF,
- }
-
- // User-specified function name, e.g. "Add"
- private FunctionName name_;
-
- private final Type retType_;
- // Array of parameter types; empty if this function does not have parameters.
- private Type[] argTypes_;
-
- // If true, this function has variable arguments.
- // TODO: we don't currently support varargs with no fixed types. i.e. fn(...)
- private boolean hasVarArgs_;
-
- // If true (default), this function is called directly by the user. For operators,
- // this is false. If false, it also means the function is not visible from
- // 'show functions'.
- private boolean userVisible_;
-
- // Absolute path in HDFS for the binary that contains this function.
- // e.g. /udfs/udfs.jar
- private HdfsUri location_;
- private TFunctionBinaryType binaryType_;
-
- // Set to true for functions that survive service restarts: all builtins, native
- // and IR functions, and only those Java functions created without a signature.
- private boolean isPersistent_;
- private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION;
-
- public Function(FunctionName name, Type[] argTypes,
- Type retType, boolean varArgs) {
- this.name_ = name;
- this.hasVarArgs_ = varArgs;
- if (argTypes == null) {
- argTypes_ = new Type[0];
- } else {
- this.argTypes_ = argTypes;
- }
- if (retType == null) {
- this.retType_ = ScalarType.INVALID;
- } else {
- this.retType_ = retType;
- }
- this.userVisible_ = true;
- }
-
- public Function(FunctionName name, List<Type> args,
- Type retType, boolean varArgs) {
- this(name, (Type[])null, retType, varArgs);
- if (args != null && args.size() > 0) {
- argTypes_ = args.toArray(new Type[args.size()]);
- } else {
- argTypes_ = new Type[0];
- }
- }
-
- /**
- * Static helper method to create a function with a given TFunctionBinaryType.
- */
- public static Function createFunction(String db, String fnName, List<Type> args,
- Type retType, boolean varArgs, TFunctionBinaryType fnType) {
- Function fn =
- new Function(new FunctionName(db, fnName), args, retType, varArgs);
- fn.setBinaryType(fnType);
- return fn;
- }
-
- public FunctionName getFunctionName() { return name_; }
- public String functionName() { return name_.getFunction(); }
- public String dbName() { return name_.getDb(); }
- public Type getReturnType() { return retType_; }
- public Type[] getArgs() { return argTypes_; }
- // Returns the number of arguments to this function.
- public int getNumArgs() { return argTypes_.length; }
- public HdfsUri getLocation() { return location_; }
- public TFunctionBinaryType getBinaryType() { return binaryType_; }
- public boolean hasVarArgs() { return hasVarArgs_; }
- public boolean isPersistent() { return isPersistent_; }
- public boolean userVisible() { return userVisible_; }
- public Type getVarArgsType() {
- if (!hasVarArgs_) return Type.INVALID;
- Preconditions.checkState(argTypes_.length > 0);
- return argTypes_[argTypes_.length - 1];
- }
-
- public void setName(FunctionName name) { name_ = name; }
- public void setLocation(HdfsUri loc) { location_ = loc; }
- public void setBinaryType(TFunctionBinaryType type) { binaryType_ = type; }
- public void setHasVarArgs(boolean v) { hasVarArgs_ = v; }
- public void setIsPersistent(boolean v) { isPersistent_ = v; }
- public void setUserVisible(boolean b) { userVisible_ = b; }
-
- // Returns a string with the signature in human readable format:
- // FnName(argtype1, argtyp2). e.g. Add(int, int)
- public String signatureString() {
- StringBuilder sb = new StringBuilder();
- sb.append(name_.getFunction())
- .append("(")
- .append(Joiner.on(", ").join(argTypes_));
- if (hasVarArgs_) sb.append("...");
- sb.append(")");
- return sb.toString();
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof Function)) return false;
- return compare((Function)o, CompareMode.IS_IDENTICAL);
- }
-
- // Compares this function to 'other' under the given mode.
- public boolean compare(Function other, CompareMode mode) {
- switch (mode) {
- case IS_IDENTICAL: return isIdentical(other);
- case IS_INDISTINGUISHABLE: return isIndistinguishable(other);
- case IS_SUPERTYPE_OF: return isSuperTypeOf(other, true);
- case IS_NONSTRICT_SUPERTYPE_OF: return isSuperTypeOf(other, false);
- default:
- Preconditions.checkState(false);
- return false;
- }
- }
- /**
- * Returns true if 'this' is a supertype of 'other'. Each argument in other must
- * be implicitly castable to the matching argument in this. If strict is true,
- * only consider conversions where there is no loss of precision.
- */
- private boolean isSuperTypeOf(Function other, boolean strict) {
- if (!other.name_.equals(name_)) return false;
- if (!this.hasVarArgs_ && other.argTypes_.length != this.argTypes_.length) {
- return false;
- }
- if (this.hasVarArgs_ && other.argTypes_.length < this.argTypes_.length) return false;
- for (int i = 0; i < this.argTypes_.length; ++i) {
- if (!Type.isImplicitlyCastable(other.argTypes_[i], this.argTypes_[i], strict)) {
- return false;
- }
- }
- // Check trailing varargs.
- if (this.hasVarArgs_) {
- for (int i = this.argTypes_.length; i < other.argTypes_.length; ++i) {
- if (other.argTypes_[i].matchesType(this.getVarArgsType())) continue;
- if (!Type.isImplicitlyCastable(other.argTypes_[i], this.getVarArgsType(),
- strict)) {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
- * Converts any CHAR arguments to be STRING arguments
- */
- public Function promoteCharsToStrings() {
- Type[] promoted = argTypes_.clone();
- for (int i = 0; i < promoted.length; ++i) {
- if (promoted[i].isScalarType(PrimitiveType.CHAR)) promoted[i] = ScalarType.STRING;
- }
- return new Function(name_, promoted, retType_, hasVarArgs_);
- }
-
- /**
- * Given a list of functions which are a super type of this function, select the best
- * match. This is the one which requires the fewest type promotions.
- */
- public Function selectClosestSuperType(List<Function> candidates) {
- Preconditions.checkArgument(candidates.size() > 0);
- if (candidates.size() == 1) return candidates.get(0);
-
- // Always promote CHAR to STRING before attempting any other promotions.
- Function withStrs = promoteCharsToStrings();
- for (Function f: candidates) {
- if (withStrs.isIndistinguishable(f)) return f;
- }
- // Otherwise, we use the previous rules of resolution which are to take the first
- // one in the list.
- return candidates.get(0);
- }
-
- private boolean isIdentical(Function o) {
- if (!o.name_.equals(name_)) return false;
- if (o.argTypes_.length != this.argTypes_.length) return false;
- if (o.hasVarArgs_ != this.hasVarArgs_) return false;
- for (int i = 0; i < this.argTypes_.length; ++i) {
- if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false;
- }
- return true;
- }
-
- private boolean isIndistinguishable(Function o) {
- if (!o.name_.equals(name_)) return false;
- int minArgs = Math.min(o.argTypes_.length, this.argTypes_.length);
- // The first fully specified args must be identical.
- for (int i = 0; i < minArgs; ++i) {
- if (o.argTypes_[i].isNull() || this.argTypes_[i].isNull()) continue;
- if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false;
- }
- if (o.argTypes_.length == this.argTypes_.length) return true;
-
- if (o.hasVarArgs_ && this.hasVarArgs_) {
- if (!o.getVarArgsType().matchesType(this.getVarArgsType())) return false;
- if (this.getNumArgs() > o.getNumArgs()) {
- for (int i = minArgs; i < this.getNumArgs(); ++i) {
- if (this.argTypes_[i].isNull()) continue;
- if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false;
- }
- } else {
- for (int i = minArgs; i < o.getNumArgs(); ++i) {
- if (o.argTypes_[i].isNull()) continue;
- if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false;
- }
- }
- return true;
- } else if (o.hasVarArgs_) {
- // o has var args so check the remaining arguments from this
- if (o.getNumArgs() > minArgs) return false;
- for (int i = minArgs; i < this.getNumArgs(); ++i) {
- if (this.argTypes_[i].isNull()) continue;
- if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false;
- }
- return true;
- } else if (this.hasVarArgs_) {
- // this has var args so check the remaining arguments from o
- if (this.getNumArgs() > minArgs) return false;
- for (int i = minArgs; i < o.getNumArgs(); ++i) {
- if (o.argTypes_[i].isNull()) continue;
- if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false;
- }
- return true;
- } else {
- // Neither has var args and the lengths don't match
- return false;
- }
- }
-
- @Override
- public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.FUNCTION; }
-
- @Override
- public long getCatalogVersion() { return catalogVersion_; }
-
- @Override
- public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; }
-
- @Override
- public String getName() { return getFunctionName().toString(); }
-
- // Child classes must override this function.
- public String toSql(boolean ifNotExists) { return ""; }
-
- public TFunction toThrift() {
- TFunction fn = new TFunction();
- fn.setSignature(signatureString());
- fn.setName(name_.toThrift());
- fn.setBinary_type(binaryType_);
- if (location_ != null) fn.setHdfs_location(location_.toString());
- fn.setArg_types(Type.toThrift(argTypes_));
- fn.setRet_type(getReturnType().toThrift());
- fn.setHas_var_args(hasVarArgs_);
- fn.setIs_persistent(isPersistent_);
- // TODO: Comment field is missing?
- // fn.setComment(comment_)
- return fn;
- }
-
- public static Function fromThrift(TFunction fn) {
- List<Type> argTypes = Lists.newArrayList();
- for (TColumnType t: fn.getArg_types()) {
- argTypes.add(Type.fromThrift(t));
- }
-
- Function function = null;
- if (fn.isSetScalar_fn()) {
- TScalarFunction scalarFn = fn.getScalar_fn();
- function = new ScalarFunction(FunctionName.fromThrift(fn.getName()), argTypes,
- Type.fromThrift(fn.getRet_type()), new HdfsUri(fn.getHdfs_location()),
- scalarFn.getSymbol(), scalarFn.getPrepare_fn_symbol(),
- scalarFn.getClose_fn_symbol());
- } else if (fn.isSetAggregate_fn()) {
- TAggregateFunction aggFn = fn.getAggregate_fn();
- function = new AggregateFunction(FunctionName.fromThrift(fn.getName()), argTypes,
- Type.fromThrift(fn.getRet_type()),
- Type.fromThrift(aggFn.getIntermediate_type()),
- new HdfsUri(fn.getHdfs_location()), aggFn.getUpdate_fn_symbol(),
- aggFn.getInit_fn_symbol(), aggFn.getSerialize_fn_symbol(),
- aggFn.getMerge_fn_symbol(), aggFn.getGet_value_fn_symbol(),
- null, aggFn.getFinalize_fn_symbol());
- } else {
- // In the case where we are trying to look up the object, we only have the
- // signature.
- function = new Function(FunctionName.fromThrift(fn.getName()),
- argTypes, Type.fromThrift(fn.getRet_type()), fn.isHas_var_args());
- }
- function.setBinaryType(fn.getBinary_type());
- function.setHasVarArgs(fn.isHas_var_args());
- if (fn.isSetIs_persistent()) {
- function.setIsPersistent(fn.isIs_persistent());
- } else {
- function.setIsPersistent(false);
- }
- return function;
- }
-
- @Override
- public boolean isLoaded() { return true; }
-
- // Returns the resolved symbol in the binary. The BE will do a lookup of 'symbol'
- // in the binary and try to resolve unmangled names.
- // If this function is expecting a return argument, retArgType is that type. It should
- // be null if this function isn't expecting a return argument.
- public String lookupSymbol(String symbol, TSymbolType symbolType, Type retArgType,
- boolean hasVarArgs, Type... argTypes) throws AnalysisException {
- if (symbol.length() == 0) {
- if (binaryType_ == TFunctionBinaryType.BUILTIN) {
- // We allow empty builtin symbols in order to stage work in the FE before it is
- // implemented in the BE.
- return symbol;
- }
- throw new AnalysisException("Could not find symbol ''");
- }
-
- TSymbolLookupParams lookup = new TSymbolLookupParams();
- // Builtin functions do not have an external library; they are loaded directly from
- // the running process.
- lookup.location = binaryType_ != TFunctionBinaryType.BUILTIN ?
- location_.toString() : "";
- lookup.symbol = symbol;
- lookup.symbol_type = symbolType;
- lookup.fn_binary_type = binaryType_;
- lookup.arg_types = Type.toThrift(argTypes);
- lookup.has_var_args = hasVarArgs;
- if (retArgType != null) lookup.setRet_arg_type(retArgType.toThrift());
-
- try {
- TSymbolLookupResult result = FeSupport.LookupSymbol(lookup);
- switch (result.result_code) {
- case SYMBOL_FOUND:
- return result.symbol;
- case BINARY_NOT_FOUND:
- Preconditions.checkState(binaryType_ != TFunctionBinaryType.BUILTIN);
- throw new AnalysisException(
- "Could not load binary: " + location_.getLocation() + "\n" +
- result.error_msg);
- case SYMBOL_NOT_FOUND:
- throw new AnalysisException(result.error_msg);
- default:
- // Should never get here.
- throw new AnalysisException("Internal Error");
- }
- } catch (InternalException e) {
- // Should never get here.
- e.printStackTrace();
- throw new AnalysisException("Could not find symbol: " + symbol, e);
- }
- }
-
- public String lookupSymbol(String symbol, TSymbolType symbolType)
- throws AnalysisException {
- Preconditions.checkState(
- symbolType == TSymbolType.UDF_PREPARE || symbolType == TSymbolType.UDF_CLOSE);
- return lookupSymbol(symbol, symbolType, null, false);
- }
-
- public static String getUdfType(Type t) {
- switch (t.getPrimitiveType()) {
- case BOOLEAN:
- return "BooleanVal";
- case TINYINT:
- return "TinyIntVal";
- case SMALLINT:
- return "SmallIntVal";
- case INT:
- return "IntVal";
- case BIGINT:
- return "BigIntVal";
- case FLOAT:
- return "FloatVal";
- case DOUBLE:
- return "DoubleVal";
- case STRING:
- case VARCHAR:
- case CHAR:
- return "StringVal";
- case TIMESTAMP:
- return "TimestampVal";
- case DECIMAL:
- return "DecimalVal";
- default:
- Preconditions.checkState(false, t.toString());
- return "";
- }
- }
-
- /**
- * Returns true if the given function matches the specified category.
- */
- public static boolean categoryMatch(Function fn, TFunctionCategory category) {
- Preconditions.checkNotNull(category);
- return (category == TFunctionCategory.SCALAR && fn instanceof ScalarFunction)
- || (category == TFunctionCategory.AGGREGATE
- && fn instanceof AggregateFunction
- && ((AggregateFunction)fn).isAggregateFn())
- || (category == TFunctionCategory.ANALYTIC
- && fn instanceof AggregateFunction
- && ((AggregateFunction)fn).isAnalyticFn());
- }
-}
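
A note on the signature format used throughout Function.java and as the HMS key in Db.java: Function.signatureString() renders a function as FnName(argtype1, argtype2), appending "..." when the function takes varargs. A small self-contained sketch of that formatting follows; the class name and example functions are illustrative only, not part of this commit.

  import com.google.common.base.Joiner;

  public class SignatureStringSketch {
    // Mirrors Function.signatureString(): name, parenthesized arg types,
    // with "..." appended when the function takes varargs.
    static String signature(String name, boolean hasVarArgs, String... argTypes) {
      StringBuilder sb = new StringBuilder();
      sb.append(name)
          .append("(")
          .append(Joiner.on(", ").join(argTypes));
      if (hasVarArgs) sb.append("...");
      sb.append(")");
      return sb.toString();
    }

    public static void main(String[] args) {
      System.out.println(signature("add", false, "INT", "INT"));  // add(INT, INT)
      System.out.println(signature("concat", true, "STRING"));    // concat(STRING...)
    }
  }
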
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java
deleted file mode 100644
index 37fa853..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseColumn.java
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import com.cloudera.impala.thrift.TColumn;
-
-// Describes an HBase column mapped to a Hive column (as described in the metastore).
-// this.name describes the column name in Hive.
-// This class adds the HBase columnFamily and columnQualifier,
-// so we can read the column from HBase directly.
-public class HBaseColumn extends Column implements Comparable<HBaseColumn> {
- private final String columnFamily_;
- private final String columnQualifier_;
- private final boolean binaryEncoded_;
-
- public HBaseColumn(String name, String columnFamily, String columnQualifier,
- boolean binaryEncoded, Type type, String comment, int position) {
- super(name, type, comment, position);
- columnFamily_ = columnFamily;
- columnQualifier_ = columnQualifier;
- binaryEncoded_ = binaryEncoded;
- }
-
- public String getColumnFamily() { return columnFamily_; }
- public String getColumnQualifier() { return columnQualifier_; }
- public boolean isBinaryEncoded() { return binaryEncoded_; }
-
- @Override
- // We order the HBase columns in the metadata based on columnFamily, columnQualifier,
- // to more easily map slots from HBase's Result.raw() to target slots in the backend.
- public int compareTo(HBaseColumn o) {
- int familyCmp = columnFamily_.compareTo(o.columnFamily_);
- if (familyCmp != 0) {
- return familyCmp;
- }
- int qualifierCmp = columnQualifier_.compareTo(o.columnQualifier_);
- return qualifierCmp;
- }
-
- @Override
- public TColumn toThrift() {
- TColumn colDesc = new TColumn(name_, type_.toThrift());
- if (comment_ != null) colDesc.setComment(comment_);
- colDesc.setCol_stats(getStats().toThrift());
- colDesc.setPosition(position_);
- colDesc.setIs_hbase_column(true);
- colDesc.setColumn_family(columnFamily_);
- colDesc.setColumn_qualifier(columnQualifier_);
- colDesc.setIs_binary(binaryEncoded_);
- return colDesc;
- }
-}
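
The compareTo() above is what lets the catalog sort HBase columns by family, then qualifier, to match the order in which HBase returns cells. A quick standalone illustration of that ordering follows; the Col class is a made-up stand-in for HBaseColumn, not part of this commit.

  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.List;

  public class ColumnOrderSketch {
    // Stand-in for HBaseColumn: ordered by family first, then qualifier,
    // matching HBaseColumn.compareTo().
    static class Col implements Comparable<Col> {
      final String family;
      final String qualifier;
      Col(String family, String qualifier) {
        this.family = family;
        this.qualifier = qualifier;
      }
      @Override
      public int compareTo(Col o) {
        int familyCmp = family.compareTo(o.family);
        if (familyCmp != 0) return familyCmp;
        return qualifier.compareTo(o.qualifier);
      }
      @Override
      public String toString() { return family + ":" + qualifier; }
    }

    public static void main(String[] args) {
      List<Col> cols = new ArrayList<Col>();
      cols.add(new Col("d", "amount"));
      cols.add(new Col("a", "ts"));
      cols.add(new Col("a", "id"));
      Collections.sort(cols);
      System.out.println(cols);  // [a:id, a:ts, d:amount]
    }
  }
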
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
deleted file mode 100644
index d96314e..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/HBaseTable.java
+++ /dev/null
@@ -1,853 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.ServerLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.io.compress.Compression;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hive.hbase.HBaseSerDe;
-import org.apache.hadoop.hive.metastore.IMetaStoreClient;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.log4j.Logger;
-
-import com.cloudera.impala.common.Pair;
-import com.cloudera.impala.thrift.TCatalogObjectType;
-import com.cloudera.impala.thrift.TColumn;
-import com.cloudera.impala.thrift.THBaseTable;
-import com.cloudera.impala.thrift.TResultSet;
-import com.cloudera.impala.thrift.TResultSetMetadata;
-import com.cloudera.impala.thrift.TTable;
-import com.cloudera.impala.thrift.TTableDescriptor;
-import com.cloudera.impala.thrift.TTableType;
-import com.cloudera.impala.util.StatsHelper;
-import com.cloudera.impala.util.TResultRowBuilder;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-/**
- * Impala representation of HBase table metadata,
- * as loaded from Hive's metastore.
- * This implies that we inherit the metastore's limitations related to HBase,
- * for example the lack of support for composite HBase row keys.
- * We sort the HBase columns (cols) by family/qualifier
- * to simplify the retrieval logic in the backend, since
- * HBase returns data ordered by family/qualifier.
- * This implies that a "select *"-query on an HBase table
- * will not have the columns ordered as they were declared in the DDL.
- * They will be ordered by family/qualifier.
- *
- */
-public class HBaseTable extends Table {
- // Maximum deviation from the average to stop querying more regions
- // to estimate the row count
- private static final double DELTA_FROM_AVERAGE = 0.15;
-
- private static final Logger LOG = Logger.getLogger(HBaseTable.class);
-
- // Copied from Hive's HBaseStorageHandler.java.
- public static final String DEFAULT_PREFIX = "default.";
-
- // Number of rows fetched during the row count estimation per region
- public static final int ROW_COUNT_ESTIMATE_BATCH_SIZE = 10;
-
- // Minimum number of regions that are checked to estimate the row count
- private static final int MIN_NUM_REGIONS_TO_CHECK = 5;
-
- // Column referring to HBase row key.
- // Hive (including metastore) currently doesn't support composite HBase keys.
- protected HBaseColumn rowKey_;
-
- // Name of table in HBase.
- // 'this.name' is the alias of the HBase table in Hive.
- protected String hbaseTableName_;
-
- // Input format class for HBase tables read by Hive.
- private static final String HBASE_INPUT_FORMAT =
- "org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat";
-
- // Serialization class for HBase tables set in the corresponding Metastore table.
- private static final String HBASE_SERIALIZATION_LIB =
- "org.apache.hadoop.hive.hbase.HBaseSerDe";
-
- // Storage handler class for HBase tables read by Hive.
- private static final String HBASE_STORAGE_HANDLER =
- "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
-
- // Column family of HBase row key
- private static final String ROW_KEY_COLUMN_FAMILY = ":key";
-
- // Keep the conf around
- private final static Configuration hbaseConf_ = HBaseConfiguration.create();
-
- // Cached column families. Used primarily for speeding up row stats estimation
- // (see CDH-19292).
- private HColumnDescriptor[] columnFamilies_ = null;
-
- protected HBaseTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl,
- Db db, String name, String owner) {
- super(id, msTbl, db, name, owner);
- }
-
- /**
- * Connection instances are expensive to create. The HBase documentation recommends
- * creating one and sharing it among threads. All operations on a connection are
- * thread-safe.
- */
- private static class ConnectionHolder {
- private static Connection connection_ = null;
-
- public static synchronized Connection getConnection(Configuration conf)
- throws IOException {
- if (connection_ == null || connection_.isClosed()) {
- connection_ = ConnectionFactory.createConnection(conf);
- }
- return connection_;
- }
- }
-
- /**
- * Table client objects are thread-unsafe and cheap to create. The HBase docs recommend
- * creating a new one for each task and then closing it when done.
- */
- public org.apache.hadoop.hbase.client.Table getHBaseTable() throws IOException {
- return ConnectionHolder.getConnection(hbaseConf_)
- .getTable(TableName.valueOf(hbaseTableName_));
- }
-
- private void closeHBaseTable(org.apache.hadoop.hbase.client.Table table) {
- try {
- table.close();
- } catch (IOException e) {
- LOG.error("Error closing HBase table: " + hbaseTableName_, e);
- }
- }
-
- /**
- * Get the cluster status, making sure we close the admin client afterwards.
- */
- public ClusterStatus getClusterStatus() throws IOException {
- Admin admin = null;
- ClusterStatus clusterStatus = null;
- try {
- Connection connection = ConnectionHolder.getConnection(hbaseConf_);
- admin = connection.getAdmin();
- clusterStatus = admin.getClusterStatus();
- } finally {
- if (admin != null) admin.close();
- }
- return clusterStatus;
- }
-
- /**
- * Parses the column description string into the column families and column
- * qualifiers. This is a copy of HBaseSerDe.parseColumnMapping and
- * parseColumnStorageTypes with the parts we don't use removed. The Hive functions
- * are not public.
- *
- * tableDefaultStorageIsBinary - true if the table defaults to binary encoding
- * columnsMappingSpec - input string format describing the table
- * fieldSchemas - input field schema from metastore table
- * columnFamilies/columnQualifiers/columnBinaryEncodings - out parameters that will be
- * filled with the column family, column qualifier and encoding for each column.
- */
- private void parseColumnMapping(boolean tableDefaultStorageIsBinary,
- String columnsMappingSpec, List<FieldSchema> fieldSchemas,
- List<String> columnFamilies, List<String> columnQualifiers,
- List<Boolean> colIsBinaryEncoded) throws SerDeException {
- if (columnsMappingSpec == null) {
- throw new SerDeException(
- "Error: hbase.columns.mapping missing for this HBase table.");
- }
-
- if (columnsMappingSpec.equals("") ||
- columnsMappingSpec.equals(HBaseSerDe.HBASE_KEY_COL)) {
- throw new SerDeException("Error: hbase.columns.mapping specifies only "
- + "the HBase table row key. A valid Hive-HBase table must specify at "
- + "least one additional column.");
- }
-
- int rowKeyIndex = -1;
- String[] columnSpecs = columnsMappingSpec.split(",");
- // If there was an implicit key column mapping, the number of columns (fieldSchemas)
- // will be one more than the number of column mapping specs.
- int fsStartIdxOffset = fieldSchemas.size() - columnSpecs.length;
- if (fsStartIdxOffset != 0 && fsStartIdxOffset != 1) {
- // This should never happen - Hive blocks creating a mismatched table and both Hive
- // and Impala currently block all column-level DDL on HBase tables.
- throw new SerDeException(String.format("Number of entries in " +
- "'hbase.columns.mapping' does not match the number of columns in the " +
- "table: %d != %d (counting the key if implicit)",
- columnSpecs.length, fieldSchemas.size()));
- }
-
- for (int i = 0; i < columnSpecs.length; ++i) {
- String mappingSpec = columnSpecs[i];
- String[] mapInfo = mappingSpec.split("#");
- // Trim column info so that serdeproperties with new lines still parse correctly.
- String colInfo = mapInfo[0].trim();
-
- int idxFirst = colInfo.indexOf(":");
- int idxLast = colInfo.lastIndexOf(":");
-
- if (idxFirst < 0 || !(idxFirst == idxLast)) {
- throw new SerDeException("Error: the HBase columns mapping contains a "
- + "badly formed column family, column qualifier specification.");
- }
-
- if (colInfo.equals(HBaseSerDe.HBASE_KEY_COL)) {
- Preconditions.checkState(fsStartIdxOffset == 0);
- rowKeyIndex = i;
- columnFamilies.add(colInfo);
- columnQualifiers.add(null);
- } else {
- String[] parts = colInfo.split(":");
- Preconditions.checkState(parts.length > 0 && parts.length <= 2);
- columnFamilies.add(parts[0]);
- if (parts.length == 2) {
- columnQualifiers.add(parts[1]);
- } else {
- columnQualifiers.add(null);
- }
- }
-
- // Set column binary encoding
- FieldSchema fieldSchema = fieldSchemas.get(i + fsStartIdxOffset);
- boolean supportsBinaryEncoding = supportsBinaryEncoding(fieldSchema);
- if (mapInfo.length == 1) {
- // There is no column level storage specification. Use the table storage spec.
- colIsBinaryEncoded.add(
- new Boolean(tableDefaultStorageIsBinary && supportsBinaryEncoding));
- } else if (mapInfo.length == 2) {
- // There is a storage specification for the column
- String storageOption = mapInfo[1];
-
- if (!(storageOption.equals("-") || "string".startsWith(storageOption) || "binary"
- .startsWith(storageOption))) {
- throw new SerDeException("Error: A column storage specification is one of"
- + " the following: '-', a prefix of 'string', or a prefix of 'binary'. "
- + storageOption + " is not a valid storage option specification for "
- + fieldSchema.getName());
- }
-
- boolean isBinaryEncoded = false;
- if ("-".equals(storageOption)) {
- isBinaryEncoded = tableDefaultStorageIsBinary;
- } else if ("binary".startsWith(storageOption)) {
- isBinaryEncoded = true;
- }
- if (isBinaryEncoded && !supportsBinaryEncoding) {
- // Use string encoding and log a warning if the column spec is binary but the
- // column type does not support it.
- // TODO: Hive/HBase does not raise an exception, but should we?
- LOG.warn("Column storage specification for column " + fieldSchema.getName()
- + " is binary" + " but the column type " + fieldSchema.getType() +
- " does not support binary encoding. Fallback to string format.");
- isBinaryEncoded = false;
- }
- colIsBinaryEncoded.add(isBinaryEncoded);
- } else {
- // error in storage specification
- throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING
- + " storage specification " + mappingSpec + " is not valid for column: "
- + fieldSchema.getName());
- }
- }
-
- if (rowKeyIndex == -1) {
- columnFamilies.add(0, HBaseSerDe.HBASE_KEY_COL);
- columnQualifiers.add(0, null);
- colIsBinaryEncoded.add(0,
- supportsBinaryEncoding(fieldSchemas.get(0)) && tableDefaultStorageIsBinary);
- }
- }
-
- private boolean supportsBinaryEncoding(FieldSchema fs) {
- try {
- Type colType = parseColumnType(fs);
- // Only boolean, integer and floating point types can use binary storage.
- return colType.isBoolean() || colType.isIntegerType()
- || colType.isFloatingPointType();
- } catch (TableLoadingException e) {
- return false;
- }
- }
-
- @Override
- /**
- * For HBase tables, we can support tables with columns we don't understand at
- * all (e.g. map) as long as the user does not select those. This is in contrast
- * to HDFS tables, since we typically need to understand all columns to make sense
- * of the file at all.
- */
- public void load(boolean reuseMetadata, IMetaStoreClient client,
- org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException {
- Preconditions.checkNotNull(getMetaStoreTable());
- try {
- msTable_ = msTbl;
- hbaseTableName_ = getHBaseTableName(getMetaStoreTable());
- // Warm up the connection and verify the table exists.
- getHBaseTable().close();
- columnFamilies_ = null;
- Map<String, String> serdeParams =
- getMetaStoreTable().getSd().getSerdeInfo().getParameters();
- String hbaseColumnsMapping = serdeParams.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
- if (hbaseColumnsMapping == null) {
- throw new MetaException("No hbase.columns.mapping defined in Serde.");
- }
-
- String hbaseTableDefaultStorageType = getMetaStoreTable().getParameters().get(
- HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
- boolean tableDefaultStorageIsBinary = false;
- if (hbaseTableDefaultStorageType != null &&
- !hbaseTableDefaultStorageType.isEmpty()) {
- if (hbaseTableDefaultStorageType.equalsIgnoreCase("binary")) {
- tableDefaultStorageIsBinary = true;
- } else if (!hbaseTableDefaultStorageType.equalsIgnoreCase("string")) {
- throw new SerDeException("Error: " +
- HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE +
- " parameter must be specified as" +
- " 'string' or 'binary'; '" + hbaseTableDefaultStorageType +
- "' is not a valid specification for this table/serde property.");
- }
- }
-
- // Parse HBase column-mapping string.
- List<FieldSchema> fieldSchemas = getMetaStoreTable().getSd().getCols();
- List<String> hbaseColumnFamilies = new ArrayList<String>();
- List<String> hbaseColumnQualifiers = new ArrayList<String>();
- List<Boolean> hbaseColumnBinaryEncodings = new ArrayList<Boolean>();
- parseColumnMapping(tableDefaultStorageIsBinary, hbaseColumnsMapping, fieldSchemas,
- hbaseColumnFamilies, hbaseColumnQualifiers, hbaseColumnBinaryEncodings);
- Preconditions.checkState(
- hbaseColumnFamilies.size() == hbaseColumnQualifiers.size());
- Preconditions.checkState(fieldSchemas.size() == hbaseColumnFamilies.size());
-
- // Populate tmp cols in the order they appear in the Hive metastore.
- // We will reorder the cols below.
- List<HBaseColumn> tmpCols = Lists.newArrayList();
- // Store the key column separately.
- // TODO: Change this to an ArrayList once we support composite row keys.
- HBaseColumn keyCol = null;
- for (int i = 0; i < fieldSchemas.size(); ++i) {
- FieldSchema s = fieldSchemas.get(i);
- Type t = Type.INVALID;
- try {
- t = parseColumnType(s);
- } catch (TableLoadingException e) {
- // Ignore hbase types we don't support yet. We can load the metadata
- // but won't be able to select from it.
- }
- HBaseColumn col = new HBaseColumn(s.getName(), hbaseColumnFamilies.get(i),
- hbaseColumnQualifiers.get(i), hbaseColumnBinaryEncodings.get(i),
- t, s.getComment(), -1);
- if (col.getColumnFamily().equals(ROW_KEY_COLUMN_FAMILY)) {
- // Store the row key column separately from the rest
- keyCol = col;
- } else {
- tmpCols.add(col);
- }
- }
- Preconditions.checkState(keyCol != null);
-
- // The backend assumes that the row key column is always first and
- // that the remaining HBase columns are ordered by columnFamily,columnQualifier,
- // so the final position depends on the other mapped HBase columns.
- // Sort columns and update positions.
- Collections.sort(tmpCols);
- clearColumns();
-
- keyCol.setPosition(0);
- addColumn(keyCol);
- // Update the positions of the remaining columns
- for (int i = 0; i < tmpCols.size(); ++i) {
- HBaseColumn col = tmpCols.get(i);
- col.setPosition(i + 1);
- addColumn(col);
- }
-
- // Set table stats.
- numRows_ = getRowCount(super.getMetaStoreTable().getParameters());
-
- // Since we don't support composite HBase row keys yet, all HBase tables have
- // a single clustering column.
- numClusteringCols_ = 1;
- loadAllColumnStats(client);
- } catch (Exception e) {
- throw new TableLoadingException("Failed to load metadata for HBase table: " +
- name_, e);
- }
- }
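To make the reordering concrete, here is a hypothetical declaration and the
resulting catalog order (column names invented for illustration):

    // Declaration order in the metastore: rowkey, cf2:y, cf1:x
    // Catalog order after the sort above:
    //   rowkey (position 0), cf1:x (position 1), cf2:y (position 2)

The row key always takes position 0 and the remaining columns are ordered by
(family, qualifier) rather than by declaration order.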
-
- @Override
- protected void loadFromThrift(TTable table) throws TableLoadingException {
- super.loadFromThrift(table);
- try {
- hbaseTableName_ = getHBaseTableName(getMetaStoreTable());
- // Warm up the connection and verify the table exists.
- getHBaseTable().close();
- columnFamilies_ = null;
- } catch (Exception e) {
- throw new TableLoadingException("Failed to load metadata for HBase table from " +
- "thrift table: " + name_, e);
- }
- }
-
- /**
- * This method is completely copied from Hive's HBaseStorageHandler.java.
- */
- private String getHBaseTableName(org.apache.hadoop.hive.metastore.api.Table tbl) {
- // Give preference to TBLPROPERTIES over SERDEPROPERTIES
- // (really we should only use TBLPROPERTIES, so this is just
- // for backwards compatibility with the original specs).
- String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME);
- if (tableName == null) {
- tableName = tbl.getSd().getSerdeInfo().getParameters().get(
- HBaseSerDe.HBASE_TABLE_NAME);
- }
- if (tableName == null) {
- tableName = tbl.getDbName() + "." + tbl.getTableName();
- if (tableName.startsWith(DEFAULT_PREFIX)) {
- tableName = tableName.substring(DEFAULT_PREFIX.length());
- }
- }
- return tableName;
- }
-
- /**
- * Estimates the number of rows for a single region and returns a pair with
- * the estimated row count and the estimated size in bytes per row.
- */
- private Pair<Long, Long> getEstimatedRowStatsForRegion(HRegionLocation location,
- boolean isCompressed, ClusterStatus clusterStatus) throws IOException {
- HRegionInfo info = location.getRegionInfo();
-
- Scan s = new Scan(info.getStartKey());
- // Get a small sample of rows
- s.setBatch(ROW_COUNT_ESTIMATE_BATCH_SIZE);
- // Try to get every version so the row's size can be used in the estimate.
- s.setMaxVersions(Short.MAX_VALUE);
- // Don't cache the blocks; this one-off sampling scan would only pollute
- // the block cache.
- s.setCacheBlocks(false);
- // Use a non-raw scan, so delete markers and other internal entries are
- // excluded from the size estimate.
- s.setRaw(false);
-
- org.apache.hadoop.hbase.client.Table table = getHBaseTable();
- ResultScanner rs = table.getScanner(s);
-
- long currentRowSize = 0;
- long currentRowCount = 0;
-
- try {
- // Fetch up to ROW_COUNT_ESTIMATE_BATCH_SIZE rows as a
- // representative sample.
- for (int i = 0; i < ROW_COUNT_ESTIMATE_BATCH_SIZE; ++i) {
- Result r = rs.next();
- if (r == null) break;
- // Check for empty rows, see IMPALA-1451.
- if (r.isEmpty()) continue;
- ++currentRowCount;
- // To estimate the number of rows we simply use the number of bytes
- // returned from the underlying buffer. Since HBase internally works
- // with these same structures, this gives reasonable estimates.
- Cell[] cells = r.rawCells();
- for (Cell c : cells) {
- if (c instanceof KeyValue) {
- currentRowSize += KeyValue.getKeyValueDataStructureSize(c.getRowLength(),
- c.getFamilyLength(), c.getQualifierLength(), c.getValueLength(),
- c.getTagsLength());
- } else {
- throw new IllegalStateException("Celltype " + c.getClass().getName() +
- " not supported.");
- }
- }
- }
- } finally {
- rs.close();
- closeHBaseTable(table);
- }
-
- // If there are no rows then no need to estimate.
- if (currentRowCount == 0) return new Pair<Long, Long>(0L, 0L);
- // Get the size.
- long currentSize = getRegionSize(location, clusterStatus);
- // Estimate the number of rows.
- double bytesPerRow = currentRowSize / (double) currentRowCount;
- if (currentSize == 0) {
- return new Pair<Long, Long>(currentRowCount, (long) bytesPerRow);
- }
-
- // The compression factor of two is only a best-effort guess.
- long estimatedRowCount =
- (long) ((isCompressed ? 2 : 1) * (currentSize / bytesPerRow));
-
- return new Pair<Long, Long>(estimatedRowCount, (long) bytesPerRow);
- }
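As a worked example of the estimate above, with invented numbers: suppose the
sample scan returns 1024 non-empty rows totalling 204800 bytes from a compressed
region whose storefiles occupy 100 MB (104857600 bytes).

    bytesPerRow       = 204800 / 1024          = 200
    estimatedRowCount = 2 * (104857600 / 200)  = 1048576  // factor 2 for compression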
-
- /**
- * Get an estimate of the number of rows and bytes per row in regions between
- * startRowKey and endRowKey.
- *
- * This number is calculated by incrementally sampling as many regions as
- * necessary until we observe a relatively constant row size per region on average.
- * Depending on the skew of data across the regions, this can mean that we need
- * to check only a minimal number of regions or that we will scan all of them.
- *
- * The HBase region servers periodically update the master with their metrics,
- * including storefile size. We get the size of the storefiles for all regions in
- * the cluster with a single call to getClusterStatus from the master.
- *
- * The accuracy of this estimate is determined by how many rows have been
- * written to the memstore but not yet flushed: key-value pairs held in the
- * memstore are not reflected in the storefile sizes that the estimate is
- * based on, so a large memstore leads to poor estimates.
- *
- * Currently, the algorithm does not consider the case that the key range used
- * as a parameter may have a different row size than the rest of the region.
- *
- * The values computed here should be cached so that high-qps workloads do not
- * overwhelm the HBase master and region servers; this could be done in load().
- * The method is synchronized to make sure that only one thread at a time is
- * using the htable.
- *
- * @param startRowKey
- * First row key in the range
- * @param endRowKey
- * Last row key in the range
- * @return The estimated number of rows in the regions between the row keys (first) and
- * the estimated row size in bytes (second).
- */
- public synchronized Pair<Long, Long> getEstimatedRowStats(byte[] startRowKey,
- byte[] endRowKey) {
- Preconditions.checkNotNull(startRowKey);
- Preconditions.checkNotNull(endRowKey);
-
- boolean isCompressed = false;
- long rowCount = 0;
- long rowSize = 0;
-
- org.apache.hadoop.hbase.client.Table table = null;
- try {
- table = getHBaseTable();
- ClusterStatus clusterStatus = getClusterStatus();
-
- // Check whether any column family is compressed.
- // If one is, we will apply an estimated compression factor.
- if (columnFamilies_ == null) {
- columnFamilies_ = table.getTableDescriptor().getColumnFamilies();
- }
- Preconditions.checkNotNull(columnFamilies_);
- for (HColumnDescriptor desc : columnFamilies_) {
- isCompressed |= desc.getCompression() != Compression.Algorithm.NONE;
- }
-
- // Fetch all regions for the key range
- List<HRegionLocation> locations = getRegionsInRange(table, startRowKey, endRowKey);
- Collections.shuffle(locations);
- // The following variables track the number and size of 'rows' in
- // HBase and allow incremental calculation of the average and standard
- // deviation.
- StatsHelper<Long> statsSize = new StatsHelper<Long>();
- long totalEstimatedRows = 0;
-
- // Collects stats samples from at least MIN_NUM_REGIONS_TO_CHECK
- // and at most all regions until the delta is small enough.
- while ((statsSize.count() < MIN_NUM_REGIONS_TO_CHECK ||
- statsSize.stddev() > statsSize.mean() * DELTA_FROM_AVERAGE) &&
- statsSize.count() < locations.size()) {
- HRegionLocation currentLocation = locations.get((int) statsSize.count());
- Pair<Long, Long> tmp = getEstimatedRowStatsForRegion(currentLocation,
- isCompressed, clusterStatus);
- totalEstimatedRows += tmp.first;
- statsSize.addSample(tmp.second);
- }
-
- // Sum up the total size for all regions in range.
- long totalSize = 0;
- for (final HRegionLocation location : locations) {
- totalSize += getRegionSize(location, clusterStatus);
- }
- if (totalSize == 0) {
- rowCount = totalEstimatedRows;
- } else {
- rowCount = (long) (totalSize / statsSize.mean());
- }
- rowSize = (long) statsSize.mean();
- } catch (IOException ioe) {
- // Log the error and return a sentinel value; the failure is tolerable
- // because this is only an estimate.
- // TODO: Put this into the per-query log.
- LOG.error("Error computing HBase row count estimate", ioe);
- return new Pair<Long, Long>(-1L, -1L);
- } finally {
- if (table != null) closeHBaseTable(table);
- }
- return new Pair<Long, Long>(rowCount, rowSize);
- }
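StatsHelper itself is not part of this diff; judging only from the call sites
above (count(), mean(), stddev(), addSample()), a minimal sketch of such an
incremental accumulator could use Welford's online algorithm. The class, field,
and method names below are assumptions:

    // Sketch of an incremental mean/stddev accumulator (Welford's algorithm).
    public class RunningStats {
      private long count_ = 0;
      private double mean_ = 0.0;
      private double m2_ = 0.0;  // Running sum of squared deviations.

      public void addSample(double x) {
        ++count_;
        double delta = x - mean_;
        mean_ += delta / count_;     // Update the running mean.
        m2_ += delta * (x - mean_);  // Update the squared deviations.
      }

      public long count() { return count_; }
      public double mean() { return mean_; }
      public double stddev() {
        // Sample standard deviation; zero until we have at least two samples.
        return count_ > 1 ? Math.sqrt(m2_ / (count_ - 1)) : 0.0;
      }
    }

With such an accumulator the sampling loop above stops once stddev() falls below
DELTA_FROM_AVERAGE * mean(), i.e. once the sampled per-region row sizes look
sufficiently homogeneous.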
-
- /**
- * Returns the size of the given region in bytes. Simply returns the storefile size
- * for this region from the ClusterStatus. Returns 0 in case of an error.
- */
- public long getRegionSize(HRegionLocation location, ClusterStatus clusterStatus) {
- HRegionInfo info = location.getRegionInfo();
- ServerLoad serverLoad = clusterStatus.getLoad(location.getServerName());
-
- // If the serverLoad is null, the master doesn't have information for this region's
- // server. This shouldn't normally happen.
- if (serverLoad == null) {
- LOG.error("Unable to find load for server: " + location.getServerName() +
- " for location " + info.getRegionNameAsString());
- return 0;
- }
- RegionLoad regionLoad = serverLoad.getRegionsLoad().get(info.getRegionName());
- // Guard against a missing entry; the master may not have metrics for a
- // region that was only just opened.
- if (regionLoad == null) return 0;
-
- final long megaByte = 1024L * 1024L;
- return regionLoad.getStorefileSizeMB() * megaByte;
- }
-
- /**
- * Hive returns the columns in order of their declaration for HBase tables.
- */
- @Override
- public ArrayList<Column> getColumnsInHiveOrder() {
- return getColumns();
- }
-
- @Override
- public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) {
- TTableDescriptor tableDescriptor =
- new TTableDescriptor(id_.asInt(), TTableType.HBASE_TABLE,
- getTColumnDescriptors(), numClusteringCols_, hbaseTableName_, db_.getName());
- tableDescriptor.setHbaseTable(getTHBaseTable());
- return tableDescriptor;
- }
-
- public String getHBaseTableName() {
- return hbaseTableName_;
- }
-
- public static Configuration getHBaseConf() {
- return hbaseConf_;
- }
-
- public int getNumNodes() {
- // TODO: implement
- return 100;
- }
-
- @Override
- public TCatalogObjectType getCatalogObjectType() {
- return TCatalogObjectType.TABLE;
- }
-
- @Override
- public TTable toThrift() {
- TTable table = super.toThrift();
- table.setTable_type(TTableType.HBASE_TABLE);
- table.setHbase_table(getTHBaseTable());
- return table;
- }
-
- private THBaseTable getTHBaseTable() {
- THBaseTable tHbaseTable = new THBaseTable();
- tHbaseTable.setTableName(hbaseTableName_);
- for (Column c : getColumns()) {
- HBaseColumn hbaseCol = (HBaseColumn) c;
- tHbaseTable.addToFamilies(hbaseCol.getColumnFamily());
- if (hbaseCol.getColumnQualifier() != null) {
- tHbaseTable.addToQualifiers(hbaseCol.getColumnQualifier());
- } else {
- tHbaseTable.addToQualifiers("");
- }
- tHbaseTable.addToBinary_encoded(hbaseCol.isBinaryEncoded());
- }
- return tHbaseTable;
- }
-
- /**
- * This is copied from org.apache.hadoop.hbase.client.HTable. The only difference is
- * that it does not use the region location cache when calling getRegionLocation.
- * TODO: Remove this function and use HTable.getRegionsInRange when the non-cache
- * version has been ported to CDH (DISTRO-477).
- * Get the corresponding regions for an arbitrary range of keys.
- * <p>
- *
- * @param startRow
- * Starting row in range, inclusive
- * @param endRow
- * Ending row in range, exclusive
- * @return A list of HRegionLocations corresponding to the regions that
- * contain the specified range
- * @throws IOException
- * if a remote or network exception occurs
- */
- public static List<HRegionLocation> getRegionsInRange(
- org.apache.hadoop.hbase.client.Table hbaseTbl,
- final byte[] startKey, final byte[] endKey) throws IOException {
- final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW);
- if ((Bytes.compareTo(startKey, endKey) > 0) && !endKeyIsEndOfTable) {
- throw new IllegalArgumentException("Invalid range: " +
- Bytes.toStringBinary(startKey) + " > " + Bytes.toStringBinary(endKey));
- }
- final List<HRegionLocation> regionList = new ArrayList<HRegionLocation>();
- byte[] currentKey = startKey;
- Connection connection = ConnectionHolder.getConnection(hbaseConf_);
- // Make sure only one thread is accessing the hbaseTbl.
- synchronized (hbaseTbl) {
- RegionLocator locator = connection.getRegionLocator(hbaseTbl.getName());
- do {
- // Always reload the region location info (bypassing the cache).
- HRegionLocation regionLocation = locator.getRegionLocation(currentKey, true);
- regionList.add(regionLocation);
- currentKey = regionLocation.getRegionInfo().getEndKey();
- } while (!Bytes.equals(currentKey, HConstants.EMPTY_END_ROW) &&
- (endKeyIsEndOfTable || Bytes.compareTo(currentKey, endKey) < 0));
- }
- return regionList;
- }
-
- /**
- * Returns the storage handler class for HBase tables read by Hive.
- */
- @Override
- public String getStorageHandlerClassName() {
- return HBASE_STORAGE_HANDLER;
- }
-
- /**
- * Returns statistics on this table as a tabular result set. Used for the
- * SHOW TABLE STATS statement. The schema of the returned TResultSet is set
- * inside this method.
- */
- public TResultSet getTableStats() {
- TResultSet result = new TResultSet();
- TResultSetMetadata resultSchema = new TResultSetMetadata();
- result.setSchema(resultSchema);
- resultSchema.addToColumns(
- new TColumn("Region Location", Type.STRING.toThrift()));
- resultSchema.addToColumns(new TColumn("Start RowKey",
- Type.STRING.toThrift()));
- resultSchema.addToColumns(new TColumn("Est. #Rows", Type.BIGINT.toThrift()));
- resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift()));
-
- org.apache.hadoop.hbase.client.Table table;
- try {
- table = getHBaseTable();
- } catch (IOException e) {
- LOG.error("Error getting HBase table " + hbaseTableName_, e);
- throw new RuntimeException(e);
- }
-
- // TODO: Consider fancier stats maintenance techniques for speeding up this process.
- // Currently, we list all regions and perform a mini-scan of each of them to
- // estimate the number of rows, the data size, etc., which is rather expensive.
- try {
- ClusterStatus clusterStatus = getClusterStatus();
- long totalNumRows = 0;
- long totalSize = 0;
- List<HRegionLocation> regions = HBaseTable.getRegionsInRange(table,
- HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
- for (HRegionLocation region : regions) {
- TResultRowBuilder rowBuilder = new TResultRowBuilder();
- HRegionInfo regionInfo = region.getRegionInfo();
- Pair<Long, Long> estRowStats =
- getEstimatedRowStatsForRegion(region, false, clusterStatus);
-
- long numRows = estRowStats.first.longValue();
- long regionSize = getRegionSize(region, clusterStatus);
- totalNumRows += numRows;
- totalSize += regionSize;
-
- // Add the region location, start rowkey, number of rows and raw size.
- rowBuilder.add(String.valueOf(region.getHostname()))
- .add(Bytes.toString(regionInfo.getStartKey())).add(numRows)
- .addBytes(regionSize);
- result.addToRows(rowBuilder.get());
- }
-
- // Total num rows and raw region size.
- if (regions.size() > 1) {
- TResultRowBuilder rowBuilder = new TResultRowBuilder();
- rowBuilder.add("Total").add("").add(totalNumRows).addBytes(totalSize);
- result.addToRows(rowBuilder.get());
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
- } finally {
- closeHBaseTable(table);
- }
- return result;
- }
-
- /**
- * Returns true if the given Metastore Table represents an HBase table.
- * Different versions of Hive/HBase are inconsistent about which HBase-related
- * fields are set (e.g., HIVE-6548 changed the input format to null).
- * For maximum compatibility, we consider all known fields that indicate an
- * HBase table.
- */
- public static boolean isHBaseTable(
- org.apache.hadoop.hive.metastore.api.Table msTbl) {
- if (msTbl.getParameters() != null &&
- msTbl.getParameters().containsKey(HBASE_STORAGE_HANDLER)) {
- return true;
- }
- StorageDescriptor sd = msTbl.getSd();
- if (sd == null) return false;
- if (sd.getInputFormat() != null && sd.getInputFormat().equals(HBASE_INPUT_FORMAT)) {
- return true;
- } else if (sd.getSerdeInfo() != null &&
- sd.getSerdeInfo().getSerializationLib() != null &&
- sd.getSerdeInfo().getSerializationLib().equals(HBASE_SERIALIZATION_LIB)) {
- return true;
- }
- return false;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java
deleted file mode 100644
index b8ff102..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCachePool.java
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-
-import com.cloudera.impala.thrift.TCatalogObjectType;
-import com.cloudera.impala.thrift.THdfsCachePool;
-import com.google.common.base.Preconditions;
-
-/**
- * Represents an HDFS cache pool (CachePoolInfo class). Currently, the only metadata we
- * care about for cache pools is the cache pool name. In the future it may be desirable
- * to track additional metadata such as the owner, size, and current usage of the pool.
- */
-public class HdfsCachePool implements CatalogObject {
- private long catalogVersion_;
- private final THdfsCachePool cachePool_;
-
- public HdfsCachePool(CachePoolInfo cachePoolInfo) {
- cachePool_ = new THdfsCachePool(cachePoolInfo.getPoolName());
- }
-
- public HdfsCachePool(THdfsCachePool cachePool) {
- Preconditions.checkNotNull(cachePool);
- cachePool_ = cachePool;
- }
-
- @Override
- public TCatalogObjectType getCatalogObjectType() {
- return TCatalogObjectType.HDFS_CACHE_POOL;
- }
-
- public THdfsCachePool toThrift() {
- return cachePool_;
- }
-
- public static HdfsCachePool fromThrift(THdfsCachePool cachePool) {
- return new HdfsCachePool(cachePool);
- }
-
- @Override
- public String getName() { return cachePool_.getPool_name(); }
- @Override
- public long getCatalogVersion() { return catalogVersion_; }
- @Override
- public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; }
- @Override
- public boolean isLoaded() { return true; }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java
deleted file mode 100644
index 302ec99..0000000
--- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsCompression.java
+++ /dev/null
@@ -1,85 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.catalog;
-
-import com.cloudera.impala.thrift.THdfsCompression;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-
-/**
- * Support for recognizing compression suffixes on data files.
- * As in MapReduce, the compression of a file is recognized by the file-name
- * suffix of a supported codec.
- * For now Impala supports LZO, GZIP, SNAPPY, and BZIP2. LZO additionally
- * requires Hive's dedicated LZO input format class.
- */
-// TODO: Add LZ4?
-public enum HdfsCompression {
- NONE,
- DEFLATE,
- GZIP,
- BZIP2,
- SNAPPY,
- LZO,
- LZO_INDEX; // LZO index file.
-
- /* Map from a suffix to a compression type */
- private static final ImmutableMap<String, HdfsCompression> SUFFIX_MAP =
- ImmutableMap.<String, HdfsCompression>builder().
- put("deflate", DEFLATE).
- put("gz", GZIP).
- put("bz2", BZIP2).
- put("snappy", SNAPPY).
- put("lzo", LZO).
- put("index", LZO_INDEX).
- build();
-
- /* Given a file name return its compression type, if any. */
- public static HdfsCompression fromFileName(String fileName) {
- int index = fileName.lastIndexOf(".");
- if (index == -1) {
- return NONE;
- }
-
- String suffix = fileName.substring(index + 1);
- HdfsCompression compression = SUFFIX_MAP.get(suffix.toLowerCase());
- return compression == null ? NONE : compression;
- }
-
- public THdfsCompression toThrift() {
- switch (this) {
- case NONE: return THdfsCompression.NONE;
- case DEFLATE: return THdfsCompression.DEFLATE;
- case GZIP: return THdfsCompression.GZIP;
- case BZIP2: return THdfsCompression.BZIP2;
- case SNAPPY: return THdfsCompression.SNAPPY_BLOCKED;
- case LZO: return THdfsCompression.LZO;
- default: throw new IllegalStateException("Unexpected codec: " + this);
- }
- }
-
- /* Returns a compression type based on (Hive's) input format. Special case for LZO. */
- public static HdfsCompression fromHdfsInputFormatClass(String inputFormatClass) {
- // TODO: Remove when we have the native LZO writer.
- Preconditions.checkNotNull(inputFormatClass);
- if (inputFormatClass.equals(HdfsFileFormat.LZO_TEXT.inputFormat())) {
- return LZO;
- }
- return NONE;
- }
-}
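A quick illustration of the suffix mapping above, using invented file names:

    HdfsCompression.fromFileName("part-00000.gz");     // GZIP
    HdfsCompression.fromFileName("part-00000.SNAPPY"); // SNAPPY (matching is case-insensitive)
    HdfsCompression.fromFileName("data.lzo.index");    // LZO_INDEX (only the last suffix counts)
    HdfsCompression.fromFileName("README");            // NONE (no suffix at all)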