Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/23 21:54:34 UTC
[3/4] impala git commit: IMPALA-4277: Support multiple versions of Hadoop ecosystem
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/analysis/ParquetHelper.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/analysis/ParquetHelper.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/analysis/ParquetHelper.java
new file mode 100644
index 0000000..8c9bff8
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/analysis/ParquetHelper.java
@@ -0,0 +1,341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.MapType;
+import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.StructType;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FileSystemUtil;
+
+/**
+ * Provides extractParquetSchema() to extract a schema
+ * from a parquet file.
+ *
+ * Because Parquet's Java package changed between Parquet 1.5
+ * and 1.9, a second copy of this file, with "org.apache.parquet." replaced
+ * with "parquet.", is generated by the build system.
+ */
+class ParquetHelper {
+ private final static String ERROR_MSG =
+ "Failed to convert Parquet type\n%s\nto an Impala %s type:\n%s\n";
+
+ /**
+ * Reads the first block from the given HDFS file and returns the Parquet schema.
+ * Throws an AnalysisException for any failure, such as failing to read the file
+ * or failing to parse the contents.
+ */
+ private static org.apache.parquet.schema.MessageType loadParquetSchema(Path pathToFile)
+ throws AnalysisException {
+ try {
+ FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
+ if (!fs.isFile(pathToFile)) {
+ throw new AnalysisException("Cannot infer schema, path is not a file: " +
+ pathToFile);
+ }
+ } catch (IOException e) {
+ throw new AnalysisException("Failed to connect to filesystem:" + e);
+ } catch (IllegalArgumentException e) {
+ throw new AnalysisException(e.getMessage());
+ }
+ ParquetMetadata readFooter = null;
+ try {
+ readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
+ pathToFile);
+ } catch (FileNotFoundException e) {
+ throw new AnalysisException("File not found: " + e);
+ } catch (IOException e) {
+ throw new AnalysisException("Failed to open file as a parquet file: " + e);
+ } catch (RuntimeException e) {
+ // Parquet throws a generic RuntimeException when reading a non-parquet file
+ if (e.toString().contains("is not a Parquet file")) {
+ throw new AnalysisException("File is not a parquet file: " + pathToFile);
+ }
+ // otherwise, who knows what we caught, throw it back up
+ throw e;
+ }
+ return readFooter.getFileMetaData().getSchema();
+ }
+
+ /**
+ * Converts a "primitive" Parquet type to an Impala type.
+ * A primitive type is a non-nested type with no annotations.
+ */
+ private static Type convertPrimitiveParquetType(org.apache.parquet.schema.Type parquetType)
+ throws AnalysisException {
+ Preconditions.checkState(parquetType.isPrimitive());
+ PrimitiveType prim = parquetType.asPrimitiveType();
+ switch (prim.getPrimitiveTypeName()) {
+ case BINARY: return Type.STRING;
+ case BOOLEAN: return Type.BOOLEAN;
+ case DOUBLE: return Type.DOUBLE;
+ case FIXED_LEN_BYTE_ARRAY:
+ throw new AnalysisException(
+ "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field " +
+ parquetType.getName());
+ case FLOAT: return Type.FLOAT;
+ case INT32: return Type.INT;
+ case INT64: return Type.BIGINT;
+ case INT96: return Type.TIMESTAMP;
+ default:
+ Preconditions.checkState(false, "Unexpected parquet primitive type: " +
+ prim.getPrimitiveTypeName());
+ return null;
+ }
+ }
+
+ /**
+ * Converts a Parquet group type to an Impala map Type. We support both standard
+ * Parquet map representations, as well as legacy. Legacy representations are handled
+ * according to this specification:
+ * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#backward-compatibility-rules-1
+ *
+ * Standard representation of a map in Parquet:
+ * <optional | required> group <name> (MAP) { <-- outerGroup is pointing at this
+ * repeated group key_value {
+ * required <key-type> key;
+ * <optional | required> <value-type> value;
+ * }
+ * }
+ */
+ private static MapType convertMap(org.apache.parquet.schema.GroupType outerGroup)
+ throws AnalysisException {
+ if (outerGroup.getFieldCount() != 1) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The logical MAP type must have exactly 1 inner field."));
+ }
+
+ org.apache.parquet.schema.Type innerField = outerGroup.getType(0);
+ if (!innerField.isRepetition(org.apache.parquet.schema.Type.Repetition.REPEATED)) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The logical MAP type must have a repeated inner field."));
+ }
+ if (innerField.isPrimitive()) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The inner field of the logical MAP type must be a group."));
+ }
+
+ org.apache.parquet.schema.GroupType innerGroup = innerField.asGroupType();
+ // It does not matter whether innerGroup has an annotation (for example, it may
+ // be annotated with MAP_KEY_VALUE); we treat annotated and unannotated inner
+ // groups the same.
+ if (innerGroup.getFieldCount() != 2) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The inner field of the logical MAP type must have exactly 2 fields."));
+ }
+
+ org.apache.parquet.schema.Type key = innerGroup.getType(0);
+ if (!key.getName().equals("key")) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The name of the first field of the inner field of the logical MAP " +
+ "type must be 'key'"));
+ }
+ if (!key.isPrimitive()) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The key type of the logical MAP type must be primitive."));
+ }
+ org.apache.parquet.schema.Type value = innerGroup.getType(1);
+ if (!value.getName().equals("value")) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "MAP", "The name of the second field of the inner field of the logical MAP " +
+ "type must be 'value'"));
+ }
+
+ return new MapType(convertParquetType(key), convertParquetType(value));
+ }
+
+ /**
+ * Converts a Parquet group type to an Impala struct Type.
+ */
+ private static StructType convertStruct(org.apache.parquet.schema.GroupType outerGroup)
+ throws AnalysisException {
+ ArrayList<StructField> structFields = new ArrayList<StructField>();
+ for (org.apache.parquet.schema.Type field: outerGroup.getFields()) {
+ StructField f = new StructField(field.getName(), convertParquetType(field));
+ structFields.add(f);
+ }
+ return new StructType(structFields);
+ }
+
+ /**
+ * Converts a Parquet group type to an Impala array Type. We can handle the standard
+ * representation, but also legacy representations for backwards compatibility.
+ * Legacy representations are handled according to this specification:
+ * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#backward-compatibility-rules
+ *
+ * Standard representation of an array in Parquet:
+ * <optional | required> group <name> (LIST) { <-- outerGroup is pointing at this
+ * repeated group list {
+ * <optional | required> <element-type> element;
+ * }
+ * }
+ */
+ private static ArrayType convertArray(org.apache.parquet.schema.GroupType outerGroup)
+ throws AnalysisException {
+ if (outerGroup.getFieldCount() != 1) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "LIST", "The logical LIST type must have exactly 1 inner field."));
+ }
+
+ org.apache.parquet.schema.Type innerField = outerGroup.getType(0);
+ if (!innerField.isRepetition(org.apache.parquet.schema.Type.Repetition.REPEATED)) {
+ throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
+ "LIST", "The inner field of the logical LIST type must be repeated."));
+ }
+ if (innerField.isPrimitive() || innerField.getOriginalType() != null) {
+ // From the Parquet spec:
+ // 1. If the repeated field is not a group, then its type is the element type.
+ //
+ // If innerField is a group but its originalType is not null, the element type
+ // is based on the logical type.
+ return new ArrayType(convertParquetType(innerField));
+ }
+
+ org.apache.parquet.schema.GroupType innerGroup = innerField.asGroupType();
+ if (innerGroup.getFieldCount() != 1) {
+ // From the Parquet spec:
+ // 2. If the repeated field is a group with multiple fields, then its type is a
+ // struct.
+ return new ArrayType(convertStruct(innerGroup));
+ }
+
+ return new ArrayType(convertParquetType(innerGroup.getType(0)));
+ }
+
+ /**
+ * Converts a "logical" Parquet type to an Impala column type.
+ * A Parquet type is considered logical when it has an annotation. The annotation is
+ * stored as an "OriginalType". The Parquet documentation refers to these as logical
+ * types, so we use that terminology here.
+ */
+ private static Type convertLogicalParquetType(org.apache.parquet.schema.Type parquetType)
+ throws AnalysisException {
+ OriginalType orig = parquetType.getOriginalType();
+ if (orig == OriginalType.LIST) {
+ return convertArray(parquetType.asGroupType());
+ }
+ if (orig == OriginalType.MAP || orig == OriginalType.MAP_KEY_VALUE) {
+ // MAP_KEY_VALUE annotation should not be used any more. However, according to the
+ // Parquet spec, some existing data incorrectly uses MAP_KEY_VALUE in place of MAP.
+ // For backward-compatibility, a group annotated with MAP_KEY_VALUE that is not
+ // contained by a MAP-annotated group should be handled as a MAP-annotated group.
+ return convertMap(parquetType.asGroupType());
+ }
+
+ PrimitiveType prim = parquetType.asPrimitiveType();
+ if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY &&
+ (orig == OriginalType.UTF8 || orig == OriginalType.ENUM)) {
+ // UTF8 is the type annotation Parquet uses for strings.
+ // ENUM is the type annotation Parquet uses to indicate that
+ // the original data type, before conversion to Parquet, was an enum.
+ // Applications that do not have enumerated types (e.g. Impala)
+ // should interpret it as a string.
+ // We check that the annotation applies to BINARY to avoid errors if there is
+ // a bad annotation.
+ return Type.STRING;
+ }
+
+ if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32
+ || prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT64) {
+ // Map signed integer types to a supported Impala column type
+ switch (orig) {
+ case INT_8: return Type.TINYINT;
+ case INT_16: return Type.SMALLINT;
+ case INT_32: return Type.INT;
+ case INT_64: return Type.BIGINT;
+ }
+ }
+
+ if (orig == OriginalType.DECIMAL) {
+ return ScalarType.createDecimalType(prim.getDecimalMetadata().getPrecision(),
+ prim.getDecimalMetadata().getScale());
+ }
+
+ throw new AnalysisException(
+ "Unsupported logical parquet type " + orig + " (primitive type is " +
+ prim.getPrimitiveTypeName().name() + ") for field " +
+ parquetType.getName());
+ }
+
+ /**
+ * Converts a Parquet type into an Impala type.
+ */
+ private static Type convertParquetType(org.apache.parquet.schema.Type field)
+ throws AnalysisException {
+ Type type = null;
+ // TODO for 2.3: If a field is not annotated with LIST, it can sometimes still be
+ // interpreted as an array. The following 2 examples should be interpreted as an
+ // array of integers, but this is currently not done.
+ // 1. repeated int int_col;
+ // 2. required group int_arr {
+ // repeated group list {
+ // required int element;
+ // }
+ // }
+ if (field.getOriginalType() != null) {
+ type = convertLogicalParquetType(field);
+ } else if (field.isPrimitive()) {
+ type = convertPrimitiveParquetType(field);
+ } else {
+ // If field is not primitive, it must be a struct.
+ type = convertStruct(field.asGroupType());
+ }
+ return type;
+ }
+
+ /**
+ * Parses a Parquet file stored in HDFS and returns the corresponding Impala schema.
+ * This fails with an analysis exception if any errors occur reading the file,
+ * parsing the Parquet schema, or if the Parquet types cannot be represented in Impala.
+ */
+ static List<ColumnDef> extractParquetSchema(HdfsUri location)
+ throws AnalysisException {
+ org.apache.parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
+ List<org.apache.parquet.schema.Type> fields = parquetSchema.getFields();
+ List<ColumnDef> schema = new ArrayList<ColumnDef>();
+
+ for (org.apache.parquet.schema.Type field: fields) {
+ Type type = convertParquetType(field);
+ Preconditions.checkNotNull(type);
+ String colName = field.getName();
+ Map<ColumnDef.Option, Object> option = Maps.newHashMap();
+ option.put(ColumnDef.Option.COMMENT, "Inferred from Parquet file.");
+ schema.add(new ColumnDef(colName, new TypeDef(type), option));
+ }
+ return schema;
+ }
+}
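
For context: loadParquetSchema() above reads only the file footer, never the data pages, so schema inference for CREATE TABLE LIKE PARQUET stays cheap even for large files. A minimal standalone sketch of the same footer read, using the parquet API exactly as the helper does (the class name and argv-based path are illustrative, not part of this patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.parquet.hadoop.ParquetFileReader;
    import org.apache.parquet.hadoop.metadata.ParquetMetadata;

    public class PrintParquetSchema {
      public static void main(String[] args) throws Exception {
        // Read the footer of the Parquet file named by args[0] and print its
        // message type; extractParquetSchema() would then map each top-level
        // field to an Impala ColumnDef via convertParquetType().
        ParquetMetadata footer =
            ParquetFileReader.readFooter(new Configuration(), new Path(args[0]));
        System.out.println(footer.getFileMetaData().getSchema());
      }
    }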
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaActionFactory.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaActionFactory.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaActionFactory.java
new file mode 100644
index 0000000..da22386
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaActionFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.impala.authorization;
+
+import org.apache.sentry.core.common.BitFieldAction;
+import org.apache.sentry.core.common.BitFieldActionFactory;
+import org.apache.sentry.core.model.db.AccessConstants;
+
+import java.util.List;
+
+/**
+ * Almost identical to HiveActionFactory, but extended to allow
+ * for "refresh".
+ */
+public class ImpalaActionFactory extends BitFieldActionFactory {
+
+ enum ActionType {
+ SELECT(AccessConstants.SELECT, 1),
+ INSERT(AccessConstants.INSERT, 2),
+ ALTER(AccessConstants.ALTER, 4),
+ CREATE(AccessConstants.CREATE, 8),
+ DROP(AccessConstants.DROP, 16),
+ INDEX(AccessConstants.INDEX, 32),
+ LOCK(AccessConstants.LOCK, 64),
+ // "refresh" isn't available in AccessConstants, so using an Impala constant.
+ REFRESH(Privilege.SentryAction.REFRESH.name(), 128),
+
+ // For compatibility, ALL, ALL_STAR, and SOME have the same binary value;
+ // they differ only in name: "ALL", "*", and "+".
+ ALL(AccessConstants.ACTION_ALL, SELECT.getCode() | INSERT.getCode() | ALTER.getCode() | CREATE.getCode() |
+ DROP.getCode() | INDEX.getCode() | LOCK.getCode() | REFRESH.getCode()),
+ ALL_STAR(AccessConstants.ALL, ALL.getCode()),
+ SOME(AccessConstants.SOME, ALL.getCode());
+
+ final private String name;
+ final private int code;
+
+ ActionType(String name, int code) {
+ this.name = name;
+ this.code = code;
+ }
+
+ public int getCode() {
+ return code;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
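+ // Lookup by action code is not implemented; Impala resolves actions by
+ // name via getActionByName() below.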
+ public List<? extends BitFieldAction> getActionsByCode(int actionCode) {
+ return null;
+ }
+
+ public BitFieldAction getActionByName(String name) {
+ for (ActionType action : ActionType.values()) {
+ if (action.name.equalsIgnoreCase(name)) {
+ return new BitFieldAction(action.getName(), action.getCode());
+ }
+ }
+ return null;
+ }
+
+}
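
The action codes above are powers of two, so composite actions are just bitwise ORs; that is how ALL (255) comes to imply each individual action. A small sketch of the arithmetic (it assumes BitFieldAction exposes a getActionCode() accessor, which is not shown in this patch):

    import org.apache.impala.authorization.ImpalaActionFactory;
    import org.apache.sentry.core.common.BitFieldAction;

    public class ActionCodeDemo {
      public static void main(String[] args) {
        ImpalaActionFactory factory = new ImpalaActionFactory();
        BitFieldAction select = factory.getActionByName("select"); // code 1
        BitFieldAction all = factory.getActionByName("all");       // code 255
        // ALL implies SELECT because every bit of SELECT's code is set in ALL's.
        boolean implied = (all.getActionCode() & select.getActionCode())
            == select.getActionCode();
        System.out.println(implied); // true
      }
    }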
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaPrivilegeModel.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaPrivilegeModel.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaPrivilegeModel.java
new file mode 100644
index 0000000..0fd0a70
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/ImpalaPrivilegeModel.java
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.impala.authorization;
+
+import java.util.Map;
+
+import org.apache.sentry.core.common.BitFieldActionFactory;
+import org.apache.sentry.core.common.ImplyMethodType;
+import org.apache.sentry.core.model.db.HivePrivilegeModel;
+import org.apache.sentry.core.common.Model;
+
+/**
+ * Delegates to HivePrivilegeModel for getImplyMethodMap(), but
+ * adds "refresh" to the BitFieldActionFactory.
+ */
+public class ImpalaPrivilegeModel implements Model {
+ public static final ImpalaPrivilegeModel INSTANCE = new ImpalaPrivilegeModel();
+
+ private ImpalaActionFactory actionFactory = new ImpalaActionFactory();
+
+ @Override
+ public Map<String, ImplyMethodType> getImplyMethodMap() {
+ return HivePrivilegeModel.getInstance().getImplyMethodMap();
+ }
+
+ @Override
+ public BitFieldActionFactory getBitFieldActionFactory() {
+ return actionFactory;
+ }
+
+}
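
Swapping in ImpalaActionFactory while reusing Hive's imply map is what makes the new "refresh" privilege resolvable. A quick sketch, assuming getActionByName() is declared on Sentry's BitFieldActionFactory (as the overrides above imply):

    import org.apache.impala.authorization.ImpalaPrivilegeModel;
    import org.apache.sentry.core.common.BitFieldAction;

    public class RefreshLookupDemo {
      public static void main(String[] args) {
        // The stock Hive factory has no "refresh" action; Impala's factory
        // resolves it to bit code 128.
        BitFieldAction refresh = ImpalaPrivilegeModel.INSTANCE
            .getBitFieldActionFactory().getActionByName("refresh");
        System.out.println(refresh.getActionCode()); // 128
      }
    }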
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/SentryAuthProvider.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/SentryAuthProvider.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/SentryAuthProvider.java
new file mode 100644
index 0000000..a4f0743
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/authorization/SentryAuthProvider.java
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.authorization;
+
+import com.google.common.base.Preconditions;
+
+import org.apache.impala.catalog.AuthorizationPolicy;
+
+import org.apache.commons.lang.reflect.ConstructorUtils;
+import org.apache.sentry.core.common.Model;
+import org.apache.sentry.core.model.db.HivePrivilegeModel;
+import org.apache.sentry.policy.common.PolicyEngine;
+import org.apache.sentry.policy.engine.common.CommonPolicyEngine;
+import org.apache.sentry.provider.cache.SimpleCacheProviderBackend;
+import org.apache.sentry.provider.common.ProviderBackend;
+import org.apache.sentry.provider.common.ProviderBackendContext;
+import org.apache.sentry.provider.common.ResourceAuthorizationProvider;
+import org.apache.sentry.provider.file.SimpleFileProviderBackend;
+
+/**
+ * Wrapper to accommodate differences in Sentry APIs across Sentry versions.
+ */
+class SentryAuthProvider {
+ /**
+ * Creates a new ResourceAuthorizationProvider based on the given configuration.
+ */
+ static ResourceAuthorizationProvider createProvider(AuthorizationConfig config,
+ AuthorizationPolicy policy) {
+ try {
+ ProviderBackend providerBe;
+ // Create the appropriate backend provider.
+ if (config.isFileBasedPolicy()) {
+ providerBe = new SimpleFileProviderBackend(config.getSentryConfig().getConfig(),
+ config.getPolicyFile());
+ ProviderBackendContext context = new ProviderBackendContext();
+ providerBe.initialize(context);
+ } else {
+ // Note: The second parameter to the ProviderBackend is a "resourceFile" path
+ // which is not used by Impala. We cannot pass 'null' so instead pass an empty
+ // string.
+ providerBe = new SimpleCacheProviderBackend(config.getSentryConfig().getConfig(),
+ "");
+ Preconditions.checkNotNull(policy);
+ ProviderBackendContext context = new ProviderBackendContext();
+ context.setBindingHandle(policy);
+ providerBe.initialize(context);
+ }
+
+ CommonPolicyEngine engine =
+ new CommonPolicyEngine(providerBe);
+
+ // Try to create an instance of the specified policy provider class.
+ // Re-throw any exceptions that are encountered.
+ String policyFile = config.getPolicyFile() == null ? "" : config.getPolicyFile();
+
+ return (ResourceAuthorizationProvider) ConstructorUtils.invokeConstructor(
+ Class.forName(config.getPolicyProviderClassName()),
+ new Object[] {policyFile, engine, ImpalaPrivilegeModel.INSTANCE});
+ } catch (Exception e) {
+ // Re-throw as unchecked exception.
+ throw new IllegalStateException(
+ "Error creating ResourceAuthorizationProvider: ", e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MetastoreShim.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MetastoreShim.java
new file mode 100644
index 0000000..3d69545
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MetastoreShim.java
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.compat;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
+import org.apache.hive.service.rpc.thrift.TGetFunctionsReq;
+import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
+import org.apache.hive.service.rpc.thrift.TGetTablesReq;
+import org.apache.impala.authorization.User;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.Pair;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.service.MetadataOp;
+import org.apache.impala.thrift.TMetadataOpRequest;
+import org.apache.impala.thrift.TResultSet;
+import org.apache.thrift.TException;
+
+/**
+ * A wrapper around some of Hive's Metastore APIs to abstract away differences
+ * between major versions of Hive. This implements the shimmed methods for Hive 2.
+ */
+public class MetastoreShim {
+ /**
+ * Wrapper around MetaStoreUtils.validateName() to deal with added arguments.
+ */
+ public static boolean validateName(String name) {
+ return MetaStoreUtils.validateName(name, null);
+ }
+
+ /**
+ * Wrapper around IMetaStoreClient.alter_partition() to deal with added
+ * arguments.
+ */
+ public static void alterPartition(IMetaStoreClient client, Partition partition)
+ throws InvalidOperationException, MetaException, TException {
+ client.alter_partition(
+ partition.getDbName(), partition.getTableName(), partition, null);
+ }
+
+ /**
+ * Wrapper around IMetaStoreClient.alter_partitions() to deal with added
+ * arguments.
+ */
+ public static void alterPartitions(IMetaStoreClient client, String dbName,
+ String tableName, List<Partition> partitions)
+ throws InvalidOperationException, MetaException, TException {
+ client.alter_partitions(dbName, tableName, partitions, null);
+ }
+
+ /**
+ * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added
+ * arguments.
+ */
+ public static void updatePartitionStatsFast(Partition partition, Warehouse warehouse)
+ throws MetaException {
+ MetaStoreUtils.updatePartitionStatsFast(partition, warehouse, null);
+ }
+
+ /**
+ * Return the config key that controls the maximum number of Metastore objects
+ * retrieved in a batch.
+ */
+ public static String metastoreBatchRetrieveObjectsMaxConfigKey() {
+ return HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX.toString();
+ }
+
+ /**
+ * Return the key and value that should be set in the partition parameters to
+ * mark that the stats were generated automatically by a stats task.
+ */
+ public static Pair<String, String> statsGeneratedViaStatsTaskParam() {
+ return Pair.create(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
+ }
+
+ public static TResultSet execGetFunctions(
+ Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+ TGetFunctionsReq req = request.getGet_functions_req();
+ return MetadataOp.getFunctions(
+ frontend, req.getCatalogName(), req.getSchemaName(), req.getFunctionName(), user);
+ }
+
+ public static TResultSet execGetColumns(
+ Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+ TGetColumnsReq req = request.getGet_columns_req();
+ return MetadataOp.getColumns(frontend, req.getCatalogName(), req.getSchemaName(),
+ req.getTableName(), req.getColumnName(), user);
+ }
+
+ public static TResultSet execGetTables(
+ Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+ TGetTablesReq req = request.getGet_tables_req();
+ return MetadataOp.getTables(frontend, req.getCatalogName(), req.getSchemaName(),
+ req.getTableName(), req.getTableTypes(), user);
+ }
+
+ public static TResultSet execGetSchemas(
+ Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException {
+ TGetSchemasReq req = request.getGet_schemas_req();
+ return MetadataOp.getSchemas(
+ frontend, req.getCatalogName(), req.getSchemaName(), user);
+ }
+}
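
The point of the shim is that common frontend code never names a Hive version; each minicluster profile compiles a MetastoreShim matching its Hive jars. A hedged sketch of a call site (method and variable names are illustrative):

    import java.util.List;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.impala.compat.MetastoreShim;
    import org.apache.thrift.TException;

    public class ShimCallSite {
      // Version-agnostic caller: the shim selected at build time supplies the
      // alter_partitions() overload matching the Hive version on the classpath.
      // (InvalidOperationException and MetaException both extend TException.)
      static void applyPartitionChanges(IMetaStoreClient client, String db,
          String tbl, List<Partition> parts) throws TException {
        MetastoreShim.alterPartitions(client, db, tbl, parts);
      }
    }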
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MiniclusterProfile.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MiniclusterProfile.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MiniclusterProfile.java
new file mode 100644
index 0000000..9f9c36c
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/compat/MiniclusterProfile.java
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.compat;
+
+/**
+ * Constant indicating which minicluster profile this build was compiled against.
+ */
+public class MiniclusterProfile {
+ public static final int MINICLUSTER_PROFILE = 3;
+}
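
Because MINICLUSTER_PROFILE is a compile-time constant, shared code can branch on it without reflection, and javac can prune the dead branch. A minimal sketch of such a gate (the branch bodies are illustrative):

    import org.apache.impala.compat.MiniclusterProfile;

    public class ProfileGate {
      public static void main(String[] args) {
        if (MiniclusterProfile.MINICLUSTER_PROFILE == 3) {
          System.out.println("Built against minicluster profile 3");
        } else {
          System.out.println("Built against an older minicluster profile");
        }
      }
    }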
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/compat-minicluster-profile-3/java/org/apache/impala/util/SentryUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/compat-minicluster-profile-3/java/org/apache/impala/util/SentryUtil.java b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/util/SentryUtil.java
new file mode 100644
index 0000000..f85e890
--- /dev/null
+++ b/fe/src/compat-minicluster-profile-3/java/org/apache/impala/util/SentryUtil.java
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.util;
+
+import java.util.Set;
+
+import org.apache.sentry.core.common.exception.SentryAccessDeniedException;
+import org.apache.sentry.core.common.exception.SentryAlreadyExistsException;
+import org.apache.sentry.core.common.exception.SentryGroupNotFoundException;
+import org.apache.sentry.provider.db.service.thrift.SentryPolicyServiceClient;
+import org.apache.sentry.provider.db.service.thrift.TSentryRole;
+// See IMPALA-5540. Sentry over-shades itself (to avoid leaking Thrift),
+// causing this unusual package name. In the code below, we typically
+// check for both variants of an exception when both may be on the classpath.
+import sentry.org.apache.sentry.core.common.exception.SentryUserException;
+
+/**
+ * Wrapper to accommodate differences in Sentry APIs across Sentry versions.
+ */
+public class SentryUtil {
+ static boolean isSentryAlreadyExists(Exception e) {
+ return e instanceof SentryAlreadyExistsException || e instanceof
+ sentry.org.apache.sentry.core.common.exception.SentryAlreadyExistsException;
+ }
+
+ static boolean isSentryAccessDenied(Exception e) {
+ return e instanceof SentryAccessDeniedException || e instanceof
+ sentry.org.apache.sentry.core.common.exception.SentryAccessDeniedException;
+ }
+
+ public static boolean isSentryGroupNotFound(Exception e) {
+ return e instanceof SentryGroupNotFoundException;
+ }
+
+ static Set<TSentryRole> listRoles(SentryPolicyServiceClient client, String username)
+ throws SentryUserException {
+ return client.listAllRoles(username);
+ }
+}
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
index 5f43ec3..e92458e 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeFileStmt.java
@@ -17,8 +17,6 @@
package org.apache.impala.analysis;
-import java.io.FileNotFoundException;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -26,25 +24,12 @@ import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
-import parquet.hadoop.ParquetFileReader;
-import parquet.hadoop.metadata.ParquetMetadata;
-import parquet.schema.OriginalType;
-import parquet.schema.PrimitiveType;
import org.apache.impala.authorization.Privilege;
-import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.HdfsCompression;
import org.apache.impala.catalog.HdfsFileFormat;
-import org.apache.impala.catalog.MapType;
-import org.apache.impala.catalog.ScalarType;
-import org.apache.impala.catalog.StructField;
-import org.apache.impala.catalog.StructType;
-import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
-import org.apache.impala.common.FileSystemUtil;
import org.apache.impala.thrift.THdfsFileFormat;
@@ -56,8 +41,6 @@ import org.apache.impala.thrift.THdfsFileFormat;
public class CreateTableLikeFileStmt extends CreateTableStmt {
private final HdfsUri schemaLocation_;
private final THdfsFileFormat schemaFileFormat_;
- private final static String ERROR_MSG =
- "Failed to convert Parquet type\n%s\nto an Impala %s type:\n%s\n";
public CreateTableLikeFileStmt(CreateTableStmt createTableStmt,
THdfsFileFormat schemaFileFormat, HdfsUri schemaLocation) {
@@ -66,291 +49,6 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
schemaFileFormat_ = schemaFileFormat;
}
- /**
- * Reads the first block from the given HDFS file and returns the Parquet schema.
- * Throws Analysis exception for any failure, such as failing to read the file
- * or failing to parse the contents.
- */
- private static parquet.schema.MessageType loadParquetSchema(Path pathToFile)
- throws AnalysisException {
- try {
- FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
- if (!fs.isFile(pathToFile)) {
- throw new AnalysisException("Cannot infer schema, path is not a file: " +
- pathToFile);
- }
- } catch (IOException e) {
- throw new AnalysisException("Failed to connect to filesystem:" + e);
- } catch (IllegalArgumentException e) {
- throw new AnalysisException(e.getMessage());
- }
- ParquetMetadata readFooter = null;
- try {
- readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
- pathToFile);
- } catch (FileNotFoundException e) {
- throw new AnalysisException("File not found: " + e);
- } catch (IOException e) {
- throw new AnalysisException("Failed to open file as a parquet file: " + e);
- } catch (RuntimeException e) {
- // Parquet throws a generic RuntimeException when reading a non-parquet file
- if (e.toString().contains("is not a Parquet file")) {
- throw new AnalysisException("File is not a parquet file: " + pathToFile);
- }
- // otherwise, who knows what we caught, throw it back up
- throw e;
- }
- return readFooter.getFileMetaData().getSchema();
- }
-
- /**
- * Converts a "primitive" Parquet type to an Impala type.
- * A primitive type is a non-nested type with no annotations.
- */
- private static Type convertPrimitiveParquetType(parquet.schema.Type parquetType)
- throws AnalysisException {
- Preconditions.checkState(parquetType.isPrimitive());
- PrimitiveType prim = parquetType.asPrimitiveType();
- switch (prim.getPrimitiveTypeName()) {
- case BINARY: return Type.STRING;
- case BOOLEAN: return Type.BOOLEAN;
- case DOUBLE: return Type.DOUBLE;
- case FIXED_LEN_BYTE_ARRAY:
- throw new AnalysisException(
- "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field " +
- parquetType.getName());
- case FLOAT: return Type.FLOAT;
- case INT32: return Type.INT;
- case INT64: return Type.BIGINT;
- case INT96: return Type.TIMESTAMP;
- default:
- Preconditions.checkState(false, "Unexpected parquet primitive type: " +
- prim.getPrimitiveTypeName());
- return null;
- }
- }
-
- /**
- * Converts a Parquet group type to an Impala map Type. We support both standard
- * Parquet map representations, as well as legacy. Legacy representations are handled
- * according to this specification:
- * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#backward-compatibility-rules-1
- *
- * Standard representation of a map in Parquet:
- * <optional | required> group <name> (MAP) { <-- outerGroup is pointing at this
- * repeated group key_value {
- * required <key-type> key;
- * <optional | required> <value-type> value;
- * }
- * }
- */
- private static MapType convertMap(parquet.schema.GroupType outerGroup)
- throws AnalysisException {
- if (outerGroup.getFieldCount() != 1){
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The logical MAP type must have exactly 1 inner field."));
- }
-
- parquet.schema.Type innerField = outerGroup.getType(0);
- if (!innerField.isRepetition(parquet.schema.Type.Repetition.REPEATED)){
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The logical MAP type must have a repeated inner field."));
- }
- if (innerField.isPrimitive()) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The inner field of the logical MAP type must be a group."));
- }
-
- parquet.schema.GroupType innerGroup = innerField.asGroupType();
- // It does not matter whether innerGroup has an annotation or not (for example it may
- // be annotated with MAP_KEY_VALUE). We treat the case that innerGroup has an
- // annotation and the case the innerGroup does not have an annotation the same.
- if (innerGroup.getFieldCount() != 2) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The inner field of the logical MAP type must have exactly 2 fields."));
- }
-
- parquet.schema.Type key = innerGroup.getType(0);
- if (!key.getName().equals("key")) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The name of the first field of the inner field of the logical MAP " +
- "type must be 'key'"));
- }
- if (!key.isPrimitive()) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The key type of the logical MAP type must be primitive."));
- }
- parquet.schema.Type value = innerGroup.getType(1);
- if (!value.getName().equals("value")) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "MAP", "The name of the second field of the inner field of the logical MAP " +
- "type must be 'value'"));
- }
-
- return new MapType(convertParquetType(key), convertParquetType(value));
- }
-
- /**
- * Converts a Parquet group type to an Impala struct Type.
- */
- private static StructType convertStruct(parquet.schema.GroupType outerGroup)
- throws AnalysisException {
- ArrayList<StructField> structFields = new ArrayList<StructField>();
- for (parquet.schema.Type field: outerGroup.getFields()) {
- StructField f = new StructField(field.getName(), convertParquetType(field));
- structFields.add(f);
- }
- return new StructType(structFields);
- }
-
- /**
- * Converts a Parquet group type to an Impala array Type. We can handle the standard
- * representation, but also legacy representations for backwards compatibility.
- * Legacy representations are handled according to this specification:
- * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#backward-compatibility-rules
- *
- * Standard representation of an array in Parquet:
- * <optional | required> group <name> (LIST) { <-- outerGroup is pointing at this
- * repeated group list {
- * <optional | required> <element-type> element;
- * }
- * }
- */
- private static ArrayType convertArray(parquet.schema.GroupType outerGroup)
- throws AnalysisException {
- if (outerGroup.getFieldCount() != 1) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "LIST", "The logical LIST type must have exactly 1 inner field."));
- }
-
- parquet.schema.Type innerField = outerGroup.getType(0);
- if (!innerField.isRepetition(parquet.schema.Type.Repetition.REPEATED)) {
- throw new AnalysisException(String.format(ERROR_MSG, outerGroup.toString(),
- "LIST", "The inner field of the logical LIST type must be repeated."));
- }
- if (innerField.isPrimitive() || innerField.getOriginalType() != null) {
- // From the Parquet Spec:
- // 1. If the repeated field is not a group then it's type is the element type.
- //
- // If innerField is a group, but originalType is not null, the element type is
- // based on the logical type.
- return new ArrayType(convertParquetType(innerField));
- }
-
- parquet.schema.GroupType innerGroup = innerField.asGroupType();
- if (innerGroup.getFieldCount() != 1) {
- // From the Parquet Spec:
- // 2. If the repeated field is a group with multiple fields, then it's type is a
- // struct.
- return new ArrayType(convertStruct(innerGroup));
- }
-
- return new ArrayType(convertParquetType(innerGroup.getType(0)));
- }
-
- /**
- * Converts a "logical" Parquet type to an Impala column type.
- * A Parquet type is considered logical when it has an annotation. The annotation is
- * stored as a "OriginalType". The Parquet documentation refers to these as logical
- * types, so we use that terminology here.
- */
- private static Type convertLogicalParquetType(parquet.schema.Type parquetType)
- throws AnalysisException {
- OriginalType orig = parquetType.getOriginalType();
- if (orig == OriginalType.LIST) {
- return convertArray(parquetType.asGroupType());
- }
- if (orig == OriginalType.MAP || orig == OriginalType.MAP_KEY_VALUE) {
- // MAP_KEY_VALUE annotation should not be used any more. However, according to the
- // Parquet spec, some existing data incorrectly uses MAP_KEY_VALUE in place of MAP.
- // For backward-compatibility, a group annotated with MAP_KEY_VALUE that is not
- // contained by a MAP-annotated group should be handled as a MAP-annotated group.
- return convertMap(parquetType.asGroupType());
- }
-
- PrimitiveType prim = parquetType.asPrimitiveType();
- if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY &&
- (orig == OriginalType.UTF8 || orig == OriginalType.ENUM)) {
- // UTF8 is the type annotation Parquet uses for strings
- // ENUM is the type annotation Parquet uses to indicate that
- // the original data type, before conversion to parquet, had been enum.
- // Applications which do not have enumerated types (e.g. Impala)
- // should interpret it as a string.
- // We check to make sure it applies to BINARY to avoid errors if there is a bad
- // annotation.
- return Type.STRING;
- }
-
- if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32
- || prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT64) {
- // Map signed integer types to an supported Impala column type
- switch (orig) {
- case INT_8: return Type.TINYINT;
- case INT_16: return Type.SMALLINT;
- case INT_32: return Type.INT;
- case INT_64: return Type.BIGINT;
- }
- }
-
- if (orig == OriginalType.DECIMAL) {
- return ScalarType.createDecimalType(prim.getDecimalMetadata().getPrecision(),
- prim.getDecimalMetadata().getScale());
- }
-
- throw new AnalysisException(
- "Unsupported logical parquet type " + orig + " (primitive type is " +
- prim.getPrimitiveTypeName().name() + ") for field " +
- parquetType.getName());
- }
-
- /**
- * Converts a Parquet type into an Impala type.
- */
- private static Type convertParquetType(parquet.schema.Type field)
- throws AnalysisException {
- Type type = null;
- // TODO for 2.3: If a field is not annotated with LIST, it can still be sometimes
- // interpreted as an array. The following 2 examples should be interpreted as an array
- // of integers, but this is currently not done.
- // 1. repeated int int_col;
- // 2. required group int_arr {
- // repeated group list {
- // required int element;
- // }
- // }
- if (field.getOriginalType() != null) {
- type = convertLogicalParquetType(field);
- } else if (field.isPrimitive()) {
- type = convertPrimitiveParquetType(field);
- } else {
- // If field is not primitive, it must be a struct.
- type = convertStruct(field.asGroupType());
- }
- return type;
- }
-
- /**
- * Parses a Parquet file stored in HDFS and returns the corresponding Impala schema.
- * This fails with an analysis exception if any errors occur reading the file,
- * parsing the Parquet schema, or if the Parquet types cannot be represented in Impala.
- */
- private static List<ColumnDef> extractParquetSchema(HdfsUri location)
- throws AnalysisException {
- parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
- List<parquet.schema.Type> fields = parquetSchema.getFields();
- List<ColumnDef> schema = new ArrayList<ColumnDef>();
-
- for (parquet.schema.Type field: fields) {
- Type type = convertParquetType(field);
- Preconditions.checkNotNull(type);
- String colName = field.getName();
- Map<ColumnDef.Option, Object> option = Maps.newHashMap();
- option.put(ColumnDef.Option.COMMENT, "Inferred from Parquet file.");
- schema.add(new ColumnDef(colName, new TypeDef(type), option));
- }
- return schema;
- }
-
@Override
public String toSql() {
ArrayList<String> colsSql = Lists.newArrayList();
@@ -376,7 +74,7 @@ public class CreateTableLikeFileStmt extends CreateTableStmt {
schemaLocation_.analyze(analyzer, Privilege.ALL, FsAction.READ);
switch (schemaFileFormat_) {
case PARQUET:
- getColumnDefs().addAll(extractParquetSchema(schemaLocation_));
+ getColumnDefs().addAll(ParquetHelper.extractParquetSchema(schemaLocation_));
break;
default:
throw new AnalysisException("Unsupported file type for schema inference: "
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
index ad05751..43d582b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java
@@ -494,10 +494,10 @@ public class InsertStmt extends StatementBase {
}
for (int colIdx = 0; colIdx < numClusteringCols; ++colIdx) {
Column col = hdfsTable.getColumns().get(colIdx);
- // Hive has a number of issues handling BOOLEAN partition columns (see HIVE-6590).
+ // Hive 1.x has a number of issues handling BOOLEAN partition columns (see HIVE-6590).
// Instead of working around the Hive bugs, INSERT is disabled for BOOLEAN
- // partitions in Impala. Once the Hive JIRA is resolved, we can remove this
- // analysis check.
+ // partitions in Impala when built against Hive 1. HIVE-6590 has since been
+ // resolved, but the fix is not in Hive 2.3.2, the latest release as of 3/17/2018.
if (col.getType() == Type.BOOLEAN) {
throw new AnalysisException(String.format("INSERT into table with BOOLEAN " +
"partition column (%s) is not supported: %s", col.getName(),
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
index b6fc299..0331f7d 100644
--- a/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
+++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
@@ -17,6 +17,7 @@
package org.apache.impala.authorization;
+import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
@@ -29,7 +30,6 @@ import org.apache.impala.common.InternalException;
import org.apache.sentry.core.common.ActiveRoleSet;
import org.apache.sentry.core.common.Subject;
import org.apache.sentry.core.model.db.DBModelAuthorizable;
-import org.apache.sentry.policy.db.SimpleDBPolicyEngine;
import org.apache.sentry.provider.cache.SimpleCacheProviderBackend;
import org.apache.sentry.provider.common.ProviderBackend;
import org.apache.sentry.provider.common.ProviderBackendContext;
@@ -39,6 +39,8 @@ import org.apache.sentry.provider.file.SimpleFileProviderBackend;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
+import org.apache.impala.util.SentryUtil;
+
/**
* Class used to check whether a user has access to a given resource.
*/
@@ -68,38 +70,7 @@ public class AuthorizationChecker {
*/
private static ResourceAuthorizationProvider createProvider(AuthorizationConfig config,
AuthorizationPolicy policy) {
- try {
- ProviderBackend providerBe;
- // Create the appropriate backend provider.
- if (config.isFileBasedPolicy()) {
- providerBe = new SimpleFileProviderBackend(config.getSentryConfig().getConfig(),
- config.getPolicyFile());
- } else {
- // Note: The second parameter to the ProviderBackend is a "resourceFile" path
- // which is not used by Impala. We cannot pass 'null' so instead pass an empty
- // string.
- providerBe = new SimpleCacheProviderBackend(config.getSentryConfig().getConfig(),
- "");
- Preconditions.checkNotNull(policy);
- ProviderBackendContext context = new ProviderBackendContext();
- context.setBindingHandle(policy);
- providerBe.initialize(context);
- }
-
- SimpleDBPolicyEngine engine =
- new SimpleDBPolicyEngine(config.getServerName(), providerBe);
-
- // Try to create an instance of the specified policy provider class.
- // Re-throw any exceptions that are encountered.
- String policyFile = config.getPolicyFile() == null ? "" : config.getPolicyFile();
- return (ResourceAuthorizationProvider) ConstructorUtils.invokeConstructor(
- Class.forName(config.getPolicyProviderClassName()),
- new Object[] {policyFile, engine});
- } catch (Exception e) {
- // Re-throw as unchecked exception.
- throw new IllegalStateException(
- "Error creating ResourceAuthorizationProvider: ", e);
- }
+ return SentryAuthProvider.createProvider(config, policy);
}
/*
@@ -113,7 +84,16 @@ public class AuthorizationChecker {
* local group mappings.
*/
public Set<String> getUserGroups(User user) throws InternalException {
- return provider_.getGroupMapping().getGroups(user.getShortName());
+ try {
+ return provider_.getGroupMapping().getGroups(user.getShortName());
+ } catch (Exception e) {
+ if (SentryUtil.isSentryGroupNotFound(e)) {
+ // Sentry 2.1+ throws an exception when the user does not exist; swallow the
+ // exception and just return an empty set in this case.
+ return Collections.emptySet();
+ }
+ throw new RuntimeException(e);
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/main/java/org/apache/impala/catalog/AuthorizationPolicy.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/AuthorizationPolicy.java b/fe/src/main/java/org/apache/impala/catalog/AuthorizationPolicy.java
index bc83522..1c84973 100644
--- a/fe/src/main/java/org/apache/impala/catalog/AuthorizationPolicy.java
+++ b/fe/src/main/java/org/apache/impala/catalog/AuthorizationPolicy.java
@@ -294,6 +294,19 @@ public class AuthorizationPolicy implements PrivilegeCache {
return privileges;
}
+ /**
+ * Returns a set of privilege strings in Sentry format.
+ */
+ // This is an override for Sentry 2.1, but not for Sentry 1.x; we omit the
+ // @Override annotation so the code compiles against both.
+ // @Override
+ public Set<String> listPrivileges(Set<String> groups, Set<String> users,
+ ActiveRoleSet roleSet) {
+ /* User-based roles and an authorization hierarchy are not currently supported.
+ Fall back to listing privileges using groups. */
+ return listPrivileges(groups, roleSet);
+ }
+
@Override
public void close() {
// Nothing to do, but required by PrivilegeCache.
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java b/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
index f320cb9..f55ac30 100644
--- a/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
+++ b/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
@@ -19,8 +19,6 @@ package org.apache.impala.util;
import java.util.List;
-import org.apache.sentry.provider.db.SentryAccessDeniedException;
-import org.apache.sentry.provider.db.SentryAlreadyExistsException;
import org.apache.sentry.provider.db.service.thrift.SentryPolicyServiceClient;
import org.apache.sentry.provider.db.service.thrift.TSentryGrantOption;
import org.apache.sentry.provider.db.service.thrift.TSentryPrivilege;
@@ -124,10 +122,11 @@ public class SentryPolicyService {
} else {
client.get().dropRole(requestingUser.getShortName(), roleName);
}
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "DROP_ROLE"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "DROP_ROLE"));
+ }
throw new InternalException("Error dropping role: ", e);
} finally {
client.close();
@@ -151,14 +150,16 @@ public class SentryPolicyService {
SentryServiceClient client = new SentryServiceClient();
try {
client.get().createRole(requestingUser.getShortName(), roleName);
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "CREATE_ROLE"));
- } catch (SentryAlreadyExistsException e) {
- if (ifNotExists) return;
- throw new InternalException("Error creating role: ", e);
} catch (Exception e) {
- throw new InternalException("Error creating role: ", e);
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "CREATE_ROLE"));
+ }
+ if (SentryUtil.isSentryAlreadyExists(e)) {
+ if (ifNotExists) return;
+ throw new InternalException("Error creating role: " + e.getMessage(), e);
+ }
+ throw new InternalException("Error creating role: " + e.getMessage(), e);
} finally {
client.close();
}
@@ -181,10 +182,11 @@ public class SentryPolicyService {
SentryServiceClient client = new SentryServiceClient();
try {
client.get().grantRoleToGroup(requestingUser.getShortName(), groupName, roleName);
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "GRANT_ROLE"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "GRANT_ROLE"));
+ }
throw new InternalException(
"Error making 'grantRoleToGroup' RPC to Sentry Service: ", e);
} finally {
@@ -210,10 +212,11 @@ public class SentryPolicyService {
try {
client.get().revokeRoleFromGroup(requestingUser.getShortName(),
groupName, roleName);
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "REVOKE_ROLE"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "REVOKE_ROLE"));
+ }
throw new InternalException(
"Error making 'revokeRoleFromGroup' RPC to Sentry Service: ", e);
} finally {
@@ -288,10 +291,11 @@ public class SentryPolicyService {
privilege.isHas_grant_opt());
break;
}
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "GRANT_PRIVILEGE"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "GRANT_PRIVILEGE"));
+ }
throw new InternalException(
"Error making 'grantPrivilege*' RPC to Sentry Service: ", e);
} finally {
@@ -355,10 +359,11 @@ public class SentryPolicyService {
null);
break;
}
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "REVOKE_PRIVILEGE"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "REVOKE_PRIVILEGE"));
+ }
throw new InternalException(
"Error making 'revokePrivilege*' RPC to Sentry Service: ", e);
} finally {
@@ -390,11 +395,13 @@ public class SentryPolicyService {
public List<TSentryRole> listAllRoles(User requestingUser) throws ImpalaException {
SentryServiceClient client = new SentryServiceClient();
try {
- return Lists.newArrayList(client.get().listRoles(requestingUser.getShortName()));
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "LIST_ROLES"));
+ return Lists.newArrayList(SentryUtil.listRoles(client.get(),
+ requestingUser.getShortName()));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "LIST_ROLES"));
+ }
throw new InternalException("Error making 'listRoles' RPC to Sentry Service: ", e);
} finally {
client.close();
@@ -410,10 +417,11 @@ public class SentryPolicyService {
try {
return Lists.newArrayList(client.get().listAllPrivilegesByRoleName(
requestingUser.getShortName(), roleName));
- } catch (SentryAccessDeniedException e) {
- throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
- requestingUser.getName(), "LIST_ROLE_PRIVILEGES"));
} catch (Exception e) {
+ if (SentryUtil.isSentryAccessDenied(e)) {
+ throw new AuthorizationException(String.format(ACCESS_DENIED_ERROR_MSG,
+ requestingUser.getName(), "LIST_ROLE_PRIVILEGES"));
+ }
throw new InternalException("Error making 'listAllPrivilegesByRoleName' RPC to " +
"Sentry Service: ", e);
} finally {
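[Note: the catch-and-classify pattern repeated throughout this file delegates
to SentryUtil, added elsewhere in this commit, so no version-specific
exception type is referenced at compile time. A plausible sketch of companion
helpers to the isSentryGroupNotFound() sketch above, assuming they too match
on class name (the real implementations live in the
compat-minicluster-profile-* trees and may differ):

    // Hypothetical sketch only.
    static boolean isSentryAccessDenied(Exception e) {
      return e.getClass().getSimpleName().equals("SentryAccessDeniedException");
    }
    static boolean isSentryAlreadyExists(Exception e) {
      return e.getClass().getSimpleName().equals("SentryAlreadyExistsException");
    }
]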
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
index e517172..39d207f 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationTest.java
@@ -46,6 +46,7 @@ import org.apache.impala.common.FrontendTestBase;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.InternalException;
import org.apache.impala.common.RuntimeEnv;
+import org.apache.impala.compat.MiniclusterProfile;
import org.apache.impala.service.Frontend;
import org.apache.impala.testutil.ImpaladTestCatalog;
import org.apache.impala.thrift.TColumnValue;
@@ -76,6 +77,7 @@ import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -1603,7 +1605,7 @@ public class AuthorizationTest extends FrontendTestBase {
// Expected output of DESCRIBE for a functional table.
// "*" is used when the output is variable such as time or user.
private static final List<String> EXPECTED_DESCRIBE_EXTENDED_ALLTYPESAGG =
- Lists.newArrayList(
+ ImmutableList.<String>builder().add(
"# col_name","data_type","comment",
"","NULL","NULL",
"id","int","NULL",
@@ -1629,15 +1631,22 @@ public class AuthorizationTest extends FrontendTestBase {
"Database:","functional","NULL",
"Owner:","*","NULL",
"CreateTime:","*","NULL",
- "LastAccessTime:","UNKNOWN","NULL",
- "Protect Mode:","None","NULL",
+ "LastAccessTime:","UNKNOWN","NULL")
+ // Account for minor differences between Hive 1 and Hive 2
+ .addAll(MiniclusterProfile.MINICLUSTER_PROFILE == 2
+ ? ImmutableList.<String>of("Protect Mode:", "None", "NULL")
+ : ImmutableList.<String>of())
+ .add(
"Retention:","0","NULL",
"Location:","hdfs://localhost:20500/test-warehouse/alltypesagg","NULL",
"Table Type:","EXTERNAL_TABLE","NULL",
"Table Parameters:","NULL","NULL",
"","DO_NOT_UPDATE_STATS","true",
- "","EXTERNAL","TRUE",
- "","STATS_GENERATED_VIA_STATS_TASK","true",
+ "","EXTERNAL","TRUE")
+ .addAll(MiniclusterProfile.MINICLUSTER_PROFILE == 2
+ ? ImmutableList.of("", "STATS_GENERATED_VIA_STATS_TASK", "true")
+ : ImmutableList.of("", "STATS_GENERATED", "TASK"))
+ .add(
"","numRows","11000",
"","totalSize","834279",
"","transient_lastDdlTime","*",
@@ -1654,12 +1663,12 @@ public class AuthorizationTest extends FrontendTestBase {
"","escape.delim","\\\\",
"","field.delim",",",
"","serialization.format",","
- );
+ ).build();
// Expected output of DESCRIBE for a functional table.
// "*" is used when the output is variable such as time or user.
private static final List<String> EXPECTED_DESCRIBE_EXTENDED_ALLTYPESSMALL =
- Lists.newArrayList(
+ ImmutableList.<String>builder().add(
"# col_name","data_type","comment",
"","NULL","NULL",
"id","int","NULL",
@@ -1674,14 +1683,20 @@ public class AuthorizationTest extends FrontendTestBase {
"Database:","functional","NULL",
"Owner:","*","NULL",
"CreateTime:","*","NULL",
- "LastAccessTime:","UNKNOWN","NULL",
- "Protect Mode:","None","NULL",
+ "LastAccessTime:","UNKNOWN","NULL")
+ .addAll(MiniclusterProfile.MINICLUSTER_PROFILE == 2
+ ? ImmutableList.<String>of("Protect Mode:", "None", "NULL")
+ : ImmutableList.<String>of())
+ .add(
"Retention:","0","NULL",
"Table Type:","EXTERNAL_TABLE","NULL",
"Table Parameters:","NULL","NULL",
"","DO_NOT_UPDATE_STATS","true",
- "","EXTERNAL","TRUE",
- "","STATS_GENERATED_VIA_STATS_TASK","true",
+ "","EXTERNAL","TRUE")
+ .addAll(MiniclusterProfile.MINICLUSTER_PROFILE == 2
+ ? ImmutableList.of("", "STATS_GENERATED_VIA_STATS_TASK", "true")
+ : ImmutableList.of("", "STATS_GENERATED", "TASK"))
+ .add(
"","numRows","100",
"","totalSize","6472",
"","transient_lastDdlTime","*",
@@ -1698,7 +1713,7 @@ public class AuthorizationTest extends FrontendTestBase {
"","escape.delim","\\\\",
"","field.delim",",",
"","serialization.format",","
- );
+ ).build();
@Test
public void TestDescribeTableResults() throws ImpalaException {
@@ -1735,7 +1750,8 @@ public class AuthorizationTest extends FrontendTestBase {
// Compares two arrays but skips an expected value that contains '*' since we need to
// compare output but some values change based on builds, environments, etc.
private void verifyOutputWithOptionalData(List<String> expected, List<String> actual) {
- Assert.assertEquals(expected.size(), actual.size());
+ Assert.assertEquals("Size difference. Expected: " + expected + " Actual: " + actual,
+ expected.size(), actual.size());
for (int idx = 0; idx < expected.size(); idx++) {
if (!expected.get(idx).equals("*")) {
Assert.assertEquals(expected.get(idx), actual.get(idx));
@@ -2135,7 +2151,13 @@ public class AuthorizationTest extends FrontendTestBase {
public void TestShortUsernameUsed() throws Exception {
// Different long variations of the same username.
List<User> users = Lists.newArrayList(
- new User(USER.getName() + "/abc.host.com@"),
+ // Hadoop 2 accepts Kerberos names missing a realm, but insists
+ // on a terminating '@' even when the default realm is intended.
+ // Hadoop 3 follows the more usual convention: to specify the
+ // default realm, everything after and including the '@'
+ // character is omitted.
+ new User(USER.getName() + "/abc.host.com" +
+ (MiniclusterProfile.MINICLUSTER_PROFILE == 3 ? "" : "@")),
new User(USER.getName() + "/abc.host.com@REAL.COM"),
new User(USER.getName() + "@REAL.COM"));
for (User user: users) {
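[Note: MiniclusterProfile.MINICLUSTER_PROFILE, consulted throughout these
tests, is a compile-time constant supplied by whichever compat source tree
the build selects. A plausible sketch of the profile-3 variant; its contents
are assumed, not shown in this patch:

    package org.apache.impala.compat;

    // Assumed shape: one copy of this class per compat-minicluster-profile-N
    // tree, so the constant is fixed when the frontend is compiled.
    public class MiniclusterProfile {
      public static final int MINICLUSTER_PROFILE = 3;
    }
]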
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 28dc710..b3d16ba 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -54,6 +54,7 @@ import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.Type;
import org.apache.impala.catalog.View;
+import org.apache.impala.compat.MiniclusterProfile;
import org.apache.impala.service.CatalogOpExecutor;
import org.apache.impala.service.Frontend;
import org.apache.impala.testutil.ImpaladTestCatalog;
@@ -407,9 +408,18 @@ public class FrontendTestBase {
} catch (Exception e) {
String errorString = e.getMessage();
Preconditions.checkNotNull(errorString, "Stack trace lost during exception.");
- Assert.assertTrue(
- "got error:\n" + errorString + "\nexpected:\n" + expectedErrorString,
- errorString.startsWith(expectedErrorString));
+ String msg = "got error:\n" + errorString + "\nexpected:\n" + expectedErrorString;
+ if (MiniclusterProfile.MINICLUSTER_PROFILE == 3) {
+ // Different Hadoop versions word this error slightly differently;
+ // normalize the Hadoop 3 form to the Hadoop 2 form as follows:
+ // 'No FileSystem for scheme "x"' -> 'No FileSystem for scheme: x'
+ if (errorString.contains("No FileSystem for scheme ")) {
+ errorString = errorString.replace("\"", "");
+ errorString = errorString.replace("No FileSystem for scheme ",
+ "No FileSystem for scheme: ");
+ }
+ }
+ Assert.assertTrue(msg, errorString.startsWith(expectedErrorString));
return;
}
fail("Stmt didn't result in analysis error: " + stmt);
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/fe/src/test/resources/hive-log4j2.properties.template
----------------------------------------------------------------------
diff --git a/fe/src/test/resources/hive-log4j2.properties.template b/fe/src/test/resources/hive-log4j2.properties.template
new file mode 100644
index 0000000..2361f1d
--- /dev/null
+++ b/fe/src/test/resources/hive-log4j2.properties.template
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = INFO
+name = HiveLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.hive.log.level = INFO
+property.hive.root.logger = DRFA
+property.hive.log.dir = ${env:IMPALA_CLUSTER_LOGS_DIR}
+property.hive.log.file = hive.log
+property.hive.perflogger.log.level = INFO
+
+# list of all appenders
+appenders = console, DRFA
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
+
+# daily rolling file appender
+appender.DRFA.type = RollingRandomAccessFile
+appender.DRFA.name = DRFA
+appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file}
+ # Use %pid in the filePattern to append <process-id>@<host-name> to the filename if you want separate log files for different CLI sessions
+appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd}
+appender.DRFA.layout.type = PatternLayout
+appender.DRFA.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n
+appender.DRFA.policies.type = Policies
+appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy
+appender.DRFA.policies.time.interval = 1
+appender.DRFA.policies.time.modulate = true
+appender.DRFA.strategy.type = DefaultRolloverStrategy
+appender.DRFA.strategy.max = 30
+
+# list of all loggers
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, PerfLogger, AmazonAws, ApacheHttp
+
+logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
+logger.NIOServerCnxn.level = WARN
+
+logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
+logger.ClientCnxnSocketNIO.level = WARN
+
+logger.DataNucleus.name = DataNucleus
+logger.DataNucleus.level = ERROR
+
+logger.Datastore.name = Datastore
+logger.Datastore.level = ERROR
+
+logger.JPOX.name = JPOX
+logger.JPOX.level = ERROR
+
+logger.AmazonAws.name=com.amazonaws
+logger.AmazonAws.level = INFO
+
+logger.ApacheHttp.name=org.apache.http
+logger.ApacheHttp.level = INFO
+
+logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
+logger.PerfLogger.level = ${sys:hive.perflogger.log.level}
+
+# root logger
+rootLogger.level = ${sys:hive.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/impala-parent/pom.xml
----------------------------------------------------------------------
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index 6dc2064..b806f2b 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -51,7 +51,7 @@ under the License.
<junit.version>4.12</junit.version>
<!-- Beware compatibility requirements with Thrift and
KMS; see IMPALA-4210. -->
- <httpcomponents.core.version>4.2.5</httpcomponents.core.version>
+ <httpcomponents.core.version>4.4.9</httpcomponents.core.version>
<yarn-extras.version>${project.version}</yarn-extras.version>
<eclipse.output.directory>eclipse-classes</eclipse.output.directory>
<guava.version>11.0.2</guava.version>
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/testdata/bin/run-hbase.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-hbase.sh b/testdata/bin/run-hbase.sh
index f264b65..878fe4d 100755
--- a/testdata/bin/run-hbase.sh
+++ b/testdata/bin/run-hbase.sh
@@ -36,6 +36,9 @@ cat > ${HBASE_CONF_DIR}/hbase-env.sh <<EOF
export JAVA_HOME=${JAVA_HOME}
export HBASE_LOG_DIR=${HBASE_LOGDIR}
export HBASE_PID_DIR=${HBASE_LOGDIR}
+if [[ $IMPALA_MINICLUSTER_PROFILE == 3 ]]; then
+ export HBASE_CLASSPATH=${HADOOP_CLASSPATH}
+fi
EOF
# Put zookeeper things in the logs/cluster/zoo directory.
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/testdata/bin/run-hive-server.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh
index 42d95b5..49d1de2 100755
--- a/testdata/bin/run-hive-server.sh
+++ b/testdata/bin/run-hive-server.sh
@@ -63,8 +63,10 @@ done
${CLUSTER_BIN}/kill-hive-server.sh &> /dev/null
# Starts a Hive Metastore Server on the specified port.
-HADOOP_CLIENT_OPTS=-Xmx2024m hive --service metastore -p $HIVE_METASTORE_PORT \
- > ${LOGDIR}/hive-metastore.out 2>&1 &
+# To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
+# -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
+HADOOP_CLIENT_OPTS="-Xmx2024m -Dhive.log.file=hive-metastore.log" hive \
+ --service metastore -p $HIVE_METASTORE_PORT > ${LOGDIR}/hive-metastore.out 2>&1 &
# Wait for the Metastore to come up because HiveServer2 relies on it being live.
${CLUSTER_BIN}/wait-for-metastore.py --transport=${METASTORE_TRANSPORT}
@@ -72,7 +74,12 @@ ${CLUSTER_BIN}/wait-for-metastore.py --transport=${METASTORE_TRANSPORT}
if [ ${ONLY_METASTORE} -eq 0 ]; then
# Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
# environment variable.
- HADOOP_HEAPSIZE="512" hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &
+ if [[ $IMPALA_MINICLUSTER_PROFILE == 2 ]]; then
+ HADOOP_HEAPSIZE="512" hive --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &
+ elif [[ $IMPALA_MINICLUSTER_PROFILE == 3 ]]; then
+ HADOOP_CLIENT_OPTS="-Xmx2048m -Dhive.log.file=hive-server2.log" hive \
+ --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &
+ fi
# Wait for the HiveServer2 service to come up because callers of this script
# may rely on it being available.
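[Note: the -Dhive.log.file values passed here (hive-metastore.log and
hive-server2.log) are what the ${sys:hive.log.file} lookup in the
hive-log4j2.properties template added above resolves, so the metastore and
HiveServer2 write to separate log files.]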
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-acls.xml.tmpl
----------------------------------------------------------------------
diff --git a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-acls.xml.tmpl b/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-acls.xml.tmpl
deleted file mode 100644
index 0eefdb4..0000000
--- a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-acls.xml.tmpl
+++ /dev/null
@@ -1,68 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<configuration>
- <property>
- <name>hadoop.kms.acl.CREATE</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.DELETE</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.ROLLOVER</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.GET</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.GET_KEYS</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.GET_METADATA</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.SET_KEY_MATERIAL</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.GENERATE_EEK</name>
- <value>*</value>
- </property>
-
- <property>
- <name>hadoop.kms.acl.DECRYPT_EEK</name>
- <value>*</value>
- </property>
-
- <property>
- <name>default.key.acl.MANAGEMENT</name>
- <value>*</value>
- </property>
-
- <property>
- <name>default.key.acl.GENERATE_EEK</name>
- <value>*</value>
- </property>
-
- <property>
- <name>default.key.acl.DECRYPT_EEK</name>
- <value>*</value>
- </property>
-
- <property>
- <name>default.key.acl.READ</name>
- <value>*</value>
- </property>
-
-</configuration>
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-site.xml.tmpl
----------------------------------------------------------------------
diff --git a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-site.xml.tmpl b/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-site.xml.tmpl
deleted file mode 100644
index 4027e32..0000000
--- a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/kms-site.xml.tmpl
+++ /dev/null
@@ -1,12 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<configuration>
- <property>
- <name>hadoop.kms.key.provider.uri</name>
- <value>jceks://file@/${NODE_DIR}/data/kms.keystore</value>
- </property>
-
- <property>
- <name>hadoop.kms.authentication.type</name>
- <value>simple</value>
- </property>
-</configuration>
http://git-wip-us.apache.org/repos/asf/impala/blob/783de170/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/yarn-site.xml.tmpl
----------------------------------------------------------------------
diff --git a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/yarn-site.xml.tmpl b/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/yarn-site.xml.tmpl
deleted file mode 100644
index 0dd7bec..0000000
--- a/testdata/cluster/node_templates/cdh5/etc/hadoop/conf/yarn-site.xml.tmpl
+++ /dev/null
@@ -1,154 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- TODO: Remove any Llama-related configuration. Can this file be removed entirely? -->
-<configuration>
- <property>
- <name>yarn.resourcemanager.webapp.address</name>
- <value>0.0.0.0:${YARN_WEBUI_PORT}</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.address</name>
- <value>127.0.0.1:${NODEMANAGER_PORT}</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.delete.debug-delay-sec</name>
- <value>600</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.resource.memory-mb</name>
- <value>16384</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.resource.cpu-vcores</name>
- <value>16</value>
- </property>
-
- <property>
- <name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
- <value>100</value>
- </property>
-
- <property>
- <name>yarn.scheduler.fair.continuous-scheduling-enabled</name>
- <value>true</value>
- </property>
-
- <property>
- <name>yarn.scheduler.fair.assignmultiple</name>
- <value>true</value>
- </property>
-
- <property>
- <name>yarn.resourcemanager.scheduler.class</name>
- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.localizer.address</name>
- <value>127.0.0.1:${NODEMANAGER_LOCALIZER_PORT}</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.webapp.address</name>
- <value>127.0.0.1:${NODEMANAGER_WEBUI_PORT}</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.local-dirs</name>
- <value>${NODE_DIR}/data/yarn/local</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.log-dirs</name>
- <value>${NODE_DIR}/data/yarn/logs</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.aux-services</name>
- <value>mapreduce_shuffle</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
- <value>org.apache.hadoop.mapred.ShuffleHandler</value>
- </property>
-
- <property>
- <name>yarn.log-aggregation-enable</name>
- <value>true</value>
- </property>
-
- <property>
- <description>List of directories to store localized files in.</description>
- <name>yarn.nodemanager.local-dirs</name>
- <value>${NODE_DIR}/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
- </property>
-
- <property>
- <description>Where to store container logs.</description>
- <name>yarn.nodemanager.log-dirs</name>
- <value>${NODE_DIR}/var/log/hadoop-yarn/containers</value>
- </property>
-
- <property>
- <description>Where to aggregate logs to.</description>
- <name>yarn.nodemanager.remote-app-log-dir</name>
- <value>${NODE_DIR}/var/log/hadoop-yarn/apps</value>
- </property>
-
- <property>
- <description>Classpath for typical applications.</description>
- <name>yarn.application.classpath</name>
- <value>
- ${HADOOP_CONF_DIR},
- ${HADOOP_HOME}/share/hadoop/tools/lib/*,
- ${HADOOP_HOME}/share/hadoop/common/*,
- ${HADOOP_HOME}/share/hadoop/common/lib/*,
- ${HADOOP_HOME}/share/hadoop/hdfs/*,
- ${HADOOP_HOME}/share/hdfs/common/lib/*,
- ${HADOOP_HOME}/share/hadoop/mapreduce/*,
- ${HADOOP_HOME}/share/hadoop/mapreduce/lib/*,
- ${HADOOP_HOME}/share/hadoop/yarn/*,
- ${HADOOP_HOME}/share/hadoop/yarn/lib/*,
- ${LZO_JAR_PATH}
- </value>
- </property>
-
- <!-- BEGIN Kerberos settings -->
-
- <!-- KERBEROS TODO: Add these to yarn.application.classpath.
- ${IMPALA_FE_DIR}/target/*,${HADOOP_LZO}/build/*,
- ${IMPALA_FE_DIR}/target/dependency/* -->
-
- <!-- ResourceManager security configs -->
- <property>
- <name>yarn.resourcemanager.keytab</name>
- <value>${KRB5_KTNAME}</value>
- </property>
-
- <property>
- <name>yarn.resourcemanager.principal</name>
- <value>${MINIKDC_PRINC_USER}</value>
- <!-- Sort of horrible: instead of the yarn principle, we'll use ${USER}
- so that we don't have a problem with file system permissions. -->
- </property>
-
- <!-- NodeManager security configs -->
- <property>
- <name>yarn.nodemanager.keytab</name>
- <value>${KRB5_KTNAME}</value>
- </property>
-
- <property>
- <name>yarn.nodemanager.principal</name>
- <value>${MINIKDC_PRINC_USER}</value>
- <!-- Also sort of horrible as per above -->
- </property>
- <!-- END Kerberos settings -->
-
-</configuration>