Posted to commits@impala.apache.org by mi...@apache.org on 2018/08/03 21:23:29 UTC

[1/7] impala git commit: IMPALA-5542: Impala cannot scan Parquet decimal stored as int64_t/int32_t

Repository: impala
Updated Branches:
  refs/heads/master a76ea5c2e -> 2b4d06710


IMPALA-5542: Impala cannot scan Parquet decimal stored as int64_t/int32_t

The Decimal type in Parquet is a logical type. That means
the Parquet file stores values of some physical/primitive
type that is annotated with the DECIMAL tag to make them
behave like decimals.

The allowed physical types for decimals are INT32, INT64,
FIXED (FIXED_LEN_BYTE_ARRAY), and BINARY (BYTE_ARRAY).
Before this commit, Impala could only read decimals stored
as FIXED or BINARY.

Spark decided to write decimals as INT32 or INT64 when
their precision allows it:
(1 <= precision <= 9) ==> INT32
(10 <= precision <= 18) ==> INT64

I updated our column readers to accept INT32 and INT64
as valid physical types for decimals.
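
As an illustration only (not code from this patch), here is a minimal
C++ sketch of the precision-based mapping described above; the enum and
helper name are hypothetical:

// Maps a decimal's precision to the Parquet physical type that writers
// such as Spark may choose for it. Precisions above 18 need a
// byte-array based physical type.
enum class ParquetPhysicalType { INT32, INT64, FIXED_LEN_BYTE_ARRAY };

ParquetPhysicalType ToParquetPhysicalType(int precision) {
  if (precision >= 1 && precision <= 9) return ParquetPhysicalType::INT32;
  if (precision >= 10 && precision <= 18) return ParquetPhysicalType::INT64;
  return ParquetPhysicalType::FIXED_LEN_BYTE_ARRAY;  // precision 19..38
}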

Testing:
* extended parquet-plain-test.cc
* added Parquet files generated by Spark 2.3.1
  and updated test_scanners.py

Change-Id: Ib8c41bfc7c1664bdba5099d3893dc8dbe4304794
Reviewed-on: http://gerrit.cloudera.org:8080/11000
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7917eac0
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7917eac0
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7917eac0

Branch: refs/heads/master
Commit: 7917eac0ad52fbfa4f6e95046986950ea04af676
Parents: a76ea5c
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Thu Jul 12 15:27:27 2018 +0200
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Aug 2 20:21:12 2018 +0000

----------------------------------------------------------------------
 be/src/exec/parquet-column-readers.cc           |   8 +++++
 be/src/exec/parquet-common.h                    |  32 ++++++++++++-------
 be/src/exec/parquet-metadata-utils.cc           |  17 ++++++++--
 be/src/exec/parquet-plain-test.cc               |  24 ++++++++++++--
 testdata/data/README                            |   8 +++++
 testdata/data/decimal_stored_as_int32.parquet   | Bin 0 -> 597 bytes
 testdata/data/decimal_stored_as_int64.parquet   | Bin 0 -> 627 bytes
 .../QueryTest/parquet-decimal-formats.test      |  18 +++++++++++
 tests/query_test/test_scanners.py               |  17 ++++++++++
 9 files changed, 109 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/be/src/exec/parquet-column-readers.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-column-readers.cc b/be/src/exec/parquet-column-readers.cc
index 3b13001..2cb483e 100644
--- a/be/src/exec/parquet-column-readers.cc
+++ b/be/src/exec/parquet-column-readers.cc
@@ -1532,6 +1532,14 @@ static ParquetColumnReader* GetDecimalColumnReader(const SchemaNode& node,
             parent, node, slot_desc);
       }
       break;
+    case parquet::Type::INT32:
+      DCHECK_EQ(sizeof(Decimal4Value::StorageType), slot_desc->type().GetByteSize());
+      return new ScalarColumnReader<Decimal4Value, parquet::Type::INT32, true>(
+          parent, node, slot_desc);
+    case parquet::Type::INT64:
+      DCHECK_EQ(sizeof(Decimal8Value::StorageType), slot_desc->type().GetByteSize());
+      return new ScalarColumnReader<Decimal8Value, parquet::Type::INT64, true>(
+          parent, node, slot_desc);
     default:
       DCHECK(false) << "Invalid decimal primitive type";
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/be/src/exec/parquet-common.h
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-common.h b/be/src/exec/parquet-common.h
index a81064e..f3add14 100644
--- a/be/src/exec/parquet-common.h
+++ b/be/src/exec/parquet-common.h
@@ -57,7 +57,8 @@ parquet::CompressionCodec::type ConvertImpalaToParquetCodec(
 class ParquetPlainEncoder {
  public:
   /// Returns the byte size of 'v' where InternalType is the datatype that Impala uses
-  /// internally to store tuple data.
+  /// internally to store tuple data. Used in some template function implementations to
+  /// determine the encoded byte size for fixed-length types.
   template <typename InternalType>
   static int ByteSize(const InternalType& v) { return sizeof(InternalType); }
 
@@ -183,6 +184,17 @@ class ParquetPlainEncoder {
   /// need not be aligned. If PARQUET_TYPE is FIXED_LEN_BYTE_ARRAY then 'fixed_len_size'
   /// is the size of the object. Otherwise, it is unused.
   /// Returns the number of bytes read or -1 if the value was not decoded successfully.
+  /// This generic template function is used with the following types:
+  /// =============================
+  /// InternalType   | PARQUET_TYPE
+  /// =============================
+  /// int32_t        | INT32
+  /// int64_t        | INT64
+  /// float          | FLOAT
+  /// double         | DOUBLE
+  /// Decimal4Value  | INT32
+  /// Decimal8Value  | INT64
+  /// TimestampValue | INT96
   template <typename InternalType, parquet::Type::type PARQUET_TYPE>
   static int Decode(const uint8_t* buffer, const uint8_t* buffer_end, int fixed_len_size,
       InternalType* v) {
@@ -203,24 +215,22 @@ template <> int ParquetPlainEncoder::Encode(const bool&, int fixed_len_size, uin
 template <> int ParquetPlainEncoder::Decode<bool, parquet::Type::BOOLEAN>(const uint8_t*,
     const uint8_t*, int fixed_len_size, bool* v);
 
-/// Not used for decimals since the plain encoding encodes them using
-/// FIXED_LEN_BYTE_ARRAY.
-inline int DecimalByteSize() {
-  DCHECK(false);
-  return -1;
-}
-
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal4Value&) {
-  return DecimalByteSize();
+  // Only used when the decimal is stored as INT32.
+  return sizeof(Decimal4Value::StorageType);
 }
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal8Value&) {
-  return DecimalByteSize();
+  // Only used when the decimal is stored as INT64.
+  return sizeof(Decimal8Value::StorageType);
 }
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal16Value&) {
-  return DecimalByteSize();
+  // Not used, since such big decimals can only be stored as BYTE_ARRAY or
+  // FIXED_LEN_BYTE_ARRAY.
+  DCHECK(false);
+  return -1;
 }
 
 /// Parquet doesn't have 8-bit or 16-bit ints. They are converted to 32-bit.
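
For context, the plain encoding of an INT32/INT64-backed decimal is just
the little-endian unscaled integer; the scale comes from the column
metadata. A minimal standalone C++ sketch of that decode step follows
(illustrative names, independent of Impala's ParquetPlainEncoder):

#include <cstdint>
#include <cstring>

// Reads the unscaled value of a decimal stored as a plain-encoded INT32.
// Returns the number of bytes consumed, or -1 if the buffer is too short.
// Assumes a little-endian host, as Impala's decoder does.
int DecodeInt32BackedDecimal(const uint8_t* buffer, const uint8_t* buffer_end,
                             int32_t* unscaled_value) {
  if (buffer_end - buffer < static_cast<int>(sizeof(int32_t))) return -1;
  std::memcpy(unscaled_value, buffer, sizeof(int32_t));
  return sizeof(int32_t);
}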

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/be/src/exec/parquet-metadata-utils.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-metadata-utils.cc b/be/src/exec/parquet-metadata-utils.cc
index 3d05fe6..d199c6e 100644
--- a/be/src/exec/parquet-metadata-utils.cc
+++ b/be/src/exec/parquet-metadata-utils.cc
@@ -57,8 +57,8 @@ const map<PrimitiveType, set<parquet::Type::type>> SUPPORTED_PHYSICAL_TYPES = {
     {PrimitiveType::TYPE_DATE, {parquet::Type::BYTE_ARRAY}},
     {PrimitiveType::TYPE_DATETIME, {parquet::Type::BYTE_ARRAY}},
     {PrimitiveType::TYPE_BINARY, {parquet::Type::BYTE_ARRAY}},
-    {PrimitiveType::TYPE_DECIMAL, {parquet::Type::FIXED_LEN_BYTE_ARRAY,
-        parquet::Type::BYTE_ARRAY}},
+    {PrimitiveType::TYPE_DECIMAL, {parquet::Type::INT32, parquet::Type::INT64,
+        parquet::Type::FIXED_LEN_BYTE_ARRAY, parquet::Type::BYTE_ARRAY}},
     {PrimitiveType::TYPE_CHAR, {parquet::Type::BYTE_ARRAY}},
     {PrimitiveType::TYPE_VARCHAR, {parquet::Type::BYTE_ARRAY}},
 };
@@ -193,6 +193,19 @@ Status ParquetMetadataUtils::ValidateColumn(const char* filename,
   bool is_converted_type_decimal = schema_element.__isset.converted_type
       && schema_element.converted_type == parquet::ConvertedType::DECIMAL;
   if (slot_desc->type().type == TYPE_DECIMAL) {
+    // TODO: allow converting to wider type (IMPALA-2515)
+    if (schema_element.type == parquet::Type::INT32 &&
+        sizeof(int32_t) != slot_desc->type().GetByteSize()) {
+      return Status(Substitute("File '$0' decimal column '$1' is stored as INT32, but "
+          "based on the precision in the table metadata, another type would needed.",
+          filename, schema_element.name));
+    }
+    if (schema_element.type == parquet::Type::INT64 &&
+        sizeof(int64_t) != slot_desc->type().GetByteSize()) {
+      return Status(Substitute("File '$0' decimal column '$1' is stored as INT64, but "
+          "based on the precision in the table metadata, another type would needed.",
+          filename, schema_element.name));
+    }
     // We require that the scale and byte length be set.
     if (schema_element.type == parquet::Type::FIXED_LEN_BYTE_ARRAY) {
       if (!schema_element.__isset.type_length) {

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/be/src/exec/parquet-plain-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-plain-test.cc b/be/src/exec/parquet-plain-test.cc
index 37acd2c..2bcfa1d 100644
--- a/be/src/exec/parquet-plain-test.cc
+++ b/be/src/exec/parquet-plain-test.cc
@@ -35,12 +35,13 @@ int Encode(const InternalType& v, int encoded_byte_size, uint8_t* buffer,
   return ParquetPlainEncoder::Encode(v, encoded_byte_size, buffer);
 }
 
-// Handle special case of encoding decimal types stored as BYTE_ARRAY since it is not
-// implemented in Impala.
+// Handle special case of encoding decimal types stored as BYTE_ARRAY, INT32, and INT64,
+// since these are not implemented in Impala.
 // When parquet_type equals BYTE_ARRAY: 'encoded_byte_size' is the sum of the
 // minimum number of bytes required to store the unscaled value and the bytes required to
 // store the size. Value 'v' passed to it should not contain leading zeros as this
 // method does not strictly conform to the parquet spec in removing those.
+// When parquet_type is INT32 or INT64, we simply write the unscaled value to the buffer.
 template <typename DecimalType>
 int EncodeDecimal(const DecimalType& v, int encoded_byte_size, uint8_t* buffer,
     parquet::Type::type parquet_type) {
@@ -51,6 +52,9 @@ int EncodeDecimal(const DecimalType& v, int encoded_byte_size, uint8_t* buffer,
     memcpy(buffer, &decimal_size, sizeof(int32_t));
     DecimalUtil::EncodeToFixedLenByteArray(buffer + sizeof(int32_t), decimal_size, v);
     return encoded_byte_size;
+  } else if (parquet_type == parquet::Type::INT32 ||
+             parquet_type == parquet::Type::INT64) {
+    return ParquetPlainEncoder::Encode(v.value(), encoded_byte_size, buffer);
   }
   return -1;
 }
@@ -139,6 +143,10 @@ TEST(PlainEncoding, Basic) {
       sizeof(Decimal4Value));
   TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal4Value(test_val * -1), sizeof(Decimal4Value));
+  TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(test_val),
+      sizeof(int32_t));
+  TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(test_val * -1),
+      sizeof(int32_t));
 
   // Decimal8Value: General test case
   TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(test_val),
@@ -149,6 +157,10 @@ TEST(PlainEncoding, Basic) {
       sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal8Value(test_val * -1), sizeof(Decimal8Value));
+  TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(test_val),
+      sizeof(int64_t));
+  TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(test_val * -1),
+      sizeof(int64_t));
 
   // Decimal16Value: General test case
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(test_val),
@@ -171,6 +183,10 @@ TEST(PlainEncoding, Basic) {
       Decimal8Value(std::numeric_limits<int32_t>::max()), sizeof(Decimal8Value));
   TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(
       Decimal8Value(std::numeric_limits<int32_t>::min()), sizeof(Decimal8Value));
+  TestType<Decimal8Value, parquet::Type::INT64>(
+      Decimal8Value(std::numeric_limits<int32_t>::max()), sizeof(int64_t));
+  TestType<Decimal8Value, parquet::Type::INT64>(
+      Decimal8Value(std::numeric_limits<int32_t>::min()), sizeof(int64_t));
 
   // Decimal16Value: int32 limits test
   TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(
@@ -205,6 +221,8 @@ TEST(PlainEncoding, Basic) {
           i + sizeof(int32_t));
       TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal4Value(i), i);
       TestType<Decimal4Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal4Value(-i), i);
+      TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(i), sizeof(int32_t));
+      TestType<Decimal4Value, parquet::Type::INT32>(Decimal4Value(-i), sizeof(int32_t));
     }
     if (i <= 8) {
       TestType<Decimal8Value, parquet::Type::BYTE_ARRAY>(Decimal8Value(i),
@@ -213,6 +231,8 @@ TEST(PlainEncoding, Basic) {
           i + sizeof(int32_t));
       TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal8Value(i), i);
       TestType<Decimal8Value, parquet::Type::FIXED_LEN_BYTE_ARRAY>(Decimal8Value(-i), i);
+      TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(i), sizeof(int64_t));
+      TestType<Decimal8Value, parquet::Type::INT64>(Decimal8Value(-i), sizeof(int64_t));
     }
     TestType<Decimal16Value, parquet::Type::BYTE_ARRAY>(Decimal16Value(i),
         i + sizeof(int32_t));

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/testdata/data/README
----------------------------------------------------------------------
diff --git a/testdata/data/README b/testdata/data/README
index fec0e82..ee29090 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -179,3 +179,11 @@ dict_encoding_with_large_bit_width.parquet:
 Parquet file with a single TINYINT column "i" with 33 rows. Created by a modified
 Impala to use 9 bit dictionary indices for encoding. Reading this file used to lead
 to DCHECK errors (IMPALA-7147).
+
+decimal_stored_as_int32.parquet:
+Parquet file generated by Spark 2.3.1 that contains decimals stored as int32.
+Impala needs to be able to read such values (IMPALA-5542)
+
+decimal_stored_as_int64.parquet:
+Parquet file generated by Spark 2.3.1 that contains decimals stored as int64.
+Impala needs to be able to read such values (IMPALA-5542)

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/testdata/data/decimal_stored_as_int32.parquet
----------------------------------------------------------------------
diff --git a/testdata/data/decimal_stored_as_int32.parquet b/testdata/data/decimal_stored_as_int32.parquet
new file mode 100644
index 0000000..2238ba1
Binary files /dev/null and b/testdata/data/decimal_stored_as_int32.parquet differ

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/testdata/data/decimal_stored_as_int64.parquet
----------------------------------------------------------------------
diff --git a/testdata/data/decimal_stored_as_int64.parquet b/testdata/data/decimal_stored_as_int64.parquet
new file mode 100644
index 0000000..5b1a0e3
Binary files /dev/null and b/testdata/data/decimal_stored_as_int64.parquet differ

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/testdata/workloads/functional-query/queries/QueryTest/parquet-decimal-formats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-decimal-formats.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-decimal-formats.test
index 3c54aa1..b57c9e3 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-decimal-formats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-decimal-formats.test
@@ -23,3 +23,21 @@ select * from decimal_encodings;
 ---- TYPES
 DECIMAL,DECIMAL,DECIMAL
 ====
+---- QUERY
+select score from decimal_stored_as_int32
+---- RESULTS
+12.340
+24.560
+34.123
+---- TYPES
+DECIMAL
+====
+---- QUERY
+select score from decimal_stored_as_int64
+---- RESULTS
+12.3400
+24.5600
+34.1230
+---- TYPES
+DECIMAL
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/7917eac0/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index b4e77be..afbac0f 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -290,6 +290,20 @@ class TestParquet(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')
 
+  def _create_table_from_file(self, table_name, unique_database):
+    filename = '%s.parquet' % table_name
+    local_file = os.path.join(os.environ['IMPALA_HOME'],
+                              'testdata/data/%s' % filename)
+    assert os.path.isfile(local_file)
+    hdfs_file = '/test-warehouse/{0}.db/{1}'.format(unique_database, filename)
+    check_call(['hdfs', 'dfs', '-copyFromLocal', '-f', local_file, hdfs_file])
+
+    qualified_table_name = '%s.%s' % (unique_database, table_name)
+    self.client.execute('create table %s like parquet "%s" stored as parquet' %
+                        (qualified_table_name, hdfs_file))
+    self.client.execute('load data inpath "%s" into table %s' %
+                        (hdfs_file, qualified_table_name))
+
   def test_parquet(self, vector):
     self.run_test_case('QueryTest/parquet', vector)
 
@@ -704,6 +718,9 @@ class TestParquet(ImpalaTestSuite):
                                     "testdata/data/", file_name)
       check_call(['hdfs', 'dfs', '-copyFromLocal', data_file_path, table_loc])
 
+    self._create_table_from_file('decimal_stored_as_int32', unique_database)
+    self._create_table_from_file('decimal_stored_as_int64', unique_database)
+
     self.run_test_case('QueryTest/parquet-decimal-formats', vector, unique_database)
 
   def test_rle_encoded_bools(self, vector, unique_database):


[5/7] impala git commit: IMPALA-7381: Prevent build failure after switching to new CDH_BUILD_NUMBER

Posted by mi...@apache.org.
IMPALA-7381: Prevent build failure after switching to new CDH_BUILD_NUMBER

Switching to a new CDH_BUILD_NUMBER requires downloading new CDH
components as well as forcing Maven to update its local repository.
This patch updates CDH_COMPONENTS_HOME to include the
CDH_BUILD_NUMBER, so the new CDH components are automatically
downloaded after switching to a new CDH_BUILD_NUMBER. If a build
detects that the CDH_BUILD_NUMBER has changed, it forces an update
of the local Maven repository. This helps prevent build failures
caused by a stale local Maven repository, even on a fresh Git clone.

Testing:
- Manually tested by running buildall.sh with different CDH_BUILD_NUMBER

Change-Id: Ib0ad9c2258663d3bd7470e6df921041d1ca0c0be
Reviewed-on: http://gerrit.cloudera.org:8080/11099
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d5ada970
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d5ada970
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d5ada970

Branch: refs/heads/master
Commit: d5ada970c66ddb22aae5fc7282384e62b729031e
Parents: 479c83c
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Tue Jul 31 20:06:29 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Aug 3 08:23:57 2018 +0000

----------------------------------------------------------------------
 .gitignore           |  1 +
 README.md            |  3 ++-
 bin/impala-config.sh |  2 +-
 buildall.sh          | 12 ++++++++++++
 4 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d5ada970/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 8d8e4e4..a39b39e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ org.eclipse.jdt.ui.prefs
 load-*-generated.sql
 bin/version.info
 bin/impala-config-local.sh
+.cdh
 
 # distcc options
 .impala_compiler_opts

http://git-wip-us.apache.org/repos/asf/impala/blob/d5ada970/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index c0565ab..0d700d7 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,8 @@ can do so through the environment variables and scripts listed below.
 | IMPALA_HOME          |               | Top level Impala directory |
 | IMPALA_TOOLCHAIN     | "${IMPALA_HOME}/toolchain" | Native toolchain directory (for compilers, libraries, etc.) |
 | SKIP_TOOLCHAIN_BOOTSTRAP | "false" | Skips downloading the toolchain any python dependencies if "true" |
-| CDH_COMPONENTS_HOME | "${IMPALA_HOME}/toolchain/cdh_components" OR "${IMPALA_HOME}/thirdparty" (if detected) | If a thirdparty directory is present, components found here will override anything in IMPALA_TOOLCHAIN. |
+| CDH_BUILD_NUMBER | | Identifier to indicate the CDH build number |
+| CDH_COMPONENTS_HOME | "${IMPALA_HOME}/toolchain/cdh_components-${CDH_BUILD_NUMBER}" OR "${IMPALA_HOME}/thirdparty" (if detected) | If a thirdparty directory is present, components found here will override anything in IMPALA_TOOLCHAIN. |
 | CDH_MAJOR_VERSION | "5" | Identifier used to uniqueify paths for potentially incompatible component builds. |
 | IMPALA_CONFIG_SOURCED | "1" |  Set by ${IMPALA_HOME}/bin/impala-config.sh (internal use) |
 | JAVA_HOME | "/usr/lib/jvm/${JAVA_VERSION}" | Used to locate Java |

http://git-wip-us.apache.org/repos/asf/impala/blob/d5ada970/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index c627a0e..fe656ac 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -475,7 +475,7 @@ export PATH="$IMPALA_HOME/bin:$IMPALA_TOOLCHAIN/cmake-$IMPALA_CMAKE_VERSION/bin/
 
 # The directory in which all the thirdparty CDH components live.
 if [ "${DOWNLOAD_CDH_COMPONENTS}" = true ]; then
-  export CDH_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdh_components"
+  export CDH_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdh_components-$CDH_BUILD_NUMBER"
 else
   export CDH_COMPONENTS_HOME="$IMPALA_HOME/thirdparty"
 fi

http://git-wip-us.apache.org/repos/asf/impala/blob/d5ada970/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index a86581a..59e27ee 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -465,6 +465,18 @@ create_log_dirs
 
 bootstrap_dependencies
 
+# Create .cdh file that contains the CDH_BUILD_NUMBER. If the content
+# of the file is different than the one in the environment variable,
+# append -U into IMPALA_MAVEN_OPTION to force Maven to update its local
+# cache.
+CDH_FILE="${IMPALA_HOME}/.cdh"
+if [[ -f ${CDH_FILE} ]]; then
+  if [[ $(cat ${CDH_FILE}) != ${CDH_BUILD_NUMBER} ]]; then
+    export IMPALA_MAVEN_OPTIONS="${IMPALA_MAVEN_OPTIONS} -U"
+  fi
+fi
+echo "${CDH_BUILD_NUMBER}" > ${CDH_FILE}
+
 if [[ "$BUILD_FE_ONLY" -eq 1 ]]; then
   build_fe
   exit 0


[3/7] impala git commit: IMPALA-3330: [DOCS] TRANSLATE function updated

Posted by mi...@apache.org.
IMPALA-3330: [DOCS] TRANSLATE function updated

- Better description
- Examples

Change-Id: Ica33ecbb7118e3034f95c5705eed19d169dc16cb
Reviewed-on: http://gerrit.cloudera.org:8080/11074
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Jim Apple <jb...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d0ec011b
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d0ec011b
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d0ec011b

Branch: refs/heads/master
Commit: d0ec011b3d50e968cf6c4074e15c0b887d962ab3
Parents: fb3d47d
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Fri Jul 27 15:21:08 2018 -0700
Committer: Alex Rodoni <ar...@cloudera.com>
Committed: Thu Aug 2 22:18:05 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_string_functions.xml | 34 +++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d0ec011b/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
index 73fa236..ed5d028 100644
--- a/docs/topics/impala_string_functions.xml
+++ b/docs/topics/impala_string_functions.xml
@@ -1272,11 +1272,39 @@ select split_part('one\|/two\|/three','\|/',3);
         </dt>
 
         <dd>
-          <indexterm audience="hidden">translate() function</indexterm>
-          <b>Purpose:</b> Returns the input string with a set of characters replaced by another set of characters.
+          <b>Purpose:</b> Returns the <codeph>input</codeph> string with each
+          character in the <codeph>from</codeph> argument replaced with the
+          corresponding character in the <codeph>to</codeph> argument. The
+          characters are matched in the order they appear in
+            <codeph>from</codeph> and <codeph>to</codeph>. <p> For example:
+              <codeph>translate ('hello world','world','earth')</codeph> returns
+              <codeph>'hetta earth'</codeph>. </p>
           <p>
-            <b>Return type:</b> <codeph>string</codeph>
+            <b>Return type:</b>
+            <codeph>string</codeph>
           </p>
+          <p>
+            <b>Usage notes:</b>
+          </p>
+          <p> If <codeph>from</codeph> contains more characters than
+              <codeph>to</codeph>, the <codeph>from</codeph> characters that are
+            beyond the length of <codeph>to</codeph> are removed in the result. </p>
+          <p> For example: </p>
+          <p>
+            <codeph>translate('abcdedg', 'bcd', '1')</codeph> returns
+              <codeph>'a1eg'</codeph>. </p>
+          <p><codeph>translate('Unit Number#2', '# ', '_')</codeph> returns
+              <codeph>'UnitNumber_2'</codeph>. </p>
+          <p> If <codeph>from</codeph> is <codeph>NULL</codeph>, the function
+            returns <codeph>NULL</codeph>. </p>
+          <p> If <codeph>to</codeph> contains more characters than
+              <codeph>from</codeph>, the extra characters in <codeph>to</codeph>
+            are ignored. </p>
+          <p> If <codeph>from</codeph> contains duplicate characters, the
+            duplicate character is replaced with the first matching character in
+              <codeph>to</codeph>. </p>
+          <p> For example: <codeph>translate ('hello','ll','67')</codeph>
+            returns <codeph>'he66o'</codeph>. </p>
         </dd>
 
       </dlentry>
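
For readers who prefer code to prose, here is a minimal C++ sketch of the
translate() semantics described in the documentation above (illustration
only, not Impala's implementation; the function name is hypothetical):

#include <string>

// Replaces each character of 'input' that occurs in 'from' with the
// character at the same index in 'to'. Characters of 'from' beyond the
// length of 'to' are removed; extra characters in 'to' are ignored;
// duplicates in 'from' use their first occurrence.
std::string Translate(const std::string& input, const std::string& from,
                      const std::string& to) {
  std::string result;
  for (char c : input) {
    auto pos = from.find(c);  // first match handles duplicate 'from' chars
    if (pos == std::string::npos) {
      result += c;            // not in 'from': keep unchanged
    } else if (pos < to.size()) {
      result += to[pos];      // mapped to the corresponding 'to' character
    }                         // else: no counterpart in 'to', drop the character
  }
  return result;
}

// Example: Translate("hello world", "world", "earth") returns "hetta earth";
//          Translate("abcdedg", "bcd", "1") returns "a1eg".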


[4/7] impala git commit: [DOCS] Added the part 1 of IMPALA-5607 to the upgrade guide

Posted by mi...@apache.org.
[DOCS] Added the part 1 of IMPALA-5607 to the upgrade guide

Change-Id: Ib0e53959bef4c629a31868e16b03b6abc11c9f8d
Reviewed-on: http://gerrit.cloudera.org:8080/11051
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/479c83cf
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/479c83cf
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/479c83cf

Branch: refs/heads/master
Commit: 479c83cf72f82b86fc08bde93a7910bf456dcae1
Parents: d0ec011
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Wed Jul 25 13:44:14 2018 -0700
Committer: Alex Rodoni <ar...@cloudera.com>
Committed: Fri Aug 3 02:38:27 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_upgrading.xml | 344 ++++++++++++++++++++++++----------
 1 file changed, 245 insertions(+), 99 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/479c83cf/docs/topics/impala_upgrading.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_upgrading.xml b/docs/topics/impala_upgrading.xml
index 7bfc0a4..679b77d 100644
--- a/docs/topics/impala_upgrading.xml
+++ b/docs/topics/impala_upgrading.xml
@@ -21,6 +21,7 @@ under the License.
 <concept id="upgrading">
 
   <title>Upgrading Impala</title>
+
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -32,11 +33,11 @@ under the License.
   <conbody>
 
     <p>
-      Upgrading Impala involves building or acquiring new Impala-related binaries,
-      and then restarting Impala services.
+      Upgrading Impala involves building or acquiring new Impala-related binaries, and then
+      restarting Impala services.
     </p>
 
-    </conbody>
+  </conbody>
 
   <concept id="upgrade_manual">
 
@@ -47,8 +48,7 @@ under the License.
       <ul>
         <li>
           <p>
-            Shut down all Impala-related daemons on
-            all relevant hosts in the cluster:
+            Shut down all Impala-related daemons on all relevant hosts in the cluster:
           </p>
           <ol>
             <li>
@@ -67,47 +67,52 @@ under the License.
             </li>
           </ol>
         </li>
+
         <li>
           <p>
-            Follow the build procedure in the
-            <filepath>README.md</filepath> file
-            to produce new Impala binaries.
+            Follow the build procedure in the <filepath>README.md</filepath> file to produce new
+            Impala binaries.
           </p>
         </li>
+
         <li>
           <p>
-            Replace the binaries for all Impala-related
-            daemons on all relevant hosts in the cluster.
+            Replace the binaries for all Impala-related daemons on all relevant hosts in the
+            cluster.
           </p>
         </li>
+
         <li>
-          Check if there are new recommended or required configuration settings to put into place in the
-          configuration files, typically under <filepath>/etc/impala/conf</filepath>. See
-          <xref href="impala_config_performance.xml#config_performance"/> for settings related to performance and
-          scalability.
+          Check if there are new recommended or required configuration settings to put into
+          place in the configuration files, typically under
+          <filepath>/etc/impala/conf</filepath>. See
+          <xref href="impala_config_performance.xml#config_performance"/> for settings related
+          to performance and scalability.
         </li>
+
         <li>
           <p>
-            Restart all Impala-related daemons on
-            all relevant hosts in the cluster:
+            Restart all Impala-related daemons on all relevant hosts in the cluster:
           </p>
           <ol>
             <li>
-              Restart the Impala state store service on the desired nodes in your cluster. Expect to see a process
-              named <codeph>statestored</codeph> if the service started successfully.
+              Restart the Impala state store service on the desired nodes in your cluster.
+              Expect to see a process named <codeph>statestored</codeph> if the service started
+              successfully.
 <codeblock>$ sudo service impala-state-store start
 $ ps ax | grep [s]tatestored
  6819 ?        Sl     0:07 /usr/lib/impala/sbin/statestored -log_dir=/var/log/impala -state_store_port=24000
 </codeblock>
               <p>
-                Restart the state store service <i>before</i> the Impala server service to avoid <q>Not
-                connected</q> errors when you run <codeph>impala-shell</codeph>.
+                Restart the state store service <i>before</i> the Impala server service to avoid
+                <q>Not connected</q> errors when you run <codeph>impala-shell</codeph>.
               </p>
             </li>
 
             <li rev="1.2">
-              Restart the Impala catalog service on whichever host it runs on in your cluster. Expect to see a
-              process named <codeph>catalogd</codeph> if the service started successfully.
+              Restart the Impala catalog service on whichever host it runs on in your cluster.
+              Expect to see a process named <codeph>catalogd</codeph> if the service started
+              successfully.
 <codeblock>$ sudo service impala-catalog restart
 $ ps ax | grep [c]atalogd
  6068 ?        Sl     4:06 /usr/lib/impala/sbin/catalogd
@@ -115,8 +120,8 @@ $ ps ax | grep [c]atalogd
             </li>
 
             <li>
-              Restart the Impala daemon service on each node in your cluster. Expect to see a process named
-              <codeph>impalad</codeph> if the service started successfully.
+              Restart the Impala daemon service on each node in your cluster. Expect to see a
+              process named <codeph>impalad</codeph> if the service started successfully.
 <codeblock>$ sudo service impala-server start
 $ ps ax | grep [i]mpalad
  7936 ?        Sl     0:12 /usr/lib/impala/sbin/impalad -log_dir=/var/log/impala -state_store_port=24000
@@ -129,192 +134,333 @@ $ ps ax | grep [i]mpalad
 
       <note>
         <p>
-          If the services did not start successfully (even though the <codeph>sudo service</codeph> command might
-          display <codeph>[OK]</codeph>), check for errors in the Impala log file, typically in
-          <filepath>/var/log/impala</filepath>.
+          If the services did not start successfully (even though the <codeph>sudo
+          service</codeph> command might display <codeph>[OK]</codeph>), check for errors in the
+          Impala log file, typically in <filepath>/var/log/impala</filepath>.
         </p>
       </note>
+
     </conbody>
+
   </concept>
 
   <concept id="concept_a2p_szq_jdb">
+
     <title>Impala Upgrade Considerations</title>
+
     <concept id="IMPALA-3916">
+
       <title>List of Reserved Words Updated in <keyword keyref="impala30_full"
         /></title>
+
       <conbody>
+
         <p>
-          The list of <keyword keyref="reserved_words">reserved
-            words</keyword> in Impala was updated in <keyword
-            keyref="impala30_full"/>. If you need to use a reserved word as an
-          identifier, e.g. a table name, enclose the word in back-ticks.
+          The list of <keyword keyref="reserved_words">reserved words</keyword> in Impala was
+          updated in <keyword
+            keyref="impala30_full"/>. If you need to use a
+          reserved word as an identifier, e.g. a table name, enclose the word in back-ticks.
         </p>
 
         <p>
-          If you need to use the reserved words from previous versions of
-          Impala, set the <codeph>impalad</codeph> and <codeph>catalogd</codeph>
-          startup flag. Note that this startup option will be deprecated in a
-          future release.
+          If you need to use the reserved words from previous versions of Impala, set the
+          <codeph>impalad</codeph> and <codeph>catalogd</codeph> startup flag. Note that this
+          startup option will be deprecated in a future release.
 <codeblock>--reserved_words_version=2.11.0</codeblock>
         </p>
+
       </conbody>
+
     </concept>
 
     <concept id="IMPALA-4924">
+
       <title>Decimal V2 Used by Default in <keyword keyref="impala30_full"/></title>
+
       <conbody>
+
         <p>
-          In Impala, two different implementations of <codeph>DECIMAL</codeph>
-          types are supported. Starting in <keyword keyref="impala30_full"/>,
-            <codeph>DECIMAL</codeph> V2 is used by default. See <keyword
-            keyref="decimal">DECIMAL Type</keyword> for detail information.
+          In Impala, two different implementations of <codeph>DECIMAL</codeph> types are
+          supported. Starting in <keyword keyref="impala30_full"/>, <codeph>DECIMAL</codeph> V2
+          is used by default. See <keyword
+            keyref="decimal">DECIMAL Type</keyword>
+          for detail information.
         </p>
 
         <p>
-          If you need to continue using the first version of the
-            <codeph>DECIMAL</codeph> type for the backward compatibility of your
-          queries, set the <codeph>DECIMAL_V2</codeph> query option to
-            <codeph>FALSE</codeph>:
+          If you need to continue using the first version of the <codeph>DECIMAL</codeph> type
+          for the backward compatibility of your queries, set the <codeph>DECIMAL_V2</codeph>
+          query option to <codeph>FALSE</codeph>:
 <codeblock>SET DECIMAL_V2=FALSE;</codeblock>
         </p>
+
       </conbody>
+
     </concept>
+
     <concept id="IMPALA-5191">
+
       <title>Behavior of Column Aliases Changed in <keyword
           keyref="impala30_full"/></title>
+
       <conbody>
+
         <p>
-          To conform to the SQL standard, Impala no longer performs alias
-          substitution in the subexpressions of <codeph>GROUP BY</codeph>,
-            <codeph>HAVING</codeph>, and <codeph>ORDER BY</codeph>. See <keyword
-            keyref="aliases"/> for examples of supported and unsupported aliases
-          syntax.
+          To conform to the SQL standard, Impala no longer performs alias substitution in the
+          subexpressions of <codeph>GROUP BY</codeph>, <codeph>HAVING</codeph>, and
+          <codeph>ORDER BY</codeph>. See <keyword
+            keyref="aliases"/> for examples of
+          supported and unsupported aliases syntax.
         </p>
+
       </conbody>
+
     </concept>
 
     <concept id="IMPALA-5037">
+
       <title>Default PARQUET_ARRAY_RESOLUTION Changed in <keyword
           keyref="impala30_full"/></title>
+
       <conbody>
+
         <p>
-          The default value for the <codeph>PARQUET_ARRAY_RESOLUTION</codeph>
-          was changed to <codeph>THREE_LEVEL</codeph> in <keyword
-            keyref="impala30_full"/>, to match the Parquet standard 3-level
-          encoding.
+          The default value for the <codeph>PARQUET_ARRAY_RESOLUTION</codeph> was changed to
+          <codeph>THREE_LEVEL</codeph> in <keyword
+            keyref="impala30_full"/>, to
+          match the Parquet standard 3-level encoding.
         </p>
 
         <p>
-          See <keyword keyref="parquet_array_resolution"/> for the information
-          about the query option.
+          See <keyword keyref="parquet_array_resolution"/> for the information about the query
+          option.
         </p>
+
       </conbody>
+
     </concept>
+
     <concept id="IMPALA-5293">
+
       <title>Enable Clustering Hint for Inserts</title>
+
       <conbody>
+
         <p>
-          In <keyword keyref="impala30_full"/>, the <keyword keyref="hints"
-            >clustered</keyword> hint is enabled by default. The hint adds a
-          local sort by the partitioning columns to a query plan. </p>
-        <p> The <codeph>clustered</codeph> hint is only effective for HDFS and
-          Kudu tables.
+          In <keyword keyref="impala30_full"/>, the
+          <keyword keyref="hints"
+            >clustered</keyword> hint is enabled by default.
+          The hint adds a local sort by the partitioning columns to a query plan.
         </p>
 
         <p>
-          As in previous versions, the <codeph>noclustered</codeph> hint
-          prevents clustering. If a table has ordering columns defined, the
-            <codeph>noclustered</codeph> hint is ignored with a warning.
+          The <codeph>clustered</codeph> hint is only effective for HDFS and Kudu tables.
         </p>
+
+        <p>
+          As in previous versions, the <codeph>noclustered</codeph> hint prevents clustering. If
+          a table has ordering columns defined, the <codeph>noclustered</codeph> hint is ignored
+          with a warning.
+        </p>
+
       </conbody>
+
     </concept>
 
     <concept id="IMPALA-4319">
+
       <title>Deprecated Query Options Removed in <keyword keyref="impala30_full"
         /></title>
+
       <conbody>
-        <p> The following query options have been deprecated for several
-          releases and removed: <ul>
-            <li><codeph>DEFAULT_ORDER_BY_LIMIT</codeph></li>
-            <li><codeph>ABORT_ON_DEFAULT_LIMIT_EXCEEDED</codeph></li>
-            <li><codeph>V_CPU_CORES</codeph></li>
-            <li><codeph>RESERVATION_REQUEST_TIMEOUT</codeph></li>
-            <li><codeph>RM_INITIAL_MEM</codeph></li>
-            <li><codeph>SCAN_NODE_CODEGEN_THRESHOLD</codeph></li>
-            <li><codeph>MAX_IO_BUFFERS</codeph></li>
-            <li><codeph>RM_INITIAL_MEM</codeph></li>
-            <li><codeph>DISABLE_CACHED_READS</codeph></li>
+
+        <p>
+          The following query options have been deprecated for several releases and removed:
+          <ul>
+            <li>
+              <codeph>DEFAULT_ORDER_BY_LIMIT</codeph>
+            </li>
+
+            <li>
+              <codeph>ABORT_ON_DEFAULT_LIMIT_EXCEEDED</codeph>
+            </li>
+
+            <li>
+              <codeph>V_CPU_CORES</codeph>
+            </li>
+
+            <li>
+              <codeph>RESERVATION_REQUEST_TIMEOUT</codeph>
+            </li>
+
+            <li>
+              <codeph>RM_INITIAL_MEM</codeph>
+            </li>
+
+            <li>
+              <codeph>SCAN_NODE_CODEGEN_THRESHOLD</codeph>
+            </li>
+
+            <li>
+              <codeph>MAX_IO_BUFFERS</codeph>
+            </li>
+
+            <li>
+              <codeph>RM_INITIAL_MEM</codeph>
+            </li>
+
+            <li>
+              <codeph>DISABLE_CACHED_READS</codeph>
+            </li>
           </ul>
         </p>
+
       </conbody>
+
     </concept>
 
     <concept id="impala-6648">
+
       <title>Fine-grained Privileges Added in <keyword keyref="impala30_full"
         /></title>
+
       <conbody>
+
         <p>
-          Starting in <keyword keyref="impala30_full"/>, finer grained
-          privileges are enforced, such as the <codeph>REFRESH</codeph>,
-            <codeph>CREATE</codeph>, <codeph>DROP</codeph>, and
-            <codeph>ALTER</codeph> privileges. In particular, running
-            <codeph>REFRESH</codeph> or <codeph>INVALIDATE METADATA</codeph> now
-          requires the new <codeph>REFRESH</codeph> privilege. Users who did not
-          previously have the <codeph>ALL</codeph> privilege will no longer be
-          able to run <codeph>REFRESH</codeph> or <codeph>INVALIDATE
-            METADATA</codeph> after an upgrade. Those users need to have the
-            <codeph>REFRESH</codeph> or <codeph>ALL</codeph> privilege granted
-          to run <codeph>REFRESH</codeph> or <codeph>INVALIDATE
-            METADATA</codeph>.
+          Starting in <keyword keyref="impala30_full"/>, finer grained privileges are enforced,
+          such as the <codeph>REFRESH</codeph>, <codeph>CREATE</codeph>, <codeph>DROP</codeph>,
+          and <codeph>ALTER</codeph> privileges. In particular, running <codeph>REFRESH</codeph>
+          or <codeph>INVALIDATE METADATA</codeph> now requires the new <codeph>REFRESH</codeph>
+          privilege. Users who did not previously have the <codeph>ALL</codeph> privilege will
+          no longer be able to run <codeph>REFRESH</codeph> or <codeph>INVALIDATE
+          METADATA</codeph> after an upgrade. Those users need to have the
+          <codeph>REFRESH</codeph> or <codeph>ALL</codeph> privilege granted to run
+          <codeph>REFRESH</codeph> or <codeph>INVALIDATE METADATA</codeph>.
         </p>
 
         <p>
-          See <keyword keyref="grant"/> for the new privileges, the scope, and
-          other information about the new privileges.
+          See <keyword keyref="grant"/> for the new privileges, the scope, and other information
+          about the new privileges.
         </p>
+
       </conbody>
+
     </concept>
 
     <concept id="IMPALA-3998">
+
       <title>refresh_after_connect Impala Shell Option Removed in <keyword
           keyref="impala30_full"/></title>
+
+      <conbody>
+
+        <p>
+          The deprecated <codeph>refresh_after_connect</codeph> option was removed from Impala
+          Shell in <keyword keyref="impala30_full"/>
+        </p>
+
+      </conbody>
+
+    </concept>
+
+    <concept id="impala-5607">
+
+      <title>Return Type Changed for EXTRACT and DATE_PART Functions in <keyword
+          keyref="impala30_full"/></title>
+
       <conbody>
+
         <p>
-          The deprecated <codeph>refresh_after_connect</codeph> option was
-          removed from Impala Shell in <keyword keyref="impala30_full"/>
+          The following changes were made to the <codeph>EXTRACT</codeph> and
+          <codeph>DATE_PART</codeph> functions:
+          <ul>
+            <li>
+              The output type of the <codeph>EXTRACT</codeph> and <codeph>DATE_PART</codeph>
+              functions was changed to <codeph>BIGINT</codeph>.
+            </li>
+
+            <li>
+              <p>
+                Extracting the millisecond part from a <codeph>TIMESTAMP</codeph> returns the
+                seconds component and the milliseconds component. For example, <codeph>EXTRACT
+                (CAST('2006-05-12 18:27:28.123456789' AS TIMESTAMP), 'MILLISECOND')</codeph>
+                will return <codeph>28123</codeph>.
+              </p>
+            </li>
+          </ul>
         </p>
+
       </conbody>
+
     </concept>
 
     <concept id="concept_mkn_ygr_jdb">
+
       <title>Default Setting Changes</title>
+
       <conbody>
+
         <simpletable frame="all" id="simpletable_x55_ghr_jdb">
+
           <sthead>
+
             <stentry>Release Changed</stentry>
+
             <stentry>Setting</stentry>
+
             <stentry>Default Value</stentry>
+
           </sthead>
+
           <strow>
-            <stentry><keyword keyref="impala212_full"/></stentry>
-            <stentry><codeph>compact_catalog_topic</codeph>
-              <codeph>impalad</codeph> flag</stentry>
-            <stentry><codeph>true</codeph></stentry>
+
+            <stentry><keyword keyref="impala212_full"/>
+
+            </stentry>
+
+            <stentry><codeph>compact_catalog_topic</codeph><codeph>impalad</codeph> flag</stentry>
+
+            <stentry><codeph>true</codeph>
+
+            </stentry>
+
           </strow>
+
           <strow>
-            <stentry><keyword keyref="impala212_full"/></stentry>
-            <stentry><codeph>max_cached_file_handles</codeph>
-              <codeph>impalad</codeph> flag</stentry>
-            <stentry><codeph>20000</codeph></stentry>
+
+            <stentry><keyword keyref="impala212_full"/>
+
+            </stentry>
+
+            <stentry><codeph>max_cached_file_handles</codeph><codeph>impalad</codeph> flag</stentry>
+
+            <stentry><codeph>20000</codeph>
+
+            </stentry>
+
           </strow>
+
           <strow>
-            <stentry><keyword keyref="impala30_full"/></stentry>
+
+            <stentry><keyword keyref="impala30_full"/>
+
+            </stentry>
+
             <stentry><codeph>PARQUET_ARRAY_RESOLUTION</codeph> query
               option</stentry>
-            <stentry><codeph>THREE_LEVEL</codeph></stentry>
+
+            <stentry><codeph>THREE_LEVEL</codeph>
+
+            </stentry>
+
           </strow>
+
         </simpletable>
+
       </conbody>
+
     </concept>
+
   </concept>
+
 </concept>


[6/7] impala git commit: IMPALA-7362: Add query option to set timezone

Posted by mi...@apache.org.
IMPALA-7362: Add query option to set timezone

This change adds a new query option "timezone", which
defines the timezone used for UTC<->local conversions.
The main goal is to simplify testing, but I think that
some users may also find it useful, so it is added as a
"general" query option.

Examples:
set timezone=UTC;
set timezone="Europe/Budapest"

The timezones are validated, but as query options are not
sent to the coordinator immediately, the error checking
will only happen when running a query.

Leading/trailing " and ' characters are stripped because the
/ character cannot be entered unquoted in some contexts.

Currently the timezone has an effect in the following cases:
- the now() function
- conversions between unix time and timestamp, if the flag
  use_local_tz_for_unix_timestamp_conversions is true
- reading Parquet timestamps written by Hive, if the flag
  convert_legacy_hive_parquet_utc_timestamps is true

In the near future, Parquet timestamps' isAdjustedToUTC
property will be supported, which will decide whether
to do UTC->local conversion on a per file+column basis.
That conversion will also be affected by this option.

Testing:
- Extended test_local_tz_conversion.py to actually
  test UTC<->local conversion. Until now, the effect
  of the flag use_local_tz_for_unix_timestamp_conversions
  was practically untested.
- Added a shell test to check that the default of the
  query option is the system's timezone.
- Added a shell test to check timezone validation.

Change-Id: I73de86eff096e1c581d3b56a0d9330d686f77272
Reviewed-on: http://gerrit.cloudera.org:8080/11064
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/dc32bf77
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/dc32bf77
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/dc32bf77

Branch: refs/heads/master
Commit: dc32bf770381842eaf3f2c123b7899f4a2dda530
Parents: d5ada97
Author: Csaba Ringhofer <cs...@cloudera.com>
Authored: Thu Jul 26 22:06:39 2018 +0200
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Aug 3 17:45:25 2018 +0000

----------------------------------------------------------------------
 be/src/service/impala-server.cc                 |  4 +-
 be/src/service/query-options.cc                 | 14 +++++
 be/src/service/query-options.h                  |  3 +-
 common/thrift/ImpalaInternalService.thrift      |  7 +++
 common/thrift/ImpalaService.thrift              |  4 ++
 .../QueryTest/local-timestamp-functions.test    | 61 ++++++++++++++++++++
 .../custom_cluster/test_local_tz_conversion.py  | 16 +++--
 tests/shell/test_shell_commandline.py           | 43 ++++++++++++++
 tests/shell/test_shell_interactive.py           | 14 +++++
 9 files changed, 159 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 077f634..89b7e41 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -976,7 +976,8 @@ void ImpalaServer::PrepareQueryContext(
   query_ctx->__set_pid(getpid());
   int64_t now_us = UnixMicros();
   const Timezone& utc_tz = TimezoneDatabase::GetUtcTimezone();
-  string local_tz_name = TimezoneDatabase::LocalZoneName();
+  string local_tz_name = query_ctx->client_request.query_options.timezone;
+  if (local_tz_name.empty()) local_tz_name = TimezoneDatabase::LocalZoneName();
   const Timezone* local_tz = TimezoneDatabase::FindTimezone(local_tz_name);
   if (local_tz != nullptr) {
     LOG(INFO) << "Found local timezone \"" << local_tz_name << "\".";
@@ -1283,6 +1284,7 @@ void ImpalaServer::InitializeConfigVariables() {
   map<string, string> string_map;
   TQueryOptionsToMap(default_query_options_, &string_map);
   string_map["SUPPORT_START_OVER"] = "false";
+  string_map["TIMEZONE"] = TimezoneDatabase::LocalZoneName();
   PopulateQueryOptionLevels(&query_option_levels_);
   map<string, string>::const_iterator itr = string_map.begin();
   for (; itr != string_map.end(); ++itr) {

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 5d61664..e401553 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -22,11 +22,13 @@
 #include "util/mem-info.h"
 #include "util/parse-util.h"
 #include "util/string-parser.h"
+#include "exprs/timezone_db.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
 
 #include <sstream>
 #include <boost/algorithm/string.hpp>
 #include <gutil/strings/substitute.h>
+#include <gutil/strings/strip.h>
 
 #include "common/names.h"
 
@@ -681,6 +683,18 @@ Status impala::SetQueryOption(const string& key, const string& value,
             iequals(value, "true") || iequals(value, "1"));
         break;
       }
+      case TImpalaQueryOptions::TIMEZONE: {
+        // Leading/trailing " and ' characters are stripped because the / character
+        // cannot be entered unquoted in some contexts.
+        string timezone = value;
+        TrimString(&timezone, "'\"");
+        timezone = timezone.empty() ? TimezoneDatabase::LocalZoneName() : timezone;
+        if (TimezoneDatabase::FindTimezone(timezone) == nullptr) {
+          return Status(Substitute("Invalid timezone name '$0'.", timezone));
+        }
+        query_options->__set_timezone(timezone);
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 01f6e74..08749b8 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -41,7 +41,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // the DCHECK.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::ALLOW_ERASURE_CODED_FILES + 1);\
+      TImpalaQueryOptions::TIMEZONE + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -139,6 +139,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
   QUERY_OPT_FN(kudu_read_mode, KUDU_READ_MODE, TQueryOptionLevel::ADVANCED)\
   QUERY_OPT_FN(allow_erasure_coded_files, ALLOW_ERASURE_CODED_FILES,\
       TQueryOptionLevel::DEVELOPMENT)\
+  QUERY_OPT_FN(timezone, TIMEZONE, TQueryOptionLevel::REGULAR)\
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query state.

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index bf48aaa..c22b662 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -295,6 +295,9 @@ struct TQueryOptions {
 
   // Allow reading of erasure coded files in HDFS.
   69: optional bool allow_erasure_coded_files = false;
+
+  // See comment in ImpalaService.thrift.
+  70: optional string timezone = ""
 }
 
 // Impala currently has two types of sessions: Beeswax and HiveServer2
@@ -423,6 +426,10 @@ struct TQueryCtx {
   // String containing name of the local timezone.
   // It is guaranteed to be a valid timezone on the coordinator (but not necessarily on
   // the executor, since in theory the executor could have a different timezone db).
+  // TODO(Csaba): adding timezone as a query option made this property redundant. It
+  //   still has an effect if TimezoneDatabase::LocalZoneName() cannot find the
+  //   system's local timezone and falls back to UTC. This logic will be removed in
+  //   IMPALA-7359, which will make this member completely obsolete.
   18: required string local_time_zone
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 665144f..5d8260e 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -327,6 +327,10 @@ enum TImpalaQueryOptions {
 
   // Allow reading of erasure coded files.
   ALLOW_ERASURE_CODED_FILES,
+
+  // The timezone used in UTC<->localtime conversions. The default is the OS's timezone
+  // at the coordinator, which can be overridden by environment variable $TZ.
+  TIMEZONE,
 }
 
 // The summary of a DML statement.

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/testdata/workloads/functional-query/queries/QueryTest/local-timestamp-functions.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/local-timestamp-functions.test b/testdata/workloads/functional-query/queries/QueryTest/local-timestamp-functions.test
new file mode 100644
index 0000000..3ed121d
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/local-timestamp-functions.test
@@ -0,0 +1,61 @@
+# This test should be run with -use_local_tz_for_unix_timestamp_conversions=true
+====
+---- QUERY
+SET timezone=CET;
+SELECT
+from_unixtime(0),
+from_unixtime((40 * 365) * 24 * 60 * 60),
+from_unixtime((180 + 40 * 365) * 24 * 60 * 60);
+---- TYPES
+STRING,STRING,STRING
+---- RESULTS
+'1970-01-01 01:00:00','2009-12-22 01:00:00','2010-06-20 02:00:00'
+====
+---- QUERY
+set timezone="America/Los_Angeles";
+SELECT
+from_unixtime(0),
+from_unixtime((40 * 365) * 24 * 60 * 60),
+from_unixtime((180 + 40 * 365) * 24 * 60 * 60);
+---- TYPES
+STRING,STRING,STRING
+---- RESULTS
+'1969-12-31 16:00:00','2009-12-21 16:00:00','2010-06-19 17:00:00'
+====
+---- QUERY
+SET timezone=CET;
+SELECT
+unix_timestamp('1970-01-01 01:00:00'),
+unix_timestamp('2009-12-22 01:00:00'),
+unix_timestamp('2010-06-20 02:00:00');
+---- TYPES
+BIGINT,BIGINT,BIGINT
+---- RESULTS
+0,1261440000,1276992000
+====
+---- QUERY
+set timezone="America/Los_Angeles";
+SELECT
+unix_timestamp('1969-12-31 16:00:00'),
+unix_timestamp('2009-12-21 16:00:00'),
+unix_timestamp('2010-06-19 17:00:00');
+---- TYPES
+BIGINT,BIGINT,BIGINT
+---- RESULTS
+0,1261440000,1276992000
+====
+---- QUERY
+SET timezone=CET;
+select cast(0 as timestamp);
+---- TYPES
+TIMESTAMP
+---- RESULTS
+1970-01-01 01:00:00
+====
+---- QUERY
+SET timezone="America/Los_Angeles";
+select cast(0 as timestamp);
+---- TYPES
+TIMESTAMP
+---- RESULTS
+1969-12-31 16:00:00
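
The expected values above follow from the epoch arithmetic in the queries:
(40 * 365) * 24 * 60 * 60 = 1261440000 and (180 + 40 * 365) * 24 * 60 * 60 = 1276992000
seconds since the Unix epoch. A standalone Python sketch (not part of the patch; assumes
Python 3.9+ with the standard zoneinfo module and system tzdata) that reproduces the
local-time strings checked in the test:

  from datetime import datetime
  from zoneinfo import ZoneInfo

  epochs = [0, (40 * 365) * 24 * 60 * 60, (180 + 40 * 365) * 24 * 60 * 60]
  for tz_name in ("CET", "America/Los_Angeles"):
      for seconds in epochs:
          # Convert the Unix timestamp to local time in the given zone, which is what
          # from_unixtime() does here with
          # -use_local_tz_for_unix_timestamp_conversions=true and SET timezone=<zone>.
          local = datetime.fromtimestamp(seconds, ZoneInfo(tz_name))
          print(tz_name, seconds, local.strftime("%Y-%m-%d %H:%M:%S"))

Running this should print the six local-time strings expected in the RESULTS sections
above, assuming the system tzdata contains the CET and America/Los_Angeles zones.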

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/tests/custom_cluster/test_local_tz_conversion.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_local_tz_conversion.py b/tests/custom_cluster/test_local_tz_conversion.py
index 54ac277..05c2757 100644
--- a/tests/custom_cluster/test_local_tz_conversion.py
+++ b/tests/custom_cluster/test_local_tz_conversion.py
@@ -49,12 +49,18 @@ class TestLocalTzConversion(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--use_local_tz_for_unix_timestamp_conversions=true")
-  def test_utc_timestamp_functions(self, vector):
-    """Tests for UTC timestamp functions, i.e. functions that do not depend on the
-       behavior of the flag --use_local_tz_for_unix_timestamp_conversions. These tests
-       are also executed in test_exprs.py to ensure the same behavior when running
-       without the gflag set."""
+  def test_timestamp_functions(self, vector):
+    """Tests timestamp functions with --use_local_tz_for_unix_timestamp_conversions=true
+    """
     vector.get_value('exec_option')['enable_expr_rewrites'] = \
         vector.get_value('enable_expr_rewrites')
+
+    # Tests for UTC timestamp functions, i.e. functions that do not depend on the
+    # behavior of the flag --use_local_tz_for_unix_timestamp_conversions. These tests
+    # are also executed in test_exprs.py to ensure the same behavior when running
+    # without the gflag set.
     self.run_test_case('QueryTest/utc-timestamp-functions', vector)
 
+    # Tests for local timestamp functions, i.e. functions that depend on the
+    # behavior of the flag --use_local_tz_for_unix_timestamp_conversions.
+    self.run_test_case('QueryTest/local-timestamp-functions', vector)

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 40453c8..1cc7e57 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -39,6 +39,20 @@ QUERY_FILE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'tests', 'shell')
 RUSSIAN_CHARS = (u"А, Б, В, Г, Д, Е, Ё, Ж, З, И, Й, К, Л, М, Н, О, П, Р,"
                  u"С, Т, У, Ф, Х, Ц,Ч, Ш, Щ, Ъ, Ы, Ь, Э, Ю, Я")
 
+
+def find_query_option(key, string, strip_brackets=True):
+  """
+  Parses 'string' for 'key': value pairs, and returns value. It is assumed
+  that the 'string' contains the pair exactly once.
+
+  If 'strip_brackets' is true, enclosing [] are stripped (this is used to mark
+  query options that have their default value).
+  """
+  pattern = r'^\s*%s: (.*)\s*$' % key
+  values = re.findall(pattern, string, re.MULTILINE)
+  assert len(values) == 1
+  return values[0].strip("[]") if strip_brackets else values[0]
+
 @pytest.fixture
 def empty_table(unique_database, request):
   """Create an empty table within the test database before executing test.
@@ -695,3 +709,32 @@ class TestImpalaShell(ImpalaTestSuite):
               actual_time_s, time_limit_s))
     finally:
       os.remove(sql_path)
+
+  def test_default_timezone(self):
+    """Test that the default TIMEZONE query option is a valid timezone.
+
+       It would be nice to check that the default timezone is the system's timezone,
+       but doing this reliably on different Linux distributions is quite hard.
+    """
+    result_set = run_impala_shell_cmd('-q "set;"')
+    tzname = find_query_option("TIMEZONE", result_set.stdout)
+    assert os.path.isfile("/usr/share/zoneinfo/" + tzname)
+
+  def test_find_query_option(self):
+    """Test utility function find_query_option()."""
+    test_input = """
+        not_an_option
+        default: [default]
+        non_default: non_default
+        has_space: has space
+        duplicate: d
+        duplicate: d
+        empty: """
+    assert find_query_option("default", test_input) == "default"
+    assert find_query_option("non_default", test_input) == "non_default"
+    assert find_query_option("has_space", test_input) == "has space"
+    assert find_query_option("empty", test_input) == ""
+    with pytest.raises(AssertionError):
+      find_query_option("duplicate", test_input)
+    with pytest.raises(AssertionError):
+      find_query_option("not_an_option", test_input)

http://git-wip-us.apache.org/repos/asf/impala/blob/dc32bf77/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index 1d81663..c481a21 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -716,6 +716,20 @@ class TestImpalaShellInteractive(object):
     result = shell.get_result()
     assert "ERROR: AnalysisException: Unmatched string literal" in result.stderr
 
+  def test_timezone_validation(self):
+    """Test that query option TIMEZONE is validated when executing a query.
+
+       Query options are not sent to the coordinator immediately, so the error checking
+       will only happen when running a query.
+    """
+    p = ImpalaShell()
+    p.send_cmd('set timezone=BLA;')
+    p.send_cmd('select 1;')
+    results = p.get_result()
+    assert "Fetched 1 row" not in results.stderr
+    assert "ERROR: Errors parsing query options" in results.stderr
+    assert "Invalid timezone name 'BLA'" in results.stderr
+
 def run_impala_shell_interactive(input_lines, shell_args=None):
   """Runs a command in the Impala shell interactively."""
   # if argument "input_lines" is a string, makes it into a list


[2/7] impala git commit: IMPALA-7347: Update tests to accommodate HIVE-18118

Posted by mi...@apache.org.
IMPALA-7347: Update tests to accommodate HIVE-18118

HIVE-18118 adds 'numFilesErasureCoded' to table properties. This patch
adds it to test_show_create_table so that the test works with the latest Hive.

Change-Id: I6aae402dd38374de90b35c32166a9507e6eb29f9
Reviewed-on: http://gerrit.cloudera.org:8080/11108
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/fb3d47d3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/fb3d47d3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/fb3d47d3

Branch: refs/heads/master
Commit: fb3d47d3567750f728d0784cf30f53ad6b06865b
Parents: 7917eac
Author: Tianyi Wang <ti...@apache.org>
Authored: Mon Jul 30 17:58:27 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Aug 2 20:25:31 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                                              | 2 +-
 .../functional-query/queries/QueryTest/show-create-table.test     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/fb3d47d3/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 65eae3b..c627a0e 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -160,7 +160,7 @@ unset IMPALA_KUDU_URL
 : ${CDH_DOWNLOAD_HOST:=native-toolchain.s3.amazonaws.com}
 export CDH_DOWNLOAD_HOST
 export CDH_MAJOR_VERSION=6
-export CDH_BUILD_NUMBER=479815
+export CDH_BUILD_NUMBER=502571
 export IMPALA_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
 export IMPALA_HBASE_VERSION=2.0.0-cdh6.x-SNAPSHOT
 export IMPALA_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT

http://git-wip-us.apache.org/repos/asf/impala/blob/fb3d47d3/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
index db38984..0625b32 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test
@@ -252,6 +252,7 @@ ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
 STORED AS INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
           OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
 LOCATION '$$location_uri$$'
+TBLPROPERTIES ('numFilesErasureCoded'='0')
 ====
 ---- QUERY
 SHOW CREATE TABLE functional.allcomplextypes
@@ -395,4 +396,4 @@ WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,d:bool_col,d:tinyint_col,d:s
                       'serialization.format'='1')
 TBLPROPERTIES ('hbase.table.name'='functional_hbase.alltypes',
                'storage_handler'='org.apache.hadoop.hive.hbase.HBaseStorageHandler')
-====
\ No newline at end of file
+====


[7/7] impala git commit: IMPALA-7377: Update Sentry for the object ownership feature

Posted by mi...@apache.org.
IMPALA-7377: Update Sentry for the object ownership feature

Update CDH_BUILD_NUMBER to 506967, mainly to pick up the new Sentry build
with the object ownership feature (IMPALA-7075). This patch updates the
Sentry package names and the Sentry Maven dependencies.

Testing:
- Ran core tests

Change-Id: I28671d03cf7785334c333055d8f02c8af5645496
Reviewed-on: http://gerrit.cloudera.org:8080/11094
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2b4d0671
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2b4d0671
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2b4d0671

Branch: refs/heads/master
Commit: 2b4d06710695ecc6b5cc6ae3b1656b3522ba97be
Parents: dc32bf7
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Tue Jul 31 14:52:18 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Aug 3 19:47:54 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                            |  2 +-
 fe/pom.xml                                      | 30 +++++++++++++++++---
 .../apache/impala/util/SentryPolicyService.java |  9 +++---
 .../org/apache/impala/util/SentryProxy.java     |  6 ++--
 .../java/org/apache/impala/util/SentryUtil.java |  9 ++----
 .../impala/analysis/AuthorizationStmtTest.java  |  2 +-
 6 files changed, 38 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index fe656ac..2e4782e 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -160,7 +160,7 @@ unset IMPALA_KUDU_URL
 : ${CDH_DOWNLOAD_HOST:=native-toolchain.s3.amazonaws.com}
 export CDH_DOWNLOAD_HOST
 export CDH_MAJOR_VERSION=6
-export CDH_BUILD_NUMBER=502571
+export CDH_BUILD_NUMBER=506967
 export IMPALA_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
 export IMPALA_HBASE_VERSION=2.0.0-cdh6.x-SNAPSHOT
 export IMPALA_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT

http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/fe/pom.xml
----------------------------------------------------------------------
diff --git a/fe/pom.xml b/fe/pom.xml
index 70ff9cc..b5f171a 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -132,6 +132,19 @@ under the License.
 
     <dependency>
       <groupId>org.apache.sentry</groupId>
+      <artifactId>sentry-provider-db</artifactId>
+      <version>${sentry.version}</version>
+      <exclusions>
+        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
+        <exclusion>
+          <groupId>net.minidev</groupId>
+          <artifactId>json-smart</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.sentry</groupId>
       <artifactId>sentry-provider-file</artifactId>
       <version>${sentry.version}</version>
     </dependency>
@@ -160,10 +173,6 @@ under the License.
       <artifactId>sentry-binding-hive</artifactId>
       <version>${sentry.version}</version>
       <exclusions>
-        <exclusion>
-          <groupId>org.apache.sentry</groupId>
-          <artifactId>sentry-provider-db</artifactId>
-        </exclusion>
         <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
         <exclusion>
           <groupId>net.minidev</groupId>
@@ -192,6 +201,19 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.sentry</groupId>
+      <artifactId>sentry-service-api</artifactId>
+      <version>${sentry.version}</version>
+      <exclusions>
+        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
+        <exclusion>
+          <groupId>net.minidev</groupId>
+          <artifactId>json-smart</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-hadoop-bundle</artifactId>
       <version>${parquet.version}</version>

http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java b/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
index f55ac30..8fc72c9 100644
--- a/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
+++ b/fe/src/main/java/org/apache/impala/util/SentryPolicyService.java
@@ -19,10 +19,10 @@ package org.apache.impala.util;
 
 import java.util.List;
 
-import org.apache.sentry.provider.db.service.thrift.SentryPolicyServiceClient;
-import org.apache.sentry.provider.db.service.thrift.TSentryGrantOption;
-import org.apache.sentry.provider.db.service.thrift.TSentryPrivilege;
-import org.apache.sentry.provider.db.service.thrift.TSentryRole;
+import org.apache.sentry.api.service.thrift.SentryPolicyServiceClient;
+import org.apache.sentry.api.service.thrift.TSentryGrantOption;
+import org.apache.sentry.api.service.thrift.TSentryPrivilege;
+import org.apache.sentry.api.service.thrift.TSentryRole;
 import org.apache.sentry.service.thrift.SentryServiceClientFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -37,7 +37,6 @@ import org.apache.impala.common.InternalException;
 import org.apache.impala.thrift.TPrivilege;
 import org.apache.impala.thrift.TPrivilegeLevel;
 import org.apache.impala.thrift.TPrivilegeScope;
-import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/fe/src/main/java/org/apache/impala/util/SentryProxy.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/SentryProxy.java b/fe/src/main/java/org/apache/impala/util/SentryProxy.java
index f2df66b..7863923 100644
--- a/fe/src/main/java/org/apache/impala/util/SentryProxy.java
+++ b/fe/src/main/java/org/apache/impala/util/SentryProxy.java
@@ -24,9 +24,9 @@ import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.log4j.Logger;
-import org.apache.sentry.provider.db.service.thrift.TSentryGroup;
-import org.apache.sentry.provider.db.service.thrift.TSentryPrivilege;
-import org.apache.sentry.provider.db.service.thrift.TSentryRole;
+import org.apache.sentry.api.service.thrift.TSentryGroup;
+import org.apache.sentry.api.service.thrift.TSentryPrivilege;
+import org.apache.sentry.api.service.thrift.TSentryRole;
 
 import org.apache.impala.authorization.SentryConfig;
 import org.apache.impala.authorization.User;

http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/fe/src/main/java/org/apache/impala/util/SentryUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/SentryUtil.java b/fe/src/main/java/org/apache/impala/util/SentryUtil.java
index f85e890..c4aaf73 100644
--- a/fe/src/main/java/org/apache/impala/util/SentryUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/SentryUtil.java
@@ -22,12 +22,9 @@ import java.util.Set;
 import org.apache.sentry.core.common.exception.SentryAccessDeniedException;
 import org.apache.sentry.core.common.exception.SentryAlreadyExistsException;
 import org.apache.sentry.core.common.exception.SentryGroupNotFoundException;
-import org.apache.sentry.provider.db.service.thrift.SentryPolicyServiceClient;
-import org.apache.sentry.provider.db.service.thrift.TSentryRole;
-// See IMPALA-5540. Sentry over-shades itself (to avoid leaking Thrift),
-// causing this unusual package name. In the code below, we typically
-// check for either variant when it's available in the classpath.
-import sentry.org.apache.sentry.core.common.exception.SentryUserException;
+import org.apache.sentry.api.service.thrift.SentryPolicyServiceClient;
+import org.apache.sentry.api.service.thrift.TSentryRole;
+import org.apache.sentry.core.common.exception.SentryUserException;
 
 /**
  * Wrapper to facilitate differences in Sentry APIs across Sentry versions.

http://git-wip-us.apache.org/repos/asf/impala/blob/2b4d0671/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java b/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
index 3095ff8..627ec1a 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AuthorizationStmtTest.java
@@ -45,7 +45,7 @@ import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.thrift.TResultRow;
 import org.apache.impala.thrift.TTableName;
 import org.apache.impala.util.SentryPolicyService;
-import org.apache.sentry.provider.db.service.thrift.TSentryRole;
+import org.apache.sentry.api.service.thrift.TSentryRole;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;