You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2018/01/10 18:48:03 UTC
[1/4] impala git commit: IMPALA-6128: Add support for AES-CTR
encryption when spilling to disk
Repository: impala
Updated Branches:
refs/heads/master f810458ca -> 31c6a1719
IMPALA-6128: Add support for AES-CTR encryption when spilling to disk
CFB mode is a stream cipher and is secure when used with a different nonce/IV
for every message. However it can be a performance bottleneck.
CTR mode is also a stream cipher and is secure, and it is 4~6x faster than CFB mode
in OpenSSL. AES-CTR+SHA256 is about 40~70% faster than AES-CFB+SHA256.
CTR mode is used if the OpenSSL version at runtime is >= 1.0.1; otherwise we
fall back to CFB mode.
Testing:
Ran runtime tmp-file-mgr-test, openssl-util-test, buffer-pool-test and
buffered-tuple-stream-test.
The unit test case openssl-util-test.EncryptInPlace tests encryption in both modes.
Change-Id: I9debc240615dd8cdbf00ec8730cff62ffef52aff
Reviewed-on: http://gerrit.cloudera.org:8080/8861
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/514dfaf9
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/514dfaf9
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/514dfaf9
Branch: refs/heads/master
Commit: 514dfaf9fdff219256eaa9baf3efcc66bfdfafda
Parents: f810458
Author: Xianda Ke <ke...@gmail.com>
Authored: Sun Nov 26 15:35:22 2017 +0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 05:39:09 2018 +0000
----------------------------------------------------------------------
be/src/runtime/tmp-file-mgr.cc | 4 +--
be/src/util/openssl-util-test.cc | 65 ++++++++++++++++++++---------------
be/src/util/openssl-util.cc | 32 ++++++++++++-----
be/src/util/openssl-util.h | 41 +++++++++++++++++-----
4 files changed, 96 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/514dfaf9/be/src/runtime/tmp-file-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc
index 24217de..650af0b 100644
--- a/be/src/runtime/tmp-file-mgr.cc
+++ b/be/src/runtime/tmp-file-mgr.cc
@@ -612,8 +612,8 @@ void TmpFileMgr::WriteHandle::WaitForWrite() {
Status TmpFileMgr::WriteHandle::EncryptAndHash(MemRange buffer) {
DCHECK(FLAGS_disk_spill_encryption);
SCOPED_TIMER(encryption_timer_);
- // Since we're using AES-CFB mode, we must take care not to reuse a key/IV pair.
- // Regenerate a new key and IV for every data buffer we write.
+ // Since we're using AES-CTR/AES-CFB mode, we must take care not to reuse a
+ // key/IV pair. Regenerate a new key and IV for every data buffer we write.
key_.InitializeRandom();
RETURN_IF_ERROR(key_.Encrypt(buffer.data(), buffer.len(), buffer.data()));
hash_.Compute(buffer.data(), buffer.len());
http://git-wip-us.apache.org/repos/asf/impala/blob/514dfaf9/be/src/util/openssl-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc
index ef1b28e..8d98b0d 100644
--- a/be/src/util/openssl-util-test.cc
+++ b/be/src/util/openssl-util-test.cc
@@ -56,24 +56,29 @@ TEST_F(OpenSSLUtilTest, Encryption) {
vector<uint8_t> decrypted(buffer_size);
GenerateRandomData(original.data(), buffer_size);
- // Iterate multiple times to ensure that key regeneration works correctly.
- EncryptionKey key;
- for (int i = 0; i < 2; ++i) {
- key.InitializeRandom(); // Generate a new key for each iteration.
-
- // Check that OpenSSL is happy with the amount of entropy we're feeding it.
- DCHECK_EQ(1, RAND_status());
-
- ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
- if (i > 0) {
- // Check that we're not somehow reusing the same key.
- ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
+ // Check both CTR & CFB
+ AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
+ for (auto m : modes) {
+ // Iterate multiple times to ensure that key regeneration works correctly.
+ EncryptionKey key;
+ for (int i = 0; i < 2; ++i) {
+ key.InitializeRandom(); // Generate a new key for each iteration.
+ key.SetCipherMode(m);
+
+ // Check that OpenSSL is happy with the amount of entropy we're feeding it.
+ DCHECK_EQ(1, RAND_status());
+
+ ASSERT_OK(key.Encrypt(original.data(), buffer_size, encrypted.data()));
+ if (i > 0) {
+ // Check that we're not somehow reusing the same key.
+ ASSERT_NE(0, memcmp(encrypted.data(), prev_encrypted.data(), buffer_size));
+ }
+ memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
+
+ // We should get the original data by decrypting it.
+ ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
+ ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
}
- memcpy(prev_encrypted.data(), encrypted.data(), buffer_size);
-
- // We should get the original data by decrypting it.
- ASSERT_OK(key.Decrypt(encrypted.data(), buffer_size, decrypted.data()));
- ASSERT_EQ(0, memcmp(original.data(), decrypted.data(), buffer_size));
}
}
@@ -83,17 +88,23 @@ TEST_F(OpenSSLUtilTest, EncryptInPlace) {
vector<uint8_t> original(buffer_size);
vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
- GenerateRandomData(original.data(), buffer_size);
- memcpy(scratch.data(), original.data(), buffer_size);
-
EncryptionKey key;
- key.InitializeRandom();
- ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
- // Check that encryption did something
- ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
- ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
- // Check that we get the original data back.
- ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
+ // Check both CTR & CFB
+ AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
+ for (auto m : modes) {
+ GenerateRandomData(original.data(), buffer_size);
+ memcpy(scratch.data(), original.data(), buffer_size);
+
+ key.InitializeRandom();
+ key.SetCipherMode(m);
+
+ ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
+ // Check that encryption did something
+ ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
+ ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
+ // Check that we get the original data back.
+ ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
+ }
}
/// Test that encryption works with buffer lengths that don't fit in a 32-bit integer.
http://git-wip-us.apache.org/repos/asf/impala/blob/514dfaf9/be/src/util/openssl-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc
index e3b2299..a8ec976 100644
--- a/be/src/util/openssl-util.cc
+++ b/be/src/util/openssl-util.cc
@@ -26,6 +26,7 @@
#include <openssl/sha.h>
#include "common/atomic.h"
+#include "gutil/port.h" // ATTRIBUTE_WEAK
#include "gutil/strings/substitute.h"
#include "common/names.h"
@@ -99,13 +100,15 @@ Status EncryptionKey::EncryptInternal(
EVP_CIPHER_CTX_init(&ctx);
EVP_CIPHER_CTX_set_padding(&ctx, 0);
- int success;
-
// Start encryption/decryption. We use a 256-bit AES key, and the cipher block mode
- // is CFB because this gives us a stream cipher, which supports arbitrary
- // length ciphertexts - it doesn't have to be a multiple of 16 bytes.
- success = encrypt ? EVP_EncryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_) :
- EVP_DecryptInit_ex(&ctx, EVP_aes_256_cfb(), NULL, key_, iv_);
+ // is either CTR or CFB(stream cipher), both of which support arbitrary length
+ // ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR
+ // mode is well-optimized(instruction level parallelism) with hardware acceleration
+ // on x86 and PowerPC
+ const EVP_CIPHER* evpCipher = GetCipher();
+ int success = encrypt ? EVP_EncryptInit_ex(&ctx, evpCipher, NULL, key_, iv_) :
+ EVP_DecryptInit_ex(&ctx, evpCipher, NULL, key_, iv_);
+
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptInit_ex" : "EVP_DecryptInit_ex");
}
@@ -122,7 +125,7 @@ Status EncryptionKey::EncryptInternal(
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptUpdate" : "EVP_DecryptUpdate");
}
- // This is safe because we're using CFB mode without padding.
+ // This is safe because we're using CTR/CFB mode without padding.
DCHECK_EQ(in_len, out_len);
offset += in_len;
}
@@ -134,8 +137,21 @@ Status EncryptionKey::EncryptInternal(
if (success != 1) {
return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal");
}
- // Again safe due to CFB with no padding
+ // Again safe due to CTR/CFB with no padding
DCHECK_EQ(final_out_len, 0);
return Status::OK();
}
+
+extern "C" {
+ATTRIBUTE_WEAK
+const EVP_CIPHER* EVP_aes_256_ctr();
+}
+
+const EVP_CIPHER* EncryptionKey::GetCipher() const {
+ // use weak symbol to avoid compiling error on OpenSSL 1.0.0 environment
+ if (mode_ == AES_256_CTR && EVP_aes_256_ctr) return EVP_aes_256_ctr();
+
+ // otherwise, fallback to CFB mode
+ return EVP_aes_256_cfb();
+}
}
http://git-wip-us.apache.org/repos/asf/impala/blob/514dfaf9/be/src/util/openssl-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util.h b/be/src/util/openssl-util.h
index 4b32db6..22f8235 100644
--- a/be/src/util/openssl-util.h
+++ b/be/src/util/openssl-util.h
@@ -19,16 +19,25 @@
#define IMPALA_UTIL_OPENSSL_UTIL_H
#include <openssl/aes.h>
+#include <openssl/evp.h>
#include <openssl/sha.h>
#include "common/status.h"
namespace impala {
+#define OPENSSL_VERSION_1_0_1 0x1000100L
+
/// Add entropy from the system RNG to OpenSSL's global RNG. Called at system startup
/// and again periodically to add new entropy.
void SeedOpenSSLRNG();
+enum AES_CIPHER_MODE {
+ AES_256_CTR,
+ AES_256_CFB,
+ AES_256_GCM // not supported now.
+};
+
/// The hash of a data buffer used for checking integrity. A SHA256 hash is used
/// internally.
class IntegrityHash {
@@ -47,20 +56,23 @@ class IntegrityHash {
/// The key and initialization vector (IV) required to encrypt and decrypt a buffer of
/// data. This should be regenerated for each buffer of data.
///
-/// We use AES with a 256-bit key and CFB cipher block mode, which gives us a stream
-/// cipher that can support arbitrary-length ciphertexts. The IV is used as an input to
-/// the cipher as the "block to supply before the first block of plaintext". This is
-/// required because all ciphers (except the weak ECB) are built such that each block
-/// depends on the output from the previous block. Since the first block doesn't have
-/// a previous block, we supply this IV. Think of it as starting off the chain of
+/// We use AES with a 256-bit key and CTR/CFB cipher block mode, which gives us a stream
+/// cipher that can support arbitrary-length ciphertexts. If OpenSSL version at runtime
+/// is 1.0.1 or above, CTR mode is used, otherwise CFB mode is used. The IV is used as
+/// an input to the cipher as the "block to supply before the first block of plaintext".
+/// This is required because all ciphers (except the weak ECB) are built such that each
+/// block depends on the output from the previous block. Since the first block doesn't
+/// have a previous block, we supply this IV. Think of it as starting off the chain of
/// encryption.
class EncryptionKey {
public:
- EncryptionKey() : initialized_(false) {}
+ EncryptionKey() : initialized_(false) {
+ mode_ = SSLeay() < OPENSSL_VERSION_1_0_1 ? AES_256_CFB : AES_256_CTR;
+ }
/// Initialize a key for temporary use with randomly generated data. Reinitializes with
- /// new random values if the key was already initialized. We use AES-CFB mode so key/IV
- /// pairs should not be reused. This function automatically reseeds the RNG
+ /// new random values if the key was already initialized. We use AES-CTR/AES-CFB mode
+ /// so key/IV pairs should not be reused. This function automatically reseeds the RNG
/// periodically, so callers do not need to do it.
void InitializeRandom();
@@ -75,6 +87,11 @@ class EncryptionKey {
/// otherwise the buffers must not overlap.
Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT;
+ /// Specify a cipher mode. Currently used only for testing but maybe in future we
+ /// can provide a configuration option for the end user who can choose a preferred
+ /// mode(GCM, CTR, CFB...) based on their software/hardware environment.
+ void SetCipherMode(AES_CIPHER_MODE m) { mode_ = m; }
+
private:
/// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively.
/// A buffer of input data 'data' of length 'len' is encrypted/decrypted with this
@@ -88,11 +105,17 @@ class EncryptionKey {
/// uninitialized keys.
bool initialized_;
+ /// return a EVP_CIPHER according to cipher mode at runtime
+ const EVP_CIPHER* GetCipher() const;
+
/// An AES 256-bit key.
uint8_t key_[32];
/// An initialization vector to feed as the first block to AES.
uint8_t iv_[AES_BLOCK_SIZE];
+
+ /// Cipher Mode
+ AES_CIPHER_MODE mode_;
};
}
[3/4] impala git commit: IMPALA-5317: [DOCS] Doc for DATE_TRUNC()
function
Posted by jr...@apache.org.
IMPALA-5317: [DOCS] Doc for DATE_TRUNC() function
Change-Id: Ifcf38903bb10db12cbb8d73a2dc875aef29cd359
Reviewed-on: http://gerrit.cloudera.org:8080/8768
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1f4d687a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1f4d687a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1f4d687a
Branch: refs/heads/master
Commit: 1f4d687a9bd51a5c869dd806fb31449cdfb34180
Parents: c86b0a9
Author: John Russell <jr...@cloudera.com>
Authored: Tue Dec 5 14:19:22 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 18:41:31 2018 +0000
----------------------------------------------------------------------
docs/impala_keydefs.ditamap | 4 ++
docs/shared/impala_common.xml | 5 +-
docs/topics/impala_datetime_functions.xml | 96 ++++++++++++++++++++++++++
3 files changed, 104 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/1f4d687a/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 56de937..02cff8a 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10516,6 +10516,7 @@ under the License.
<keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
<!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+ <keydef keys="impala211"><topicmeta><keywords><keyword>Impala 2.11</keyword></keywords></topicmeta></keydef>
<keydef keys="impala210"><topicmeta><keywords><keyword>Impala 2.10</keyword></keywords></topicmeta></keydef>
<keydef keys="impala29"><topicmeta><keywords><keyword>Impala 2.9</keyword></keywords></topicmeta></keydef>
<keydef keys="impala28"><topicmeta><keywords><keyword>Impala 2.8</keyword></keywords></topicmeta></keydef>
@@ -10531,6 +10532,9 @@ under the License.
<keydef keys="impala13"><topicmeta><keywords><keyword>Impala 1.3</keyword></keywords></topicmeta></keydef>
<!-- 3-part forms of version numbers, for use in release notes. -->
+<!-- For the 2.11.0 entry, have to space out the digits with underscores to avoid a conflict with the
+ keydef for Impala 2.1.10. -->
+ <keydef keys="impala2_11_0"><topicmeta><keywords><keyword>Impala 2.11.0</keyword></keywords></topicmeta></keydef>
<keydef keys="impala2100"><topicmeta><keywords><keyword>Impala 2.10.0</keyword></keywords></topicmeta></keydef>
<keydef keys="impala290"><topicmeta><keywords><keyword>Impala 2.9.0</keyword></keywords></topicmeta></keydef>
<keydef keys="impala280"><topicmeta><keywords><keyword>Impala 2.8.0</keyword></keywords></topicmeta></keydef>
http://git-wip-us.apache.org/repos/asf/impala/blob/1f4d687a/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index c272893..dc8cdb5 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -2787,7 +2787,10 @@ flight_num: INT32 SNAPPY DO:83456393 FPO:83488603 SZ:10216514/11474301
each value.
</p>
- <p rev="2.9.0" id="added_in_2100">
+ <p rev="2.11.0" id="added_in_2110">
+ <b>Added in:</b> <keyword keyref="impala2_11_0"/>
+ </p>
+ <p rev="2.10.0" id="added_in_2100">
<b>Added in:</b> <keyword keyref="impala2100"/>
</p>
<p rev="2.9.0" id="added_in_290">
http://git-wip-us.apache.org/repos/asf/impala/blob/1f4d687a/docs/topics/impala_datetime_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_datetime_functions.xml b/docs/topics/impala_datetime_functions.xml
index de8291b..f4d062a 100644
--- a/docs/topics/impala_datetime_functions.xml
+++ b/docs/topics/impala_datetime_functions.xml
@@ -389,6 +389,102 @@ select date_sub(cast('2016-05-31' as timestamp), interval 1 months) as 'april_31
</dlentry>
+ <dlentry rev="2.11.0 IMPALA-5317" id="date_trunc">
+
+ <dt>
+ <codeph>date_trunc(string unit, timestamp)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="hidden">date_trunc() function</indexterm>
+ <b>Purpose:</b> Truncates a <codeph>TIMESTAMP</codeph> value to the specified precision.
+ <p>
+ <b>Unit argument:</b> The <codeph>unit</codeph> argument value for truncating
+ <codeph>TIMESTAMP</codeph> values is not case-sensitive. This argument string
+ can be one of:
+ </p>
+ <ul>
+ <li>microseconds</li>
+ <li>milliseconds</li>
+ <li>second</li>
+ <li>minute</li>
+ <li>hour</li>
+ <li>day</li>
+ <li>week</li>
+ <li>month</li>
+ <li>year</li>
+ <li>decade</li>
+ <li>century</li>
+ <li>millennium</li>
+ </ul>
+ <p>
+ For example, calling <codeph>date_trunc('hour',ts)</codeph> truncates
+ <codeph>ts</codeph> to the beginning of the corresponding hour, with
+ all minutes, seconds, milliseconds, and so on set to zero. Calling
+ <codeph>date_trunc('milliseconds',ts)</codeph> truncates
+ <codeph>ts</codeph> to the beginning of the corresponding millisecond,
+ with all microseconds and nanoseconds set to zero.
+ </p>
+ <note>
+ The sub-second units are specified in plural form. All units representing
+ one second or more are specified in singular form.
+ </note>
+ <p conref="../shared/impala_common.xml#common/added_in_2110"/>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ Although this function is similar to calling <codeph>TRUNC()</codeph>
+ with a <codeph>TIMESTAMP</codeph> argument, the order of arguments
+ and the recognized units are different between <codeph>TRUNC()</codeph>
+ and <codeph>DATE_TRUNC()</codeph>. Therefore, these functions are not
+ interchangeable.
+ </p>
+ <p>
+ This function is typically used in <codeph>GROUP BY</codeph>
+ queries to aggregate results from the same hour, day, week, month, quarter, and so on.
+ You can also use this function in an <codeph>INSERT ... SELECT</codeph> into a
+ partitioned table to divide <codeph>TIMESTAMP</codeph> values into the correct partition.
+ </p>
+ <p>
+ Because the return value is a <codeph>TIMESTAMP</codeph>, if you cast the result of
+ <codeph>DATE_TRUNC()</codeph> to <codeph>STRING</codeph>, you will often see zeroed-out portions such as
+ <codeph>00:00:00</codeph> in the time field. If you only need the individual units such as hour, day,
+ month, or year, use the <codeph>EXTRACT()</codeph> function instead. If you need the individual units
+ from a truncated <codeph>TIMESTAMP</codeph> value, run the <codeph>DATE_TRUNC()</codeph> function on the
+ original value, then run <codeph>EXTRACT()</codeph> on the result.
+ </p>
+ <p>
+ <b>Return type:</b> <codeph>timestamp</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p>
+ The following examples show how to call <codeph>DATE_TRUNC()</codeph> with different unit values:
+ </p>
+<codeblock>
+select now(), date_trunc('second', now());
++-------------------------------+-----------------------------------+
+| now() | date_trunc('second', now()) |
++-------------------------------+-----------------------------------+
+| 2017-12-05 13:58:04.565403000 | 2017-12-05 13:58:04 |
++-------------------------------+-----------------------------------+
+
+select now(), date_trunc('hour', now());
++-------------------------------+---------------------------+
+| now() | date_trunc('hour', now()) |
++-------------------------------+---------------------------+
+| 2017-12-05 13:59:01.884459000 | 2017-12-05 13:00:00 |
++-------------------------------+---------------------------+
+
+select now(), date_trunc('millennium', now());
++-------------------------------+---------------------------------+
+| now() | date_trunc('millennium', now()) |
++-------------------------------+---------------------------------+
+| 2017-12-05 14:00:30.296812000 | 2000-01-01 00:00:00 |
++-------------------------------+---------------------------------+
+</codeblock>
+ </dd>
+
+ </dlentry>
+
<dlentry id="datediff">
<dt>
[4/4] impala git commit: [DOCS] Recommend using Kudu Java API for
rapid DMLs
Posted by jr...@apache.org.
[DOCS] Recommend using Kudu Java API for rapid DMLs
Change-Id: I0098f0c3d5d07c89e6bb589c4c04edce300c1ad3
Reviewed-on: http://gerrit.cloudera.org:8080/8976
Reviewed-by: Jean-Daniel Cryans <jd...@apache.org>
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/31c6a171
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/31c6a171
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/31c6a171
Branch: refs/heads/master
Commit: 31c6a1719a271810f0ec09873a3424311e5627ec
Parents: 1f4d687
Author: John Russell <jr...@cloudera.com>
Authored: Tue Jan 9 11:56:19 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 18:42:04 2018 +0000
----------------------------------------------------------------------
docs/topics/impala_jdbc.xml | 14 ++++++++++++++
1 file changed, 14 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/31c6a171/docs/topics/impala_jdbc.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_jdbc.xml b/docs/topics/impala_jdbc.xml
index 8d3599f..c920ec1 100644
--- a/docs/topics/impala_jdbc.xml
+++ b/docs/topics/impala_jdbc.xml
@@ -334,4 +334,18 @@ ARRAY<VARCHAR(10)> becomes ARRAY<VARCHAR(10)>
</conbody>
</concept>
+ <concept id="jdbc_kudu">
+ <title>Kudu Considerations for DML Statements</title>
+ <conbody>
+ <p>
+ Currently, Impala <codeph>INSERT</codeph>, <codeph>UPDATE</codeph>, or
+ other DML statements issued through the JDBC interface against a Kudu
+ table do not return JDBC error codes for conditions such as duplicate
+ primary key columns. Therefore, for applications that issue a high
+ volume of DML statements, prefer to use the Kudu Java API directly
+ rather than a JDBC application.
+ </p>
+ </conbody>
+ </concept>
+
</concept>
[2/4] impala git commit: IMPALA-5014: Part 2: Round when casting
decimal to timestamp
Posted by jr...@apache.org.
IMPALA-5014: Part 2: Round when casting decimal to timestamp
When a decimal has more digits to the right of the decimal point than a
timestamp can represent (nanosecond precision, i.e. 9 digits), we would
always truncate when casting to timestamp. In this patch we change the
behavior to round instead of truncating when decimal_v2 is enabled.
Testing:
- Added some EE tests, ran BE tests on my machine.
Change-Id: I8fb3a7d976ab980b8572d7e9524850572bad57da
Reviewed-on: http://gerrit.cloudera.org:8080/8969
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c86b0a97
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c86b0a97
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c86b0a97
Branch: refs/heads/master
Commit: c86b0a9736ee1e19b95a2d06771ca2ab8577950f
Parents: 514dfaf
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Thu Dec 21 15:47:06 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 05:47:23 2018 +0000
----------------------------------------------------------------------
be/src/exprs/decimal-operators-ir.cc | 27 ++++++++-----
be/src/exprs/decimal-operators.h | 11 ++++--
be/src/runtime/timestamp-test.cc | 16 ++++++++
be/src/runtime/timestamp-value.h | 3 +-
.../queries/QueryTest/decimal-exprs.test | 41 ++++++++++++++++++++
5 files changed, 83 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/c86b0a97/be/src/exprs/decimal-operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators-ir.cc b/be/src/exprs/decimal-operators-ir.cc
index 8612561..fd0c404 100644
--- a/be/src/exprs/decimal-operators-ir.cc
+++ b/be/src/exprs/decimal-operators-ir.cc
@@ -596,7 +596,8 @@ StringVal DecimalOperators::CastToStringVal(
}
template <typename T>
-IR_ALWAYS_INLINE int32_t DecimalOperators::ConvertToNanoseconds(T val, int scale) {
+IR_ALWAYS_INLINE int32_t DecimalOperators::ConvertToNanoseconds(
+ T val, int scale, bool round) {
// Nanosecond scale means there should be 9 decimal digits, which is representable
// with int32_t.
const int NANOSECOND_SCALE = 9;
@@ -605,10 +606,11 @@ IR_ALWAYS_INLINE int32_t DecimalOperators::ConvertToNanoseconds(T val, int scale
nanoseconds = val * DecimalUtil::GetScaleMultiplier<T>(
NANOSECOND_SCALE - scale);
} else {
- nanoseconds = val / DecimalUtil::GetScaleMultiplier<T>(
- scale - NANOSECOND_SCALE);
+ nanoseconds = DecimalUtil::ScaleDownAndRound<T>(
+ val, scale - NANOSECOND_SCALE, round);
+ DCHECK(nanoseconds <= 1000000000);
+ DCHECK(nanoseconds != 1000000000 || round);
}
-
DCHECK(nanoseconds >= numeric_limits<int32_t>::min()
&& nanoseconds <= numeric_limits<int32_t>::max());
@@ -616,7 +618,8 @@ IR_ALWAYS_INLINE int32_t DecimalOperators::ConvertToNanoseconds(T val, int scale
}
template <typename T>
-TimestampVal DecimalOperators::ConvertToTimestampVal(const T& decimal_value, int scale) {
+TimestampVal DecimalOperators::ConvertToTimestampVal(
+ const T& decimal_value, int scale, bool round) {
typename T::StorageType seconds = decimal_value.whole_part(scale);
if (seconds < numeric_limits<int64_t>::min() ||
seconds > numeric_limits<int64_t>::max()) {
@@ -624,8 +627,8 @@ TimestampVal DecimalOperators::ConvertToTimestampVal(const T& decimal_value, int
return TimestampVal::null();
}
int32_t nanoseconds =
- ConvertToNanoseconds(decimal_value.fractional_part(scale), scale);
- if(decimal_value.is_negative()) nanoseconds *= -1;
+ ConvertToNanoseconds(decimal_value.fractional_part(scale), scale, round);
+ if (decimal_value.is_negative()) nanoseconds *= -1;
TimestampVal result;
TimestampValue::FromUnixTimeNanos(seconds, nanoseconds).ToTimestampVal(&result);
return result;
@@ -637,11 +640,15 @@ TimestampVal DecimalOperators::CastToTimestampVal(
if (val.is_null) return TimestampVal::null();
int precision = ctx->impl()->GetConstFnAttr(FunctionContextImpl::ARG_TYPE_PRECISION, 0);
int scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::ARG_TYPE_SCALE, 0);
+ bool is_decimal_v2 = ctx->impl()->GetConstFnAttr(FunctionContextImpl::DECIMAL_V2);
TimestampVal result;
switch (ColumnType::GetDecimalByteSize(precision)) {
- case 4: return ConvertToTimestampVal(Decimal4Value(val.val4), scale);
- case 8: return ConvertToTimestampVal(Decimal8Value(val.val8), scale);
- case 16: return ConvertToTimestampVal(Decimal16Value(val.val16), scale);
+ case 4:
+ return ConvertToTimestampVal(Decimal4Value(val.val4), scale, is_decimal_v2);
+ case 8:
+ return ConvertToTimestampVal(Decimal8Value(val.val8), scale, is_decimal_v2);
+ case 16:
+ return ConvertToTimestampVal(Decimal16Value(val.val16), scale, is_decimal_v2);
default:
DCHECK(false);
return TimestampVal::null();
http://git-wip-us.apache.org/repos/asf/impala/blob/c86b0a97/be/src/exprs/decimal-operators.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators.h b/be/src/exprs/decimal-operators.h
index c2d8779..e34dbf1 100644
--- a/be/src/exprs/decimal-operators.h
+++ b/be/src/exprs/decimal-operators.h
@@ -163,13 +163,16 @@ class DecimalOperators {
static T RoundDelta(const DecimalValue<T>& v, int src_scale,
int target_scale, const DecimalRoundOp& op);
- /// Converts a decimal value (interpreted as unix time) to TimestampVal.
+ /// Converts a decimal value (interpreted as unix time) to TimestampVal. Rounds
+ /// instead of truncating if 'round' is true.
template <typename T>
- static TimestampVal ConvertToTimestampVal(const T& decimal_value, int scale);
+ static TimestampVal ConvertToTimestampVal(
+ const T& decimal_value, int scale, bool round);
- /// Converts fractional 'val' with the given 'scale' to nanoseconds.
+ /// Converts fractional 'val' with the given 'scale' to nanoseconds. Rounds
+ /// instead of truncating if 'round' is true.
template <typename T>
- static int32_t ConvertToNanoseconds(T val, int scale);
+ static int32_t ConvertToNanoseconds(T val, int scale, bool round);
};
}
http://git-wip-us.apache.org/repos/asf/impala/blob/c86b0a97/be/src/runtime/timestamp-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc
index 3a54026..66bd896 100644
--- a/be/src/runtime/timestamp-test.cc
+++ b/be/src/runtime/timestamp-test.cc
@@ -732,6 +732,22 @@ TEST(TimestampTest, Basic) {
EXPECT_EQ("2038-01-19 03:14:09",
TimestampValue::FromUnixTime(2147483649).ToString());
+ // Tests for the cases where abs(nanoseconds) >= 1e9.
+ EXPECT_EQ("2018-01-10 16:00:00",
+ TimestampValue::FromUnixTimeNanos(1515600000, 0).ToString());
+ EXPECT_EQ("2018-01-10 16:00:00.999999999",
+ TimestampValue::FromUnixTimeNanos(1515600000, 999999999).ToString());
+ EXPECT_EQ("2018-01-10 15:59:59.000000001",
+ TimestampValue::FromUnixTimeNanos(1515600000, -999999999).ToString());
+ EXPECT_EQ("2018-01-10 16:00:01",
+ TimestampValue::FromUnixTimeNanos(1515600000, 1000000000).ToString());
+ EXPECT_EQ("2018-01-10 15:59:59",
+ TimestampValue::FromUnixTimeNanos(1515600000, -1000000000).ToString());
+ EXPECT_EQ("2018-01-10 16:30:00",
+ TimestampValue::FromUnixTimeNanos(1515600000, 1800000000000).ToString());
+ EXPECT_EQ("2018-01-10 15:30:00",
+ TimestampValue::FromUnixTimeNanos(1515600000, -1800000000000).ToString());
+
// Test FromUnixTime around the boundary of the values that are converted via boost via
// gmtime (IMPALA-5357). Tests 1 second before and after the values supported by the
// boost conversion logic.
http://git-wip-us.apache.org/repos/asf/impala/blob/c86b0a97/be/src/runtime/timestamp-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-value.h b/be/src/runtime/timestamp-value.h
index 445189a..5a5e733 100644
--- a/be/src/runtime/timestamp-value.h
+++ b/be/src/runtime/timestamp-value.h
@@ -100,7 +100,8 @@ class TimestampValue {
}
/// Same as FromUnixTime() above, but adds the specified number of nanoseconds to the
- /// resulting TimestampValue. Handles negative nanoseconds too.
+ /// resulting TimestampValue. Handles negative nanoseconds and the case where
+ /// abs(nanos) >= 1e9.
static TimestampValue FromUnixTimeNanos(time_t unix_time, int64_t nanos) {
boost::posix_time::ptime temp = UnixTimeToPtime(unix_time);
temp += boost::posix_time::nanoseconds(nanos);
http://git-wip-us.apache.org/repos/asf/impala/blob/c86b0a97/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
index 328fbaf..be75c23 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
@@ -413,3 +413,44 @@ cast(42608445511 as decimal(18, 0)) % 3
---- TYPES
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
====
+---- QUERY
+# IMPALA-5014: Check that we round when converting a decimal to timestamp
+set decimal_v2=false;
+select
+cast(cast(12333333333.9999999994 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.9999999995 as decimal(38, 10)) as timestamp),
+cast(cast(333.9999999994 as decimal(13, 10)) as timestamp),
+cast(cast(333.9999999995 as decimal(13, 10)) as timestamp),
+cast(cast(12333333333.1111111114 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.1111111115 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.111111111411111 as decimal(38, 15)) as timestamp),
+cast(cast(12333333333.111111111511111 as decimal(38, 15)) as timestamp),
+cast(cast(12333333333.1111111114 as decimal(38, 27)) as timestamp),
+cast(cast(12333333333.1111111115 as decimal(38, 27)) as timestamp),
+cast(cast(12333333333.111 as decimal(38, 3)) as timestamp),
+cast(cast(12333333333 as decimal(38, 0)) as timestamp);
+---- RESULTS
+2360-10-29 21:55:33.999999999,2360-10-29 21:55:33.999999999,1970-01-01 00:05:33.999999999,1970-01-01 00:05:33.999999999,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111000000,2360-10-29 21:55:33
+---- TYPES
+TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP
+====
+---- QUERY
+set decimal_v2=true;
+select
+cast(cast(12333333333.9999999994 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.9999999995 as decimal(38, 10)) as timestamp),
+cast(cast(333.9999999994 as decimal(13, 10)) as timestamp),
+cast(cast(333.9999999995 as decimal(13, 10)) as timestamp),
+cast(cast(12333333333.1111111114 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.1111111115 as decimal(38, 10)) as timestamp),
+cast(cast(12333333333.111111111411111 as decimal(38, 15)) as timestamp),
+cast(cast(12333333333.111111111511111 as decimal(38, 15)) as timestamp),
+cast(cast(12333333333.1111111114 as decimal(38, 27)) as timestamp),
+cast(cast(12333333333.1111111115 as decimal(38, 27)) as timestamp),
+cast(cast(12333333333.111 as decimal(38, 3)) as timestamp),
+cast(cast(12333333333 as decimal(38, 0)) as timestamp);
+---- RESULTS
+2360-10-29 21:55:33.999999999,2360-10-29 21:55:34,1970-01-01 00:05:33.999999999,1970-01-01 00:05:34,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111112,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111112,2360-10-29 21:55:33.111111111,2360-10-29 21:55:33.111111112,2360-10-29 21:55:33.111000000,2360-10-29 21:55:33
+---- TYPES
+TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP, TIMESTAMP
+====