You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/08/08 19:02:23 UTC

[09/11] incubator-impala git commit: IMPALA-5572: Timestamp codegen for text scanner

IMPALA-5572: Timestamp codegen for text scanner

Currently codegen is disabled when scanning text tables with timestamp
columns. The message is "Timestamp not yet supported for codegen."
This patch adds support for timestamp codegen.
A simple query in the comment section of this issue performs a little
better (4%) than interpreted version.

Testing: The patch passed test with exhaustive workload exploration
strategy.

Change-Id: I00cbf8ec7784ca9594e14e952f46dc54a5ede44b
Reviewed-on: http://gerrit.cloudera.org:8080/7556
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d61065d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d61065d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d61065d6

Branch: refs/heads/master
Commit: d61065d6383ece19fdafd2526ecf00dab4e1f4d4
Parents: 0c46147
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Tue Aug 1 14:52:55 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Aug 8 10:23:29 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py |  1 +
 be/src/exec/hdfs-scanner-ir.cc        |  6 ++++++
 be/src/exec/hdfs-scanner.cc           |  3 ---
 be/src/exec/text-converter.cc         | 19 ++++++++++++++++---
 be/src/util/string-parser.h           | 11 +++++++++++
 5 files changed, 34 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index be4be80..3cc195c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -185,6 +185,7 @@ ir_functions = [
   ["STRING_TO_INT64", "IrStringToInt64"],
   ["STRING_TO_FLOAT", "IrStringToFloat"],
   ["STRING_TO_DOUBLE", "IrStringToDouble"],
+  ["STRING_TO_TIMESTAMP", "IrStringToTimestamp"],
   ["IS_NULL_STRING", "IrIsNullString"],
   ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
   ["RAW_VALUE_COMPARE",

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/hdfs-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc
index 96aad07..143adb7 100644
--- a/be/src/exec/hdfs-scanner-ir.cc
+++ b/be/src/exec/hdfs-scanner-ir.cc
@@ -117,6 +117,12 @@ double IrStringToDouble(const char* s, int len, StringParser::ParseResult* resul
 }
 
 extern "C"
+TimestampValue IrStringToTimestamp(const char* s, int len,
+    StringParser::ParseResult* result) {
+  return StringParser::StringToTimestamp(s, len, result);
+}
+
+extern "C"
 bool IrIsNullString(const char* data, int len) {
   return data == NULL || (len == 2 && data[0] == '\\' && data[1] == 'N');
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index 10e4edd..23fcc20 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -311,9 +311,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
   // TODO: Timestamp is not yet supported
   for (int i = 0; i < node->materialized_slots().size(); ++i) {
     SlotDescriptor* slot_desc = node->materialized_slots()[i];
-    if (slot_desc->type().type == TYPE_TIMESTAMP) {
-      return Status::Expected("Timestamp not yet supported for codegen.");
-    }
     if (slot_desc->type().type == TYPE_DECIMAL) {
       return Status::Expected("Decimal not yet supported for codegen.");
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index e12be85..db390c4 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -220,6 +220,9 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
       case TYPE_DOUBLE:
         parse_fn_enum = IRFunction::STRING_TO_DOUBLE;
         break;
+      case TYPE_TIMESTAMP:
+        parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP;
+        break;
       default:
         DCHECK(false);
         return NULL;
@@ -234,9 +237,18 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
     LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->GetType(TYPE_INT));
     Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(fn, parse_result);
 
+    Value* parse_return;
     // Call Impala's StringTo* function
-    Value* result = builder.CreateCall(parse_fn,
-        ArrayRef<Value*>({args[1], args[2], parse_result_ptr}));
+    if (parse_fn->arg_size() == 3) {
+      parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr});
+    } else {
+      DCHECK(parse_fn->arg_size() == 4);
+      // If the return value is large (more than 16 bytes in our toolchain) the first
+      // parameter would be a pointer to value parsed and the return value of callee
+      // should be ignored
+      builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr});
+      parse_return = nullptr;
+    }
     Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result");
     Value* failed_value = codegen->GetIntConstant(TYPE_INT, StringParser::PARSE_FAILURE);
 
@@ -254,7 +266,8 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
 
     // Parse succeeded
     builder.SetInsertPoint(parse_success_block);
-    builder.CreateStore(result, slot);
+    // If the parsed value is in parse_return, move it into slot
+    if (parse_fn->arg_size() == 3) builder.CreateStore(parse_return, slot);
     builder.CreateRet(codegen->true_value());
 
     // Parse failed, set slot to null and return false

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index cc9c517..d6b94c7 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -25,6 +25,8 @@
 
 #include "common/logging.h"
 #include "runtime/decimal-value.h"
+#include "runtime/timestamp-parse-util.h"
+#include "runtime/timestamp-value.h"
 #include "util/decimal-util.h"
 
 namespace impala {
@@ -95,6 +97,15 @@ class StringParser {
     return StringToBoolInternal(s + i, len - i, result);
   }
 
+  /// Parse a TimestampValue from s.
+  static inline TimestampValue StringToTimestamp(const char* s, int len,
+      ParseResult* result) {
+    boost::gregorian::date d;
+    boost::posix_time::time_duration t;
+    *result = TimestampParser::Parse(s, len, &d, &t) ? PARSE_SUCCESS : PARSE_FAILURE;
+    return {d, t};
+  }
+
   /// Parses a decimal from s, returning the result.
   /// The parse status is returned in *result.
   /// On overflow or invalid values, the return value is undefined.