Posted to commits@hive.apache.org by jd...@apache.org on 2017/05/19 17:55:49 UTC

hive git commit: HIVE-16702: Use LazyBinarySerDe for LLAP InputFormat (Jason Dere, reviewed by Gunther Hagleitner)

Repository: hive
Updated Branches:
  refs/heads/master fa59d4753 -> 85415f7b8


HIVE-16702: Use LazyBinarySerDe for LLAP InputFormat (Jason Dere, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/85415f7b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/85415f7b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/85415f7b

Branch: refs/heads/master
Commit: 85415f7b8a6054da2826566176f7ddd394a0d885
Parents: fa59d47
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri May 19 10:55:06 2017 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri May 19 10:55:06 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hive/llap/LlapRowRecordReader.java   | 20 ++++++++++++--------
 .../hadoop/hive/llap/LlapRowInputFormat.java    |  6 ++++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  7 ++++++-
 3 files changed, 22 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/85415f7b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
index e3c0955..c50d226 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -48,6 +49,7 @@ import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -68,15 +70,17 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
   private static final Logger LOG = LoggerFactory.getLogger(LlapRowRecordReader.class);
 
   protected final Configuration conf;
-  protected final RecordReader<NullWritable, Text> reader;
+  protected final RecordReader<NullWritable, BytesWritable> reader;
   protected final Schema schema;
   protected final AbstractSerDe serde;
-  protected final Text textData = new Text();
+  protected final BytesWritable data;
 
-  public LlapRowRecordReader(Configuration conf, Schema schema, RecordReader<NullWritable, Text> reader) throws IOException {
+  public LlapRowRecordReader(Configuration conf, Schema schema,
+      RecordReader<NullWritable, BytesWritable> reader) throws IOException {
     this.conf = conf;
     this.schema = schema;
     this.reader = reader;
+    this.data = new BytesWritable();
 
     try {
       serde = initSerDe(conf);
@@ -114,17 +118,17 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
   public boolean next(NullWritable key, Row value) throws IOException {
     Preconditions.checkArgument(value != null);
 
-    boolean hasNext = reader.next(key,  textData);
+    boolean hasNext = reader.next(key,  data);
     if (hasNext) {
-      // Deserialize Text to column values, and populate the row record
+      // Deserialize data to column values, and populate the row record
       Object rowObj;
       try {
         StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
-        rowObj = serde.deserialize(textData);
+        rowObj = serde.deserialize(data);
         setRowFromStruct(value, rowObj, rowOI);
       } catch (SerDeException err) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Error deserializing row from text: " + textData);
+          LOG.debug("Error deserializing row from data: " + data);
         }
         throw new IOException("Error deserializing row data", err);
       }
@@ -246,7 +250,7 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
     props.put(serdeConstants.LIST_COLUMNS, columns);
     props.put(serdeConstants.LIST_COLUMN_TYPES, types);
     props.put(serdeConstants.ESCAPE_CHAR, "\\");
-    AbstractSerDe serde = new LazySimpleSerDe();
+    AbstractSerDe serde = new LazyBinarySerDe();
     serde.initialize(conf, props);
 
     return serde;

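For context on the swap above: initSerDe() now builds a LazyBinarySerDe from the same
column/type properties it previously fed to LazySimpleSerDe. A minimal standalone sketch
of that serde's round trip (illustrative only, not part of this patch; the two-column
schema and the row values are made up):

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;

public class LazyBinaryRoundTrip {
  public static void main(String[] args) throws Exception {
    // Same initialization pattern as initSerDe() above: columns and types as properties.
    Properties props = new Properties();
    props.put(serdeConstants.LIST_COLUMNS, "id,name");          // made-up schema
    props.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    LazyBinarySerDe serde = new LazyBinarySerDe();
    serde.initialize(new Configuration(), props);

    // An object inspector describing the same two-column row shape.
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // Encode a row to the compact binary form, then decode it again; the decode
    // half is what LlapRowRecordReader.next() now performs on each BytesWritable.
    BytesWritable data = (BytesWritable) serde.serialize(Arrays.asList(1, "alice"), rowOI);
    Object row = serde.deserialize(data);
    System.out.println(serde.getObjectInspector().getTypeName() + ": " + row);
  }
}
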
http://git-wip-us.apache.org/repos/asf/hive/blob/85415f7b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapRowInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapRowInputFormat.java b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapRowInputFormat.java
index c3001e9..4a6e9b1 100644
--- a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapRowInputFormat.java
+++ b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapRowInputFormat.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.llap.LlapRowRecordReader;
 import org.apache.hadoop.hive.llap.Row;
 import org.apache.hadoop.hive.llap.Schema;
 
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -37,7 +38,7 @@ import org.apache.hadoop.mapred.Reporter;
 
 public class LlapRowInputFormat implements InputFormat<NullWritable, Row> {
 
-  private LlapBaseInputFormat<Text> baseInputFormat = new LlapBaseInputFormat<Text>();
+  private LlapBaseInputFormat<BytesWritable> baseInputFormat = new LlapBaseInputFormat<BytesWritable>();
 
   @Override
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
@@ -48,7 +49,8 @@ public class LlapRowInputFormat implements InputFormat<NullWritable, Row> {
   public RecordReader<NullWritable, Row> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
       throws IOException {
     LlapInputSplit llapSplit = (LlapInputSplit) split;
-    LlapBaseRecordReader<Text> reader = (LlapBaseRecordReader<Text>) baseInputFormat.getRecordReader(llapSplit, job, reporter);
+    LlapBaseRecordReader<BytesWritable> reader =
+        (LlapBaseRecordReader<BytesWritable>) baseInputFormat.getRecordReader(llapSplit, job, reporter);
     return new LlapRowRecordReader(job, reader.getSchema(), reader);
   }
 }

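The public contract here is unchanged by the patch: callers still get a
RecordReader<NullWritable, Row>, and only the transport underneath moved from Text to
BytesWritable. A minimal consumption sketch (illustrative, not from this commit; the
JobConf is assumed to already carry the LLAP query and connection settings, and
process() is a hypothetical callback):

import java.io.IOException;

import org.apache.hadoop.hive.llap.LlapRowInputFormat;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class LlapRowScan {
  // Reads every row from every split. The job is assumed (hypothetically) to be
  // configured elsewhere with the query and LLAP connection details.
  static void scan(JobConf job, int numSplits) throws IOException {
    LlapRowInputFormat inputFormat = new LlapRowInputFormat();
    for (InputSplit split : inputFormat.getSplits(job, numSplits)) {
      RecordReader<NullWritable, Row> reader =
          inputFormat.getRecordReader(split, job, Reporter.NULL);
      Row row = reader.createValue();
      while (reader.next(NullWritable.get(), row)) {
        process(row); // hypothetical per-row callback
      }
      reader.close();
    }
  }

  static void process(Row row) {
    System.out.println(row);
  }
}
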
http://git-wip-us.apache.org/repos/asf/hive/blob/85415f7b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index eb7ef00..35fc68a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -216,6 +216,7 @@ import org.apache.hadoop.hive.serde2.NullStructSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -7138,9 +7139,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
               conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
           } else {
               fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
+              Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
+              if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
+                serdeClass = LazyBinarySerDe.class;
+              }
               table_desc =
                          PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat,
-                           LazySimpleSerDe.class);
+                           serdeClass);
           }
         } else {
           table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
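
The planner side of the change, in plain terms: when the query result file format is the
LLAP one (PlanUtils.LLAP_OUTPUT_FORMAT_KEY), the result table descriptor now names
LazyBinarySerDe instead of LazySimpleSerDe, so what the daemons write matches what
LlapRowRecordReader expects to decode. The branch distilled into a standalone helper
(an illustrative restatement, not code from the patch):

import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

public class ResultSerDeChooser {
  // Restates the conditional above: LLAP-bound query results get the binary
  // serde; every other result file format keeps LazySimpleSerDe, as before.
  static Class<? extends Deserializer> resultSerDe(String fileFormat) {
    return PlanUtils.LLAP_OUTPUT_FORMAT_KEY.equals(fileFormat)
        ? LazyBinarySerDe.class
        : LazySimpleSerDe.class;
  }
}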