You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/03 01:59:13 UTC
[02/40] hive git commit: HIVE-12878: Support Vectorization for
TEXTFILE and other formats (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out
new file mode 100644
index 0000000..4f00bed
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcstr
+PREHOOK: query: insert overwrite table orcstr select null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: insert overwrite table orcstr select "" from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select "" from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
index a733991..f23a359 100644
--- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
@@ -592,6 +592,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reducer 2
Execution mode: vectorized
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out
new file mode 100644
index 0000000..b2bf415
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out
@@ -0,0 +1,307 @@
+PREHOOK: query: explain
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Row Limit Per Split: 1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'key1' (type: string), 'value1' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+_c0 _c1
+key1 value1
+PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: explain
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Row Limit Per Split: 1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 17.29 (type: decimal(18,9))
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.decimal_2
+ Execution mode: vectorized
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.decimal_2
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: insert overwrite table decimal_2
+ select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@decimal_2
+POSTHOOK: Lineage: decimal_2.t EXPRESSION []
+_col0
+PREHOOK: query: select count(*) from decimal_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from decimal_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_2
+#### A masked pattern was here ####
+c0
+1
+PREHOOK: query: drop table decimal_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_2
+PREHOOK: Output: default@decimal_2
+POSTHOOK: query: drop table decimal_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_2
+POSTHOOK: Output: default@decimal_2
+PREHOOK: query: -- Dummy tables HIVE-13190
+explain
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Dummy tables HIVE-13190
+explain
+select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 1 (type: int)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+_c0
+1
+PREHOOK: query: explain
+create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain
+create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-4
+ Create Table Operator:
+ Create Table
+ columns: _c0 int
+ input format: org.apache.hadoop.mapred.TextInputFormat
+#### A masked pattern was here ####
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dual
+ isTemporary: true
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create temporary table dual as select 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dual
+POSTHOOK: query: create temporary table dual as select 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dual
+_c0
+PREHOOK: query: select * from dual
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dual
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dual
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dual
+#### A masked pattern was here ####
+dual._c0
+1
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
index a783a36..ef92b89 100644
--- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
@@ -554,6 +554,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index 25f2996..7e5f24d 100644
--- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -195,7 +195,6 @@ STAGE PLANS:
key expressions: 1 (type: int)
sort order: +
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
Reduce Operator Tree:
Select Operator
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
@@ -287,7 +286,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dual
- Execution mode: vectorized
Stage: Stage-7
Conditional Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index c6ff748..be36ba4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.serde2.binarysortable.fast;
-import java.io.EOFException;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Arrays;
@@ -29,21 +28,10 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.Text;
/*
@@ -60,11 +48,9 @@ import org.apache.hadoop.io.Text;
* other type specific buffers. So, those references are only valid until the next time set is
* called.
*/
-public final class BinarySortableDeserializeRead implements DeserializeRead {
+public final class BinarySortableDeserializeRead extends DeserializeRead {
public static final Logger LOG = LoggerFactory.getLogger(BinarySortableDeserializeRead.class.getName());
- private TypeInfo[] typeInfos;
-
// The sort order (ascending/descending) for each field. Set to true when descending (invert).
private boolean[] columnSortOrderIsDesc;
@@ -76,11 +62,10 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
private int start;
- private DecimalTypeInfo saveDecimalTypeInfo;
- private HiveDecimal saveDecimal;
+ private byte[] tempTimestampBytes;
+ private Text tempText;
private byte[] tempDecimalBuffer;
- private HiveDecimalWritable tempHiveDecimalWritable;
private boolean readBeyondConfiguredFieldsWarned;
private boolean readBeyondBufferRangeWarned;
@@ -97,7 +82,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
public BinarySortableDeserializeRead(TypeInfo[] typeInfos,
boolean[] columnSortOrderIsDesc) {
- this.typeInfos = typeInfos;
+ super(typeInfos);
fieldCount = typeInfos.length;
if (columnSortOrderIsDesc != null) {
this.columnSortOrderIsDesc = columnSortOrderIsDesc;
@@ -113,13 +98,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
// Not public since we must have column information.
private BinarySortableDeserializeRead() {
- }
-
- /*
- * The primitive type information for all fields.
- */
- public TypeInfo[] typeInfos() {
- return typeInfos;
+ super();
}
/*
@@ -148,42 +127,242 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
if (fieldIndex >= fieldCount) {
// Reading beyond the specified field count produces NULL.
if (!readBeyondConfiguredFieldsWarned) {
- // Warn only once.
- LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but "
- + " reading more (NULLs returned). Ignoring similar problems.");
- readBeyondConfiguredFieldsWarned = true;
+ doReadBeyondConfiguredFieldsWarned();
}
return true;
}
if (inputByteBuffer.isEof()) {
// Also, reading beyond our byte range produces NULL.
if (!readBeyondBufferRangeWarned) {
- // Warn only once.
- int length = inputByteBuffer.tell() - start;
- LOG.info("Reading beyond buffer range! Buffer range " + start
- + " for length " + length + " but reading more... "
- + "(total buffer length " + inputByteBuffer.getData().length + ")"
- + " Ignoring similar problems.");
- readBeyondBufferRangeWarned = true;
+ doReadBeyondBufferRangeWarned();
}
// We cannot read beyond so we must return NULL here.
return true;
}
- byte isNull = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
+ byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
- if (isNull == 0) {
+ if (isNullByte == 0) {
return true;
}
- // We have a field and are positioned to it.
+ /*
+ * We have a field and are positioned to it. Read it.
+ */
+ boolean isNull = false; // Assume.
+ switch (primitiveCategories[fieldIndex]) {
+ case BOOLEAN:
+ currentBoolean = (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) == 2);
+ break;
+ case BYTE:
+ currentByte = (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80);
+ break;
+ case SHORT:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int v = inputByteBuffer.read(invert) ^ 0x80;
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ currentShort = (short) v;
+ }
+ break;
+ case INT:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int v = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ currentInt = v;
+ }
+ break;
+ case LONG:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ long v = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 7; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ currentLong = v;
+ }
+ break;
+ case DATE:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int v = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ currentDateWritable.set(v);
+ }
+ break;
+ case TIMESTAMP:
+ {
+ if (tempTimestampBytes == null) {
+ tempTimestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
+ }
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ for (int i = 0; i < tempTimestampBytes.length; i++) {
+ tempTimestampBytes[i] = inputByteBuffer.read(invert);
+ }
+ currentTimestampWritable.setBinarySortable(tempTimestampBytes, 0);
+ }
+ break;
+ case FLOAT:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int v = 0;
+ for (int i = 0; i < 4; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ if ((v & (1 << 31)) == 0) {
+ // negative number, flip all bits
+ v = ~v;
+ } else {
+ // positive number, flip the first bit
+ v = v ^ (1 << 31);
+ }
+ currentFloat = Float.intBitsToFloat(v);
+ }
+ break;
+ case DOUBLE:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ long v = 0;
+ for (int i = 0; i < 8; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ if ((v & (1L << 63)) == 0) {
+ // negative number, flip all bits
+ v = ~v;
+ } else {
+ // positive number, flip the first bit
+ v = v ^ (1L << 63);
+ }
+ currentDouble = Double.longBitsToDouble(v);
+ }
+ break;
+ case BINARY:
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ {
+ if (tempText == null) {
+ tempText = new Text();
+ }
+ BinarySortableSerDe.deserializeText(
+ inputByteBuffer, columnSortOrderIsDesc[fieldIndex], tempText);
+ currentBytes = tempText.getBytes();
+ currentBytesStart = 0;
+ currentBytesLength = tempText.getLength();
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int v = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ currentHiveIntervalYearMonthWritable.set(v);
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ long totalSecs = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 7; i++) {
+ totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ int nanos = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+ currentHiveIntervalDayTimeWritable.set(totalSecs, nanos);
+ }
+ break;
+ case DECIMAL:
+ {
+ // Since enforcing precision and scale can cause a HiveDecimal to become NULL,
+ // we must read it, enforce it here, and either return NULL or buffer the result.
+
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int b = inputByteBuffer.read(invert) - 1;
+ assert (b == 1 || b == -1 || b == 0);
+ boolean positive = b != -1;
+
+ int factor = inputByteBuffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff);
+ }
+
+ if (!positive) {
+ factor = -factor;
+ }
+
+ int start = inputByteBuffer.tell();
+ int length = 0;
+
+ do {
+ b = inputByteBuffer.read(positive ? invert : !invert);
+ assert(b != 1);
+
+ if (b == 0) {
+ // end of digits
+ break;
+ }
+
+ length++;
+ } while (true);
+
+ if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) {
+ tempDecimalBuffer = new byte[length];
+ }
+
+ inputByteBuffer.seek(start);
+ for (int i = 0; i < length; ++i) {
+ tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
+ }
+
+ // read the null byte again
+ inputByteBuffer.read(positive ? invert : !invert);
+
+ String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet);
+ BigInteger bi = new BigInteger(digits);
+ HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length);
+
+ if (!positive) {
+ bd = bd.negate();
+ }
+
+ // We have a decimal. After we enforce precision and scale, will it become a NULL?
+
+ currentHiveDecimalWritable.set(bd);
- if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) {
- return false;
+ DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex];
+
+ int precision = decimalTypeInfo.getPrecision();
+ int scale = decimalTypeInfo.getScale();
+
+ HiveDecimal decimal = currentHiveDecimalWritable.getHiveDecimal(precision, scale);
+ if (decimal == null) {
+ isNull = true;
+ } else {
+ // Put value back into writable.
+ currentHiveDecimalWritable.set(decimal);
+ }
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected primitive type category " + primitiveCategories[fieldIndex]);
+ }
+
+ /*
+ * Now that we have read through the field -- did we really want it?
+ */
+ if (columnsToInclude != null && !columnsToInclude[fieldIndex]) {
+ isNull = true;
}
- // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read
- // it here.
- return earlyReadHiveDecimal();
+ return isNull;
}
/*
@@ -196,7 +375,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
// Warn only once.
int length = inputByteBuffer.getEnd() - start;
int remaining = inputByteBuffer.getEnd() - inputByteBuffer.tell();
- LOG.info("Not all fields were read in the buffer range! Buffer range " + start
+ LOG.info("Not all fields were read in the buffer range! Buffer range " + start
+ " for length " + length + " but " + remaining + " bytes remain. "
+ "(total buffer length " + inputByteBuffer.getData().length + ")"
+ " Ignoring similar problems.");
@@ -222,526 +401,23 @@ public final class BinarySortableDeserializeRead implements DeserializeRead {
}
/*
- * BOOLEAN.
- */
- @Override
- public boolean readBoolean() throws IOException {
- byte b = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]);
- return (b == 2);
- }
-
- /*
- * BYTE.
- */
- @Override
- public byte readByte() throws IOException {
- return (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80);
- }
-
- /*
- * SHORT.
- */
- @Override
- public short readShort() throws IOException {
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int v = inputByteBuffer.read(invert) ^ 0x80;
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- return (short) v;
- }
-
- /*
- * INT.
- */
- @Override
- public int readInt() throws IOException {
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int v = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- return v;
- }
-
- /*
- * LONG.
- */
- @Override
- public long readLong() throws IOException {
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- long v = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 7; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- return v;
- }
-
- /*
- * FLOAT.
+ * Pull these out of the regular execution path.
*/
- @Override
- public float readFloat() throws IOException {
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int v = 0;
- for (int i = 0; i < 4; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- if ((v & (1 << 31)) == 0) {
- // negative number, flip all bits
- v = ~v;
- } else {
- // positive number, flip the first bit
- v = v ^ (1 << 31);
- }
- return Float.intBitsToFloat(v);
- }
-
- /*
- * DOUBLE.
- */
- @Override
- public double readDouble() throws IOException {
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- long v = 0;
- for (int i = 0; i < 8; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- if ((v & (1L << 63)) == 0) {
- // negative number, flip all bits
- v = ~v;
- } else {
- // positive number, flip the first bit
- v = v ^ (1L << 63);
- }
- return Double.longBitsToDouble(v);
- }
-
- // This class is for internal use.
- private static class BinarySortableReadStringResults extends ReadStringResults {
-
- // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable
- // format string into.
- private Text text;
-
- public BinarySortableReadStringResults() {
- super();
- text = new Text();
- }
- }
-
- // Reading a STRING field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different bytes field.
- @Override
- public ReadStringResults createReadStringResults() {
- return new BinarySortableReadStringResults();
- }
-
-
- @Override
- public void readString(ReadStringResults readStringResults) throws IOException {
- BinarySortableReadStringResults binarySortableReadStringResults =
- (BinarySortableReadStringResults) readStringResults;
-
- BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadStringResults.text);
- readStringResults.bytes = binarySortableReadStringResults.text.getBytes();
- readStringResults.start = 0;
- readStringResults.length = binarySortableReadStringResults.text.getLength();
- }
-
-
- /*
- * CHAR.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadHiveCharResults extends ReadHiveCharResults {
-
- public BinarySortableReadHiveCharResults() {
- super();
- }
-
- public HiveCharWritable getHiveCharWritable() {
- return hiveCharWritable;
- }
- }
-
- // Reading a CHAR field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different CHAR field.
- @Override
- public ReadHiveCharResults createReadHiveCharResults() {
- return new BinarySortableReadHiveCharResults();
- }
-
- public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException {
- BinarySortableReadHiveCharResults binarySortableReadHiveCharResults =
- (BinarySortableReadHiveCharResults) readHiveCharResults;
-
- if (!binarySortableReadHiveCharResults.isInit()) {
- binarySortableReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]);
- }
-
- HiveCharWritable hiveCharWritable = binarySortableReadHiveCharResults.getHiveCharWritable();
-
- // Decode the bytes into our Text buffer, then truncate.
- BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveCharWritable.getTextValue());
- hiveCharWritable.enforceMaxLength(binarySortableReadHiveCharResults.getMaxLength());
-
- readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes();
- readHiveCharResults.start = 0;
- readHiveCharResults.length = hiveCharWritable.getTextValue().getLength();
- }
-
- /*
- * VARCHAR.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadHiveVarcharResults extends ReadHiveVarcharResults {
-
- public BinarySortableReadHiveVarcharResults() {
- super();
- }
-
- public HiveVarcharWritable getHiveVarcharWritable() {
- return hiveVarcharWritable;
- }
- }
-
- // Reading a VARCHAR field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different VARCHAR field.
- @Override
- public ReadHiveVarcharResults createReadHiveVarcharResults() {
- return new BinarySortableReadHiveVarcharResults();
- }
-
- public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException {
- BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults;
-
- if (!binarySortableReadHiveVarcharResults.isInit()) {
- binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]);
- }
-
- HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable();
-
- // Decode the bytes into our Text buffer, then truncate.
- BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveVarcharWritable.getTextValue());
- hiveVarcharWritable.enforceMaxLength(binarySortableReadHiveVarcharResults.getMaxLength());
-
- readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes();
- readHiveVarcharResults.start = 0;
- readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength();
- }
-
- /*
- * BINARY.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadBinaryResults extends ReadBinaryResults {
-
- // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable
- // format string into.
- private Text text;
-
- public BinarySortableReadBinaryResults() {
- super();
- text = new Text();
- }
- }
-
- // Reading a BINARY field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different bytes field.
- @Override
- public ReadBinaryResults createReadBinaryResults() {
- return new BinarySortableReadBinaryResults();
- }
-
- @Override
- public void readBinary(ReadBinaryResults readBinaryResults) throws IOException {
- BinarySortableReadBinaryResults binarySortableReadBinaryResults =
- (BinarySortableReadBinaryResults) readBinaryResults;
-
- BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadBinaryResults.text);
- readBinaryResults.bytes = binarySortableReadBinaryResults.text.getBytes();
- readBinaryResults.start = 0;
- readBinaryResults.length = binarySortableReadBinaryResults.text.getLength();
- }
-
- /*
- * DATE.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadDateResults extends ReadDateResults {
-
- public BinarySortableReadDateResults() {
- super();
- }
-
- public DateWritable getDateWritable() {
- return dateWritable;
- }
- }
-
- // Reading a DATE field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different DATE field.
- @Override
- public ReadDateResults createReadDateResults() {
- return new BinarySortableReadDateResults();
- }
-
- @Override
- public void readDate(ReadDateResults readDateResults) throws IOException {
- BinarySortableReadDateResults binarySortableReadDateResults = (BinarySortableReadDateResults) readDateResults;
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int v = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- DateWritable dateWritable = binarySortableReadDateResults.getDateWritable();
- dateWritable.set(v);
- }
-
- /*
- * TIMESTAMP.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadTimestampResults extends ReadTimestampResults {
-
- private byte[] timestampBytes;
-
- public BinarySortableReadTimestampResults() {
- super();
- timestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
- }
-
- public TimestampWritable getTimestampWritable() {
- return timestampWritable;
- }
- }
-
- // Reading a TIMESTAMP field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different TIMESTAMP field.
- @Override
- public ReadTimestampResults createReadTimestampResults() {
- return new BinarySortableReadTimestampResults();
- }
-
- @Override
- public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException {
- BinarySortableReadTimestampResults binarySortableReadTimestampResults = (BinarySortableReadTimestampResults) readTimestampResults;
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- byte[] timestampBytes = binarySortableReadTimestampResults.timestampBytes;
- for (int i = 0; i < timestampBytes.length; i++) {
- timestampBytes[i] = inputByteBuffer.read(invert);
- }
- TimestampWritable timestampWritable = binarySortableReadTimestampResults.getTimestampWritable();
- timestampWritable.setBinarySortable(timestampBytes, 0);
- }
-
- /*
- * INTERVAL_YEAR_MONTH.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadIntervalYearMonthResults extends ReadIntervalYearMonthResults {
-
- public BinarySortableReadIntervalYearMonthResults() {
- super();
- }
-
- public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() {
- return hiveIntervalYearMonthWritable;
- }
- }
-
- // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information.
- // A separate results object is created by the caller at initialization per different
- // INTERVAL_YEAR_MONTH field.
- @Override
- public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() {
- return new BinarySortableReadIntervalYearMonthResults();
- }
-
- @Override
- public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults)
- throws IOException {
- BinarySortableReadIntervalYearMonthResults binarySortableReadIntervalYearMonthResults =
- (BinarySortableReadIntervalYearMonthResults) readIntervalYearMonthResults;
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int v = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- v = (v << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable =
- binarySortableReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable();
- hiveIntervalYearMonthWritable.set(v);
- }
-
- /*
- * INTERVAL_DAY_TIME.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadIntervalDayTimeResults extends ReadIntervalDayTimeResults {
-
- public BinarySortableReadIntervalDayTimeResults() {
- super();
- }
-
- public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() {
- return hiveIntervalDayTimeWritable;
- }
- }
-
- // Reading a INTERVAL_DAY_TIME field require a results object to receive value information.
- // A separate results object is created by the caller at initialization per different
- // INTERVAL_DAY_TIME field.
- @Override
- public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() {
- return new BinarySortableReadIntervalDayTimeResults();
- }
-
- @Override
- public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults)
- throws IOException {
- BinarySortableReadIntervalDayTimeResults binarySortableReadIntervalDayTimeResults =
- (BinarySortableReadIntervalDayTimeResults) readIntervalDayTimeResults;
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- long totalSecs = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 7; i++) {
- totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- int nanos = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
- HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable =
- binarySortableReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable();
- hiveIntervalDayTimeWritable.set(totalSecs, nanos);
- }
-
- /*
- * DECIMAL.
- */
-
- // This class is for internal use.
- private static class BinarySortableReadDecimalResults extends ReadDecimalResults {
-
- public HiveDecimal hiveDecimal;
-
- public BinarySortableReadDecimalResults() {
- super();
- }
-
- @Override
- public void init(DecimalTypeInfo decimalTypeInfo) {
- super.init(decimalTypeInfo);
- }
-
- @Override
- public HiveDecimal getHiveDecimal() {
- return hiveDecimal;
- }
- }
-
- // Reading a DECIMAL field require a results object to receive value information. A separate
- // results object is created by the caller at initialization per different DECIMAL field.
- @Override
- public ReadDecimalResults createReadDecimalResults() {
- return new BinarySortableReadDecimalResults();
- }
-
- @Override
- public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException {
- BinarySortableReadDecimalResults binarySortableReadDecimalResults =
- (BinarySortableReadDecimalResults) readDecimalResults;
-
- if (!binarySortableReadDecimalResults.isInit()) {
- binarySortableReadDecimalResults.init(saveDecimalTypeInfo);
- }
-
- binarySortableReadDecimalResults.hiveDecimal = saveDecimal;
-
- saveDecimal = null;
- saveDecimalTypeInfo = null;
- }
-
- /**
- * We read the whole HiveDecimal value and then enforce precision and scale, which may
- * make it a NULL.
- * @return Returns true if this HiveDecimal enforced to a NULL.
- * @throws IOException
- */
- private boolean earlyReadHiveDecimal() throws IOException {
-
- // Since enforcing precision and scale can cause a HiveDecimal to become NULL,
- // we must read it, enforce it here, and either return NULL or buffer the result.
-
- final boolean invert = columnSortOrderIsDesc[fieldIndex];
- int b = inputByteBuffer.read(invert) - 1;
- assert (b == 1 || b == -1 || b == 0);
- boolean positive = b != -1;
-
- int factor = inputByteBuffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff);
- }
-
- if (!positive) {
- factor = -factor;
- }
-
- int start = inputByteBuffer.tell();
- int length = 0;
-
- do {
- b = inputByteBuffer.read(positive ? invert : !invert);
- assert(b != 1);
-
- if (b == 0) {
- // end of digits
- break;
- }
-
- length++;
- } while (true);
-
- if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) {
- tempDecimalBuffer = new byte[length];
- }
-
- inputByteBuffer.seek(start);
- for (int i = 0; i < length; ++i) {
- tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert);
- }
-
- // read the null byte again
- inputByteBuffer.read(positive ? invert : !invert);
-
- String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet);
- BigInteger bi = new BigInteger(digits);
- HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length);
-
- if (!positive) {
- bd = bd.negate();
- }
-
- // We have a decimal. After we enforce precision and scale, will it become a NULL?
-
- if (tempHiveDecimalWritable == null) {
- tempHiveDecimalWritable = new HiveDecimalWritable();
- }
- tempHiveDecimalWritable.set(bd);
-
- saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex];
-
- int precision = saveDecimalTypeInfo.getPrecision();
- int scale = saveDecimalTypeInfo.getScale();
-
- saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale);
- // Now return whether it is NULL or NOT NULL.
- return (saveDecimal == null);
+ private void doReadBeyondConfiguredFieldsWarned() {
+ // Warn only once.
+ LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but "
+ + " reading more (NULLs returned). Ignoring similar problems.");
+ readBeyondConfiguredFieldsWarned = true;
+ }
+
+ private void doReadBeyondBufferRangeWarned() {
+ // Warn only once.
+ int length = inputByteBuffer.tell() - start;
+ LOG.info("Reading beyond buffer range! Buffer range " + start
+ + " for length " + length + " but reading more... "
+ + "(total buffer length " + inputByteBuffer.getData().length + ")"
+ + " Ignoring similar problems.");
+ readBeyondBufferRangeWarned = true;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index c2b0cfc..2fad2af 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -19,31 +19,22 @@
package org.apache.hadoop.hive.serde2.fast;
import java.io.IOException;
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
/*
* Directly deserialize with the caller reading field-by-field a serialization format.
- *
+ *
* The caller is responsible for calling the read method for the right type of each field
* (after calling readCheckNull).
- *
+ *
* Reading some fields require a results object to receive value information. A separate
* results object is created by the caller at initialization per different field even for the same
* type.
@@ -52,17 +43,88 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
* other type specific buffers. So, those references are only valid until the next time set is
* called.
*/
-public interface DeserializeRead {
+public abstract class DeserializeRead {
+
+ protected TypeInfo[] typeInfos;
+
+ protected boolean[] columnsToInclude;
+
+ protected Category[] categories;
+ protected PrimitiveCategory[] primitiveCategories;
+
+ public DeserializeRead(TypeInfo[] typeInfos) {
+ this.typeInfos = typeInfos;
+ final int count = typeInfos.length;
+ categories = new Category[count];
+ primitiveCategories = new PrimitiveCategory[count];
+ for (int i = 0; i < count; i++) {
+ TypeInfo typeInfo = typeInfos[i];
+ Category category = typeInfo.getCategory();
+ categories[i] = category;
+ if (category == Category.PRIMITIVE) {
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ primitiveCategories[i] = primitiveCategory;
+
+ switch (primitiveCategory) {
+ case DATE:
+ if (currentDateWritable == null) {
+ currentDateWritable = new DateWritable();
+ }
+ break;
+ case TIMESTAMP:
+ if (currentTimestampWritable == null) {
+ currentTimestampWritable = new TimestampWritable();
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ if (currentHiveIntervalYearMonthWritable == null) {
+ currentHiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ if (currentHiveIntervalDayTimeWritable == null) {
+ currentHiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
+ }
+ break;
+ case DECIMAL:
+ if (currentHiveDecimalWritable == null) {
+ currentHiveDecimalWritable = new HiveDecimalWritable();
+ }
+ break;
+ default:
+ // No writable needed for this data type.
+ }
+ }
+ }
+
+ columnsToInclude = null;
+ }
+
+ // Deliberately not public: the no-argument constructor is protected.
+ protected DeserializeRead() {
+ }
/*
* The type information for all fields.
*/
- TypeInfo[] typeInfos();
+ public TypeInfo[] typeInfos() {
+ return typeInfos;
+ }
+
+ /*
+ * If some fields are not going to be used by the query, use this routine to specify
+ * the columns to return. The readCheckNull method will automatically return NULL for the
+ * other columns.
+ */
+ public void setColumnsToInclude(boolean[] columnsToInclude) {
+ this.columnsToInclude = columnsToInclude;
+ }
/*
* Set the range of bytes to be deserialized.
*/
- void set(byte[] bytes, int offset, int length);
+ public abstract void set(byte[] bytes, int offset, int length);
/*
* Reads the NULL information for a field.
@@ -70,318 +132,91 @@ public interface DeserializeRead {
* @return Return true when the field is NULL; reading is positioned to the next field.
* Otherwise, false when the field is NOT NULL; reading is positioned to the field data.
*/
- boolean readCheckNull() throws IOException;
+ public abstract boolean readCheckNull() throws IOException;
/*
* Call this method after all fields have been read to check for extra fields.
*/
- void extraFieldsCheck();
-
+ public abstract void extraFieldsCheck();
+
/*
* Read integrity warning flags.
*/
- boolean readBeyondConfiguredFieldsWarned();
- boolean readBeyondBufferRangeWarned();
- boolean bufferRangeHasExtraDataWarned();
+ public abstract boolean readBeyondConfiguredFieldsWarned();
+ public abstract boolean readBeyondBufferRangeWarned();
+ public abstract boolean bufferRangeHasExtraDataWarned();
+
+ /*
+ * These members hold the current value that was read when readCheckNull returns false.
+ */
/*
* BOOLEAN.
*/
- boolean readBoolean() throws IOException;
+ public boolean currentBoolean;
/*
* BYTE.
*/
- byte readByte() throws IOException;
+ public byte currentByte;
/*
* SHORT.
*/
- short readShort() throws IOException;
+ public short currentShort;
/*
* INT.
*/
- int readInt() throws IOException;
+ public int currentInt;
/*
* LONG.
*/
- long readLong() throws IOException;
+ public long currentLong;
/*
* FLOAT.
*/
- float readFloat() throws IOException;
+ public float currentFloat;
/*
* DOUBLE.
*/
- double readDouble() throws IOException;
-
- /*
- * This class is the base abstract read bytes results for STRING, CHAR, VARCHAR, and BINARY.
- */
- public abstract class ReadBytesResults {
-
- public byte[] bytes;
- public int start;
- public int length;
-
- public ReadBytesResults() {
- bytes = null;
- start = 0;
- length = 0;
- }
- }
+ public double currentDouble;
/*
- * STRING.
+ * STRING, CHAR, VARCHAR, and BINARY.
*
- * Can be used to read CHAR and VARCHAR when the caller takes responsibility for
+ * For CHAR and VARCHAR when the caller takes responsibility for
* truncation/padding issues.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadStringResults extends ReadBytesResults {
-
- public ReadStringResults() {
- super();
- }
- }
-
- // Reading a STRING field require a results object to receive value information. A separate
- // results object is created at initialization per different bytes field.
- ReadStringResults createReadStringResults();
-
- void readString(ReadStringResults readStringResults) throws IOException;
-
- /*
- * CHAR.
- */
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadHiveCharResults extends ReadBytesResults {
-
- private CharTypeInfo charTypeInfo;
- private int maxLength;
-
- protected HiveCharWritable hiveCharWritable;
-
- public ReadHiveCharResults() {
- super();
- }
-
- public void init(CharTypeInfo charTypeInfo) {
- this.charTypeInfo = charTypeInfo;
- this.maxLength = charTypeInfo.getLength();
- hiveCharWritable = new HiveCharWritable();
- }
-
- public boolean isInit() {
- return (charTypeInfo != null);
- }
-
- public int getMaxLength() {
- return maxLength;
- }
-
- public HiveChar getHiveChar() {
- return hiveCharWritable.getHiveChar();
- }
- }
-
- // Reading a CHAR field require a results object to receive value information. A separate
- // results object is created at initialization per different CHAR field.
- ReadHiveCharResults createReadHiveCharResults();
-
- void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException;
-
- /*
- * VARCHAR.
- */
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadHiveVarcharResults extends ReadBytesResults {
-
- private VarcharTypeInfo varcharTypeInfo;
- private int maxLength;
-
- protected HiveVarcharWritable hiveVarcharWritable;
-
- public ReadHiveVarcharResults() {
- super();
- }
-
- public void init(VarcharTypeInfo varcharTypeInfo) {
- this.varcharTypeInfo = varcharTypeInfo;
- this.maxLength = varcharTypeInfo.getLength();
- hiveVarcharWritable = new HiveVarcharWritable();
- }
-
- public boolean isInit() {
- return (varcharTypeInfo != null);
- }
-
- public int getMaxLength() {
- return maxLength;
- }
-
- public HiveVarchar getHiveVarchar() {
- return hiveVarcharWritable.getHiveVarchar();
- }
- }
-
- // Reading a VARCHAR field require a results object to receive value information. A separate
- // results object is created at initialization per different VARCHAR field.
- ReadHiveVarcharResults createReadHiveVarcharResults();
-
- void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException;
-
- /*
- * BINARY.
- */
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadBinaryResults extends ReadBytesResults {
-
- public ReadBinaryResults() {
- super();
- }
- }
-
- // Reading a BINARY field require a results object to receive value information. A separate
- // results object is created at initialization per different bytes field.
- ReadBinaryResults createReadBinaryResults();
-
- void readBinary(ReadBinaryResults readBinaryResults) throws IOException;
+ public byte[] currentBytes;
+ public int currentBytesStart;
+ public int currentBytesLength;
/*
* DATE.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadDateResults {
-
- protected DateWritable dateWritable;
-
- public ReadDateResults() {
- dateWritable = new DateWritable();
- }
-
- public Date getDate() {
- return dateWritable.get();
- }
-
- public int getDays() {
- return dateWritable.getDays();
- }
- }
-
- // Reading a DATE field require a results object to receive value information. A separate
- // results object is created at initialization per different DATE field.
- ReadDateResults createReadDateResults();
-
- void readDate(ReadDateResults readDateResults) throws IOException;
+ public DateWritable currentDateWritable;
/*
* TIMESTAMP.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadTimestampResults {
-
- protected TimestampWritable timestampWritable;
-
- public ReadTimestampResults() {
- timestampWritable = new TimestampWritable();
- }
-
- public Timestamp getTimestamp() {
- return timestampWritable.getTimestamp();
- }
- }
-
- // Reading a TIMESTAMP field require a results object to receive value information. A separate
- // results object is created at initialization per different TIMESTAMP field.
- ReadTimestampResults createReadTimestampResults();
-
- void readTimestamp(ReadTimestampResults readTimestampResult) throws IOException;
+ public TimestampWritable currentTimestampWritable;
/*
* INTERVAL_YEAR_MONTH.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadIntervalYearMonthResults {
-
- protected HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable;
-
- public ReadIntervalYearMonthResults() {
- hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
- }
-
- public HiveIntervalYearMonth getHiveIntervalYearMonth() {
- return hiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
- }
- }
-
- // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information.
- // A separate results object is created at initialization per different INTERVAL_YEAR_MONTH field.
- ReadIntervalYearMonthResults createReadIntervalYearMonthResults();
-
- void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResult) throws IOException;
+ public HiveIntervalYearMonthWritable currentHiveIntervalYearMonthWritable;
/*
* INTERVAL_DAY_TIME.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadIntervalDayTimeResults {
-
- protected HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable;
-
- public ReadIntervalDayTimeResults() {
- hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
- }
-
- public HiveIntervalDayTime getHiveIntervalDayTime() {
- return hiveIntervalDayTimeWritable.getHiveIntervalDayTime();
- }
- }
-
- // Reading a INTERVAL_DAY_TIME field require a results object to receive value information.
- // A separate results object is created at initialization per different INTERVAL_DAY_TIME field.
- ReadIntervalDayTimeResults createReadIntervalDayTimeResults();
-
- void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResult) throws IOException;
+ public HiveIntervalDayTimeWritable currentHiveIntervalDayTimeWritable;
/*
* DECIMAL.
*/
-
- // This class is for abstract since each format may need its own specialization.
- public abstract class ReadDecimalResults {
-
- protected DecimalTypeInfo decimalTypeInfo;
-
- public ReadDecimalResults() {
- }
-
- public void init(DecimalTypeInfo decimalTypeInfo) {
- this.decimalTypeInfo = decimalTypeInfo;
- }
-
- public boolean isInit() {
- return (decimalTypeInfo != null);
- }
-
- public abstract HiveDecimal getHiveDecimal();
- }
-
- // Reading a DECIMAL field require a results object to receive value information. A separate
- // results object is created at initialization per different DECIMAL field.
- ReadDecimalResults createReadDecimalResults();
-
- void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException;
+ public HiveDecimalWritable currentHiveDecimalWritable;
}
\ No newline at end of file