You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ga...@apache.org on 2017/12/18 23:25:57 UTC
[14/50] [abbrv] hive git commit: HIVE-18209: Fix API call in
VectorizedListColumnReader to get value from BytesColumnVector (Colin Ma,
reviewed by Ferdinand Xu)
HIVE-18209: Fix API call in VectorizedListColumnReader to get value from BytesColumnVector (Colin Ma, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/11227eba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/11227eba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/11227eba
Branch: refs/heads/standalone-metastore
Commit: 11227ebab390df10970fb8ef61f3e24421d6c66e
Parents: 7acc4ce
Author: Ferdinand Xu <ch...@intel.com>
Authored: Mon Dec 18 10:01:13 2017 +0800
Committer: Ferdinand Xu <ch...@intel.com>
Committed: Mon Dec 18 10:01:13 2017 +0800
----------------------------------------------------------------------
.../vector/VectorizedListColumnReader.java | 3 +-
.../parquet/TestVectorizedListColumnReader.java | 34 +++++++++++++++++++-
.../parquet/VectorizedColumnReaderTestBase.java | 1 +
3 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
index ea4f2f2..12af77c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
@@ -258,7 +258,8 @@ public class VectorizedListColumnReader extends BaseVectorizedColumnReader {
lcv.child = new BytesColumnVector(total);
lcv.child.init();
for (int i = 0; i < valueList.size(); i++) {
- ((BytesColumnVector)lcv.child).setVal(i, ((List<byte[]>)valueList).get(i));
+ byte[] src = ((List<byte[]>)valueList).get(i);
+ ((BytesColumnVector)lcv.child).setRef(i, src, 0, src.length);
}
break;
case FLOAT:
http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
index de19615..8ea5d25 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
@@ -72,8 +72,9 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
}
}
for (int j = 0; j < listMaxSize; j++) {
- group.append("list_int32_field_for_repeat_test", getIntValue(isDictionaryEncoding, j));
+ group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i));
}
+
writer.write(group);
}
writer.close();
@@ -157,6 +158,14 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
removeFile();
}
+ @Test
+ public void testUnrepeatedStringWithoutNullListRead() throws Exception {
+ removeFile();
+ writeListData(initWriterFromFile(), false, 1025);
+ testUnRepeateStringWithoutNullListRead();
+ removeFile();
+ }
+
private void testListReadAllType(boolean isDictionaryEncoding, int elementNum) throws Exception {
testListRead(isDictionaryEncoding, "int", elementNum);
testListRead(isDictionaryEncoding, "long", elementNum);
@@ -250,6 +259,10 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
try {
while (reader.next(NullWritable.get(), previous)) {
ListColumnVector vector = (ListColumnVector) previous.cols[0];
+
+ // isRepeating is expected to be true exactly when the offsets array has length 1.
+ assertEquals((vector.offsets.length == 1),vector.isRepeating);
+
for (int i = 0; i < vector.offsets.length; i++) {
if (row == elementNum) {
assertEquals(i, vector.offsets.length - 1);
@@ -305,4 +318,23 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
reader.close();
}
}
+
+ private void testUnRepeateStringWithoutNullListRead() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(IOConstants.COLUMNS, "list_binary_field_for_repeat_test");
+ conf.set(IOConstants.COLUMNS_TYPES, "array<string>");
+ conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+ VectorizedParquetRecordReader reader = createTestParquetReader(
+ "message hive_schema {repeated binary list_binary_field_for_repeat_test;}", conf);
+ VectorizedRowBatch previous = reader.createValue();
+ try {
+ while (reader.next(NullWritable.get(), previous)) {
+ ListColumnVector vector = (ListColumnVector) previous.cols[0];
+ assertEquals((vector.offsets.length == 1),vector.isRepeating);
+ }
+ } finally {
+ reader.close();
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
index 33c5c82..db7777d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
@@ -124,6 +124,7 @@ public class VectorizedColumnReaderTestBase {
+ "repeated fixed_len_byte_array(3) list_byte_array_field;"
+ "repeated binary list_binary_field;"
+ "repeated binary list_decimal_field (DECIMAL(5,2));"
+ + "repeated binary list_binary_field_for_repeat_test;"
+ "repeated int32 list_int32_field_for_repeat_test;"
+ "repeated group map_int32 (MAP_KEY_VALUE) {\n"
+ " required int32 key;\n"