You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2018/08/24 04:21:42 UTC
hive git commit: Working
Repository: hive
Updated Branches:
refs/heads/HIVE-20445 [created] a4ef8df46
Working
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4ef8df4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4ef8df4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4ef8df4
Branch: refs/heads/HIVE-20445
Commit: a4ef8df46a5b881dde9895aab7c60ff24185b10a
Parents: 611770d
Author: Teddy Choi <pu...@gmail.com>
Authored: Fri Aug 24 13:21:35 2018 +0900
Committer: Teddy Choi <pu...@gmail.com>
Committed: Fri Aug 24 13:21:35 2018 +0900
----------------------------------------------------------------------
.../ql/io/arrow/ArrowColumnarBatchSerDe.java | 17 +-
.../hadoop/hive/ql/io/arrow/Deserializer.java | 22 +-
.../hadoop/hive/ql/io/arrow/Serializer.java | 33 +--
.../ql/exec/vector/VectorRandomRowSource.java | 55 ++++-
.../io/arrow/TestArrowColumnarBatchSerDe.java | 213 +++++++------------
5 files changed, 134 insertions(+), 206 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
index ed82d2d..0dd959e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
@@ -233,17 +233,16 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
}
static ListColumnVector toStructListVector(MapColumnVector mapVector) {
- final StructColumnVector structVector;
- final ListColumnVector structListVector;
- structVector = new StructColumnVector();
- structVector.fields = new ColumnVector[] {mapVector.keys, mapVector.values};
- structListVector = new ListColumnVector();
- structListVector.child = structVector;
- structListVector.childCount = mapVector.childCount;
+ final StructColumnVector structVector =
+ new StructColumnVector(mapVector.childCount, mapVector.keys, mapVector.values);
+ final ListColumnVector structListVector =
+ new ListColumnVector(mapVector.isNull.length, structVector);
structListVector.isRepeating = mapVector.isRepeating;
structListVector.noNulls = mapVector.noNulls;
- System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.childCount);
- System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.childCount);
+
+ System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.offsets.length);
+ System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.lengths.length);
+ System.arraycopy(mapVector.isNull, 0, structListVector.isNull, 0, mapVector.isNull.length);
return structListVector;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
index edc4b39..51de786 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
@@ -136,15 +136,13 @@ class Deserializer {
case LIST:
readList(arrowVector, (ListColumnVector) hiveVector, (ListTypeInfo) typeInfo);
break;
- case MAP:
- readMap(arrowVector, (MapColumnVector) hiveVector, (MapTypeInfo) typeInfo);
- break;
case STRUCT:
readStruct(arrowVector, (StructColumnVector) hiveVector, (StructTypeInfo) typeInfo);
break;
case UNION:
readUnion(arrowVector, (UnionColumnVector) hiveVector, (UnionTypeInfo) typeInfo);
break;
+ case MAP:
default:
throw new IllegalArgumentException();
}
@@ -407,24 +405,6 @@ class Deserializer {
}
}
- private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) {
- final int size = arrowVector.getValueCount();
- final ListTypeInfo mapStructListTypeInfo = toStructListTypeInfo(typeInfo);
- final ListColumnVector mapStructListVector = toStructListVector(hiveVector);
- final StructColumnVector mapStructVector = (StructColumnVector) mapStructListVector.child;
-
- read(arrowVector, mapStructListVector, mapStructListTypeInfo);
-
- hiveVector.isRepeating = mapStructListVector.isRepeating;
- hiveVector.childCount = mapStructListVector.childCount;
- hiveVector.noNulls = mapStructListVector.noNulls;
- hiveVector.keys = mapStructVector.fields[0];
- hiveVector.values = mapStructVector.fields[1];
- System.arraycopy(mapStructListVector.offsets, 0, hiveVector.offsets, 0, size);
- System.arraycopy(mapStructListVector.lengths, 0, hiveVector.lengths, 0, size);
- System.arraycopy(mapStructListVector.isNull, 0, hiveVector.isNull, 0, size);
- }
-
private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) {
final int size = arrowVector.getValueCount();
final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 08e0fb2..0add293 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -181,6 +181,8 @@ public class Serializer {
public ArrowWrapperWritable serializeBatch(VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
rootVector.setValueCount(0);
+ final int size = isNative ? vectorizedRowBatch.size : batchSize;
+
for (int fieldIndex = 0; fieldIndex < vectorizedRowBatch.projectionSize; fieldIndex++) {
final int projectedColumn = vectorizedRowBatch.projectedColumns[fieldIndex];
final ColumnVector hiveVector = vectorizedRowBatch.cols[projectedColumn];
@@ -195,20 +197,18 @@ public class Serializer {
}
final FieldVector arrowVector = rootVector.addOrGet(fieldName, fieldType, FieldVector.class);
if(fieldExists) {
- arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize);
+ arrowVector.setValueCount(size);
} else {
- arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize);
+ arrowVector.setInitialCapacity(size);
arrowVector.allocateNew();
}
- write(arrowVector, hiveVector, fieldTypeInfo, isNative ? vectorizedRowBatch.size : batchSize, vectorizedRowBatch, isNative);
+ write(arrowVector, hiveVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
}
if(!isNative) {
//Only mutate batches that are constructed by this serde
vectorizedRowBatch.reset();
- rootVector.setValueCount(batchSize);
- } else {
- rootVector.setValueCount(vectorizedRowBatch.size);
}
+ rootVector.setValueCount(size);
batchSize = 0;
VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(rootVector);
@@ -266,7 +266,6 @@ public class Serializer {
case STRUCT:
return ArrowType.Struct.INSTANCE;
case MAP:
- return ArrowType.List.INSTANCE;
case UNION:
default:
throw new IllegalArgumentException();
@@ -288,31 +287,11 @@ public class Serializer {
case UNION:
writeUnion(arrowVector, hiveVector, typeInfo, size, vectorizedRowBatch, isNative);
break;
- case MAP:
- writeMap((ListVector) arrowVector, (MapColumnVector) hiveVector, (MapTypeInfo) typeInfo, size, vectorizedRowBatch, isNative);
- break;
default:
throw new IllegalArgumentException();
}
}
- private static void writeMap(ListVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo,
- int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
- final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo);
- final ListColumnVector structListVector = toStructListVector(hiveVector);
-
- write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative);
-
- final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
- for (int rowIndex = 0; rowIndex < size; rowIndex++) {
- if (hiveVector.isNull[rowIndex]) {
- BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
- } else {
- BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
- }
- }
- }
-
private static void writeUnion(FieldVector arrowVector, ColumnVector hiveVector, TypeInfo typeInfo,
int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index af73ee6..5b3ca43 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.serde2.RandomTypeUtil;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -86,7 +85,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hive.common.util.DateUtils;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
@@ -286,7 +284,7 @@ public class VectorRandomRowSource {
}
public enum SupportedTypes {
- ALL, PRIMITIVES, ALL_EXCEPT_MAP
+ ALL, PRIMITIVES, ALL_EXCEPT_MAP, ALL_EXCEPT_MAP_UNION
}
public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth) {
@@ -363,6 +361,17 @@ public class VectorRandomRowSource {
"map"
};
+ private static String[] possibleHiveComplexTypeNamesWithoutMap = {
+ "array",
+ "struct",
+ "uniontype"
+ };
+
+ private static String[] possibleHiveComplexTypeNamesWithoutMapUnion = {
+ "array",
+ "struct"
+ };
+
public static String getRandomTypeName(Random random, SupportedTypes supportedTypes,
Set<String> allowedTypeNameSet) {
@@ -376,7 +385,12 @@ public class VectorRandomRowSource {
typeName = possibleHivePrimitiveTypeNames[random.nextInt(possibleHivePrimitiveTypeNames.length)];
break;
case ALL_EXCEPT_MAP:
- typeName = possibleHiveComplexTypeNames[random.nextInt(possibleHiveComplexTypeNames.length - 1)];
+ typeName = possibleHiveComplexTypeNamesWithoutMap[random.nextInt(
+ possibleHiveComplexTypeNamesWithoutMap.length)];
+ break;
+ case ALL_EXCEPT_MAP_UNION:
+ typeName = possibleHiveComplexTypeNamesWithoutMapUnion[random.nextInt(
+ possibleHiveComplexTypeNamesWithoutMapUnion.length)];
break;
case ALL:
typeName = possibleHiveComplexTypeNames[random.nextInt(possibleHiveComplexTypeNames.length)];
@@ -583,10 +597,16 @@ public class VectorRandomRowSource {
if (allTypes) {
switch (supportedTypes) {
case ALL:
- columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
+ columnCount = possibleHivePrimitiveTypeNames.length +
+ possibleHiveComplexTypeNames.length;
break;
case ALL_EXCEPT_MAP:
- columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
+ columnCount = possibleHivePrimitiveTypeNames.length +
+ possibleHiveComplexTypeNamesWithoutMap.length;
+ break;
+ case ALL_EXCEPT_MAP_UNION:
+ columnCount = possibleHivePrimitiveTypeNames.length +
+ possibleHiveComplexTypeNamesWithoutMapUnion.length;
break;
case PRIMITIVES:
columnCount = possibleHivePrimitiveTypeNames.length;
@@ -627,7 +647,10 @@ public class VectorRandomRowSource {
maxTypeNum = possibleHivePrimitiveTypeNames.length;
break;
case ALL_EXCEPT_MAP:
- maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
+ maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNamesWithoutMap.length - 1;
+ break;
+ case ALL_EXCEPT_MAP_UNION:
+ maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNamesWithoutMapUnion.length;
break;
case ALL:
maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
@@ -656,9 +679,23 @@ public class VectorRandomRowSource {
if (typeNum < possibleHivePrimitiveTypeNames.length) {
typeName = possibleHivePrimitiveTypeNames[typeNum];
} else {
- typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length];
+ switch (supportedTypes) {
+ case ALL:
+ typeName = possibleHiveComplexTypeNames[typeNum -
+ possibleHivePrimitiveTypeNames.length];
+ break;
+ case ALL_EXCEPT_MAP:
+ typeName = possibleHiveComplexTypeNamesWithoutMap[typeNum -
+ possibleHivePrimitiveTypeNames.length];
+ break;
+ case ALL_EXCEPT_MAP_UNION:
+ typeName = possibleHiveComplexTypeNamesWithoutMapUnion[typeNum -
+ possibleHivePrimitiveTypeNames.length];
+ break;
+ default:
+ throw new IllegalArgumentException();
+ }
}
-
}
String decoratedTypeName =
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
index c9a5812..057f2ad 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -58,6 +59,9 @@ import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Test;
+import java.lang.reflect.Array;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -204,11 +208,30 @@ public class TestArrowColumnarBatchSerDe {
serializeAndDeserialize(serDe, rows, rowOI);
}
+ private StructObjectInspector initSerDe(AbstractSerDe serDe, TypeInfo[] typeInfos)
+ throws SerDeException {
+ List<String> fieldNameList = new ArrayList<>();
+ List<String> fieldTypeList = new ArrayList<>();
+
+ for (int i = 0; i < typeInfos.length; i++) {
+ fieldNameList.add("col" + i);
+ fieldTypeList.add(typeInfos[i].getTypeName());
+ }
+
+ Properties schemaProperties = new Properties();
+ schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(fieldNameList));
+ schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ Joiner.on(',').join(fieldTypeList));
+ SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null);
+ return (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ TypeInfoFactory.getStructTypeInfo(fieldNameList, Arrays.asList(typeInfos)));
+ }
+
private StructObjectInspector initSerDe(AbstractSerDe serDe, String[][] schema)
throws SerDeException {
- List<String> fieldNameList = newArrayList();
- List<String> fieldTypeList = newArrayList();
- List<TypeInfo> typeInfoList = newArrayList();
+ List<String> fieldNameList = new ArrayList<>();
+ List<String> fieldTypeList = new ArrayList<>();
+ List<TypeInfo> typeInfoList = new ArrayList<>();
for (String[] nameAndType : schema) {
String name = nameAndType[0];
@@ -218,12 +241,10 @@ public class TestArrowColumnarBatchSerDe {
typeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(type));
}
- String fieldNames = Joiner.on(',').join(fieldNameList);
- String fieldTypes = Joiner.on(',').join(fieldTypeList);
-
Properties schemaProperties = new Properties();
- schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
- schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
+ schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(fieldNameList));
+ schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ Joiner.on(',').join(fieldTypeList));
SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null);
return (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
TypeInfoFactory.getStructTypeInfo(fieldNameList, typeInfoList));
@@ -251,6 +272,9 @@ public class TestArrowColumnarBatchSerDe {
for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) {
final StructField field = fields.get(fieldIndex);
final ObjectInspector fieldObjInspector = field.getFieldObjectInspector();
+ if (row[fieldIndex] == null && deserializedRow[fieldIndex] == null) {
+ continue;
+ }
switch (fieldObjInspector.getCategory()) {
case PRIMITIVE:
final PrimitiveObjectInspector primitiveObjInspector =
@@ -262,13 +286,38 @@ public class TestArrowColumnarBatchSerDe {
assertEquals(Objects.toString(row[fieldIndex]),
Objects.toString(deserializedRow[fieldIndex]));
break;
+ case TIMESTAMP: {
+ TimestampWritableV2 source = (TimestampWritableV2) row[fieldIndex];
+ TimestampWritableV2 deserialized =
+ (TimestampWritableV2) deserializedRow[fieldIndex];
+ long sourceMilli = source.getTimestamp().toEpochMilli();
+ long deserializedMilli = deserialized.getTimestamp().toEpochMilli();
+ assertEquals(sourceMilli, deserializedMilli);
+ break;
+ }
+ case INTERVAL_DAY_TIME: {
+ HiveIntervalDayTimeWritable source =
+ (HiveIntervalDayTimeWritable) row[fieldIndex];
+ HiveIntervalDayTimeWritable deserialized =
+ (HiveIntervalDayTimeWritable) deserializedRow[fieldIndex];
+ assertEquals(source.getHiveIntervalDayTime().getTotalSeconds(),
+ deserialized.getHiveIntervalDayTime().getTotalSeconds());
+ assertEquals(source.getHiveIntervalDayTime().getNanos() / 1_000_000,
+ deserialized.getHiveIntervalDayTime().getNanos() / 1_000_000);
+ break;
+ }
default:
assertEquals(row[fieldIndex], deserializedRow[fieldIndex]);
break;
}
break;
case STRUCT:
- final Object[] rowStruct = (Object[]) row[fieldIndex];
+ final Object[] rowStruct;
+ if (row[fieldIndex] instanceof ArrayList) {
+ rowStruct = ((ArrayList) row[fieldIndex]).toArray();
+ } else {
+ rowStruct = (Object[]) row[fieldIndex];
+ }
final List deserializedRowStruct = (List) deserializedRow[fieldIndex];
if (rowStruct == null) {
assertNull(deserializedRowStruct);
@@ -280,26 +329,26 @@ public class TestArrowColumnarBatchSerDe {
case UNION:
assertEquals(row[fieldIndex], deserializedRow[fieldIndex]);
break;
- case MAP:
- final Map rowMap = (Map) row[fieldIndex];
- final Map deserializedRowMap = (Map) deserializedRow[fieldIndex];
- if (rowMap == null) {
- assertNull(deserializedRowMap);
- } else {
- final Set rowMapKeySet = rowMap.keySet();
- final Set deserializedRowMapKeySet = deserializedRowMap.keySet();
- assertEquals(rowMapKeySet, deserializedRowMapKeySet);
- for (Object key : rowMapKeySet) {
- assertEquals(rowMap.get(key), deserializedRowMap.get(key));
- }
- }
- break;
}
}
}
}
@Test
+ public void testRandom() throws SerDeException {
+ Random random = new Random(3);
+ for (int i = 0; i < 1; i++) {
+ VectorRandomRowSource source = new VectorRandomRowSource();
+ source.init(random, VectorRandomRowSource.SupportedTypes.ALL_EXCEPT_MAP_UNION, 0);
+ Object[][] rows = source.randomRows(100);
+
+ ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe();
+ StructObjectInspector structObjectInspector = initSerDe(serDe, source.typeInfos());
+ serializeAndDeserialize(serDe, rows, structObjectInspector);
+ }
+ }
+
+ @Test
public void testComprehensive() throws SerDeException {
String[][] schema = {
{"datatypes.c1", "int"},
@@ -307,17 +356,13 @@ public class TestArrowColumnarBatchSerDe {
{"datatypes.c3", "double"},
{"datatypes.c4", "string"},
{"datatypes.c5", "array<int>"},
- {"datatypes.c6", "map<int,string>"},
- {"datatypes.c7", "map<string,string>"},
{"datatypes.c8", "struct<r:string,s:int,t:double>"},
{"datatypes.c9", "tinyint"},
{"datatypes.c10", "smallint"},
{"datatypes.c11", "float"},
{"datatypes.c12", "bigint"},
{"datatypes.c13", "array<array<string>>"},
- {"datatypes.c14", "map<int,map<int,int>>"},
{"datatypes.c15", "struct<r:int,s:struct<a:int,b:string>>"},
- {"datatypes.c16", "array<struct<m:map<string,string>,n:int>>"},
{"datatypes.c17", "timestamp"},
{"datatypes.c18", "decimal(16,7)"},
{"datatypes.c19", "binary"},
@@ -334,12 +379,6 @@ public class TestArrowColumnarBatchSerDe {
doubleW(0), // c3:double
text("Hello"), // c4:string
newArrayList(intW(0), intW(1), intW(2)), // c5:array<int>
- Maps.toMap(
- newArrayList(intW(0), intW(1), intW(2)),
- input -> text("Number " + input)), // c6:map<int,string>
- Maps.toMap(
- newArrayList(text("apple"), text("banana"), text("carrot")),
- input -> text(input.toString().toUpperCase())), // c7:map<string,string>
new Object[] {text("0"), intW(1), doubleW(2)}, // c8:struct<r:string,s:int,t:double>
byteW(0), // c9:tinyint
shortW(0), // c10:smallint
@@ -348,22 +387,11 @@ public class TestArrowColumnarBatchSerDe {
newArrayList(
newArrayList(text("a"), text("b"), text("c")),
newArrayList(text("A"), text("B"), text("C"))), // c13:array<array<string>>
- Maps.toMap(
- newArrayList(intW(0), intW(1), intW(2)),
- x -> Maps.toMap(
- newArrayList(x, intW(x.get() * 2)),
- y -> y)), // c14:map<int,map<int,int>>
new Object[] {
intW(0),
newArrayList(
intW(1),
text("Hello"))}, // c15:struct<r:int,s:struct<a:int,b:string>>
- Collections.singletonList(
- newArrayList(
- Maps.toMap(
- newArrayList(text("hello")),
- input -> text(input.toString().toUpperCase())),
- intW(0))), // c16:array<struct<m:map<string,string>,n:int>>
new TimestampWritableV2(TIMESTAMP), // c17:timestamp
decimalW(HiveDecimal.create(0, 0)), // c18:decimal(16,7)
new BytesWritable("Hello".getBytes()), // c19:binary
@@ -373,8 +401,7 @@ public class TestArrowColumnarBatchSerDe {
new BytesWritable("world!".getBytes()), // c23:binary
}, {
null, null, null, null, null, null, null, null, null, null, // c1-c10
- null, null, null, null, null, null, null, null, null, null, // c11-c20
- null, null, null, // c21-c23
+ null, null, null, null, null, null, null, null, null, // c21-c23
}
};
@@ -690,98 +717,4 @@ public class TestArrowColumnarBatchSerDe {
initAndSerializeAndDeserialize(schema, toStruct(BINARY_ROWS));
}
-
- private Object[][] toMap(Object[][] rows) {
- Map[][] array = new Map[rows.length][];
- for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
- Object[] row = rows[rowIndex];
- array[rowIndex] = new Map[row.length];
- for (int fieldIndex = 0; fieldIndex < row.length; fieldIndex++) {
- Map map = Maps.newHashMap();
- map.put(new Text(String.valueOf(row[fieldIndex])), row[fieldIndex]);
- array[rowIndex][fieldIndex] = map;
- }
- }
- return array;
- }
-
- @Test
- public void testMapInteger() throws SerDeException {
- String[][] schema = {
- {"tinyint_map", "map<string,tinyint>"},
- {"smallint_map", "map<string,smallint>"},
- {"int_map", "map<string,int>"},
- {"bigint_map", "map<string,bigint>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(INTEGER_ROWS));
- }
-
- @Test
- public void testMapFloat() throws SerDeException {
- String[][] schema = {
- {"float_map", "map<string,float>"},
- {"double_map", "map<string,double>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(FLOAT_ROWS));
- }
-
- @Test
- public void testMapString() throws SerDeException {
- String[][] schema = {
- {"string_map", "map<string,string>"},
- {"char_map", "map<string,char(10)>"},
- {"varchar_map", "map<string,varchar(10)>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(STRING_ROWS));
- }
-
- @Test
- public void testMapDTI() throws SerDeException {
- String[][] schema = {
- {"date_map", "map<string,date>"},
- {"timestamp_map", "map<string,timestamp>"},
- {"interval_year_month_map", "map<string,interval_year_month>"},
- {"interval_day_time_map", "map<string,interval_day_time>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(DTI_ROWS));
- }
-
- @Test
- public void testMapBoolean() throws SerDeException {
- String[][] schema = {
- {"boolean_map", "map<string,boolean>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(BOOLEAN_ROWS));
- }
-
- @Test
- public void testMapBinary() throws SerDeException {
- String[][] schema = {
- {"binary_map", "map<string,binary>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(BINARY_ROWS));
- }
-
- public void testMapDecimal() throws SerDeException {
- String[][] schema = {
- {"decimal_map", "map<string,decimal(38,10)>"},
- };
-
- initAndSerializeAndDeserialize(schema, toMap(DECIMAL_ROWS));
- }
-
- public void testListDecimal() throws SerDeException {
- String[][] schema = {
- {"decimal_list", "array<decimal(38,10)>"},
- };
-
- initAndSerializeAndDeserialize(schema, toList(DECIMAL_ROWS));
- }
-
}