Posted to commits@hive.apache.org by jd...@apache.org on 2017/05/09 01:18:18 UTC
hive git commit: HIVE-16568: Support complex types in external LLAP InputFormat (Jason Dere, reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/master 5791cdd89 -> db7b57cd9
HIVE-16568: Support complex types in external LLAP InputFormat (Jason Dere, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db7b57cd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db7b57cd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db7b57cd
Branch: refs/heads/master
Commit: db7b57cd9c99e874a9d6f43ecacaadac3b360f2d
Parents: 5791cdd
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon May 8 18:03:24 2017 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon May 8 18:03:24 2017 -0700
----------------------------------------------------------------------
.../hive/llap/ext/TestLlapInputSplit.java | 6 +-
.../apache/hive/jdbc/TestJdbcWithMiniLlap.java | 160 ++++++++++++
.../hadoop/hive/llap/LlapRowRecordReader.java | 126 ++++++++--
.../org/apache/hadoop/hive/llap/FieldDesc.java | 20 +-
.../java/org/apache/hadoop/hive/llap/Row.java | 252 +++++++++++--------
.../org/apache/hadoop/hive/llap/TypeDesc.java | 108 --------
.../org/apache/hadoop/hive/llap/TestRow.java | 15 +-
.../ql/udf/generic/GenericUDTFGetSplits.java | 71 +-----
.../hive/ql/io/orc/TestInputOutputFormat.java | 1 -
9 files changed, 426 insertions(+), 333 deletions(-)
----------------------------------------------------------------------
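This commit replaces the LLAP-specific TypeDesc with Hive's standard TypeInfo in the external client API, and reworks Row to hand back plain Java objects, including nested lists, maps, and structs. A minimal client-side sketch of what this enables -- not part of the commit itself; the llap.if.* connection properties come from LlapBaseInputFormat, and the endpoint, user, and table names are placeholders:

    import java.util.List;
    import java.util.Map;
    import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
    import org.apache.hadoop.hive.llap.LlapRowInputFormat;
    import org.apache.hadoop.hive.llap.Row;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RecordReader;
    import org.apache.hadoop.mapred.Reporter;

    public class ComplexTypesClientSketch {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        // Connection properties as defined on LlapBaseInputFormat; values are placeholders.
        job.set(LlapBaseInputFormat.URL_KEY, "jdbc:hive2://hs2host:10000/default");
        job.set(LlapBaseInputFormat.USER_KEY, "hive");
        job.set(LlapBaseInputFormat.PWD_KEY, "");
        job.set(LlapBaseInputFormat.QUERY_KEY, "select * from complex1");

        LlapRowInputFormat inputFormat = new LlapRowInputFormat();
        for (InputSplit split : inputFormat.getSplits(job, 1)) {
          RecordReader<NullWritable, Row> reader =
              inputFormat.getRecordReader(split, job, Reporter.NULL);
          Row row = reader.createValue();
          while (reader.next(NullWritable.get(), row)) {
            // Complex columns now come back as plain Java objects.
            // Column names are qualified with the table name, per the test below.
            List<?> c1 = row.getList("complex1.c1");   // array<int>      -> List of Integer
            Map<?, ?> c2 = row.getMap("complex1.c2");  // map<int,string> -> Map of Integer to String
            List<?> c3 = row.getStruct("complex1.c3"); // struct          -> List of field values
          }
          reader.close();
        }
      }
    }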
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
index 654e92b..a154349 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
@@ -27,7 +27,7 @@ import java.util.ArrayList;
import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.hive.llap.Schema;
import org.apache.hadoop.hive.llap.FieldDesc;
-import org.apache.hadoop.hive.llap.TypeDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.junit.Test;
@@ -46,8 +46,8 @@ public class TestLlapInputSplit {
};
ArrayList<FieldDesc> colDescs = new ArrayList<FieldDesc>();
- colDescs.add(new FieldDesc("col1", new TypeDesc(TypeDesc.Type.STRING)));
- colDescs.add(new FieldDesc("col2", new TypeDesc(TypeDesc.Type.INT)));
+ colDescs.add(new FieldDesc("col1", TypeInfoFactory.stringTypeInfo));
+ colDescs.add(new FieldDesc("col2", TypeInfoFactory.intTypeInfo));
Schema schema = new Schema(colDescs);
byte[] tokenBytes = new byte[] { 1 };
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
index de47412..4cc9045 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.LlapRowRecordReader;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.hive.llap.Schema;
@@ -72,6 +73,12 @@ import org.apache.hive.jdbc.miniHS2.MiniHS2;
import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType;
import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
import org.apache.hadoop.hive.llap.LlapRowInputFormat;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.datanucleus.ClassLoaderResolver;
import org.datanucleus.NucleusContext;
@@ -160,6 +167,40 @@ public class TestJdbcWithMiniLlap {
stmt.close();
}
+ private void createTableWithComplexTypes(String tableName) throws Exception {
+ Statement stmt = hs2Conn.createStatement();
+
+ // create table
+ stmt.execute("DROP TABLE IF EXISTS " + tableName);
+ stmt.execute("CREATE TABLE " + tableName
+ + " (c0 int, c1 array<int>, c2 map<int, string>, c3 struct<f1:int, f2:string, f3:array<int>>, c4 array<struct<f1:int, f2:string, f3:array<int>>>)");
+
+ // load data
+ stmt.execute("insert into " + tableName
+ + " select 1"
+ + ", array(1, 2, 3)"
+ + ", map(1, 'one', 2, 'two')"
+ + ", named_struct('f1', 1, 'f2', 'two', 'f3', array(1,2,3))"
+ + ", array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4)))");
+
+ // Inserting nulls into complex columns doesn't work without this CASE workaround - what a hack.
+ stmt.execute("insert into " + tableName
+ + " select 2"
+ + ", case when 2 = 2 then null else array(1, 2, 3) end"
+ + ", case when 2 = 2 then null else map(1, 'one', 2, 'two') end"
+ + ", case when 2 = 2 then null else named_struct('f1', 1, 'f2', 'two', 'f3', array(1,2,3)) end"
+ + ", case when 2 = 2 then null else array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4))) end");
+
+ // TODO: test nested nulls in complex types. Currently blocked by HIVE-16587.
+ //stmt.execute("insert into " + tableName
+ // + " select 3"
+ // + ", array(1, 2, null)"
+ // + ", map(1, 'one', 2, null)"
+ // + ", named_struct('f1', cast(null as int), 'f2', cast(null as string), 'f3', array(1,2,null))"
+ // + ", array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4)))");
+ stmt.close();
+ }
+
@Test(timeout = 60000)
public void testLlapInputFormatEndToEnd() throws Exception {
createTestTable("testtab1");
@@ -212,6 +253,105 @@ public class TestJdbcWithMiniLlap {
assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(2));
}
+ @Test(timeout = 60000)
+ public void testComplexTypes() throws Exception {
+ createTableWithComplexTypes("complex1");
+ RowCollector2 rowCollector = new RowCollector2();
+ String query = "select * from complex1";
+ int rowCount = processQuery(query, 1, rowCollector);
+ assertEquals(2, rowCount);
+
+ // Verify schema
+ FieldDesc c0Desc = rowCollector.schema.getColumns().get(0);
+ assertEquals("complex1.c0", c0Desc.getName());
+ assertEquals("int", c0Desc.getTypeInfo().getTypeName());
+
+ FieldDesc c1Desc = rowCollector.schema.getColumns().get(1);
+ assertEquals("complex1.c1", c1Desc.getName());
+ assertEquals("array<int>", c1Desc.getTypeInfo().getTypeName());
+
+ FieldDesc c2Desc = rowCollector.schema.getColumns().get(2);
+ assertEquals("complex1.c2", c2Desc.getName());
+ assertEquals("map<int,string>", c2Desc.getTypeInfo().getTypeName());
+
+ FieldDesc c3Desc = rowCollector.schema.getColumns().get(3);
+ assertEquals("complex1.c3", c3Desc.getName());
+ assertEquals(Category.STRUCT, c3Desc.getTypeInfo().getCategory());
+ verifyStructFieldSchema((StructTypeInfo) c3Desc.getTypeInfo());
+
+ FieldDesc c4Desc = rowCollector.schema.getColumns().get(4);
+ assertEquals("complex1.c4", c4Desc.getName());
+ assertEquals(Category.LIST, c4Desc.getTypeInfo().getCategory());
+ TypeInfo c4ElementType = ((ListTypeInfo) c4Desc.getTypeInfo()).getListElementTypeInfo();
+ assertEquals(Category.STRUCT, c4ElementType.getCategory());
+ verifyStructFieldSchema((StructTypeInfo) c4ElementType);
+
+ // First row
+ Object[] rowValues = rowCollector.rows.get(0);
+ assertEquals(Integer.valueOf(1), ((Integer) rowValues[0]));
+
+ // assertEquals("[1, 2, 3]", rowValues[1]);
+ List<?> c1Value = (List<?>) rowValues[1];
+ assertEquals(3, c1Value.size());
+ assertEquals(Integer.valueOf(1), c1Value.get(0));
+ assertEquals(Integer.valueOf(2), c1Value.get(1));
+ assertEquals(Integer.valueOf(3), c1Value.get(2));
+
+ // assertEquals("{1=one, 2=two}", rowValues[2]);
+ Map<?,?> c2Value = (Map<?,?>) rowValues[2];
+ assertEquals(2, c2Value.size());
+ assertEquals("one", c2Value.get(Integer.valueOf(1)));
+ assertEquals("two", c2Value.get(Integer.valueOf(2)));
+
+ // assertEquals("[1, two, [1, 2, 3]]", rowValues[3]);
+ List<?> c3Value = (List<?>) rowValues[3];
+ assertEquals(Integer.valueOf(1), c3Value.get(0));
+ assertEquals("two", c3Value.get(1));
+ List<?> f3Value = (List<?>) c3Value.get(2);
+ assertEquals(Integer.valueOf(1), f3Value.get(0));
+ assertEquals(Integer.valueOf(2), f3Value.get(1));
+ assertEquals(Integer.valueOf(3), f3Value.get(2));
+
+ // assertEquals("[[11, two, [2, 3, 4]]]", rowValues[4]);
+ List<?> c4Value = (List<?>) rowValues[4];
+ assertEquals(1, c4Value.size());
+ List<?> c4Element = (List<?>) c4Value.get(0);
+ assertEquals(Integer.valueOf(11), c4Element.get(0));
+ assertEquals("two", c4Element.get(1));
+ f3Value = (List<?>) c4Element.get(2);
+ assertEquals(3, f3Value.size());
+ assertEquals(Integer.valueOf(2), f3Value.get(0));
+ assertEquals(Integer.valueOf(3), f3Value.get(1));
+ assertEquals(Integer.valueOf(4), f3Value.get(2));
+
+ // Second row
+ rowValues = rowCollector.rows.get(1);
+ assertEquals(Integer.valueOf(2), ((Integer) rowValues[0]));
+ assertEquals(null, rowValues[1]);
+ assertEquals(null, rowValues[2]);
+ assertEquals(null, rowValues[3]);
+ assertEquals(null, rowValues[4]);
+ }
+
+ private void verifyStructFieldSchema(StructTypeInfo structType) {
+ assertEquals("f1", structType.getAllStructFieldNames().get(0));
+ TypeInfo f1Type = structType.getStructFieldTypeInfo("f1");
+ assertEquals(Category.PRIMITIVE, f1Type.getCategory());
+ assertEquals(PrimitiveCategory.INT, ((PrimitiveTypeInfo) f1Type).getPrimitiveCategory());
+
+ assertEquals("f2", structType.getAllStructFieldNames().get(1));
+ TypeInfo f2Type = structType.getStructFieldTypeInfo("f2");
+ assertEquals(Category.PRIMITIVE, f2Type.getCategory());
+ assertEquals(PrimitiveCategory.STRING, ((PrimitiveTypeInfo) f2Type).getPrimitiveCategory());
+
+ assertEquals("f3", structType.getAllStructFieldNames().get(2));
+ TypeInfo f3Type = structType.getStructFieldTypeInfo("f3");
+ assertEquals(Category.LIST, f3Type.getCategory());
+ assertEquals(
+ PrimitiveCategory.INT,
+ ((PrimitiveTypeInfo) ((ListTypeInfo) f3Type).getListElementTypeInfo()).getPrimitiveCategory());
+ }
+
private interface RowProcessor {
void process(Row row);
}
@@ -235,6 +375,26 @@ public class TestJdbcWithMiniLlap {
}
}
+ // Save the actual values from each row as opposed to the String representation.
+ private static class RowCollector2 implements RowProcessor {
+ ArrayList<Object[]> rows = new ArrayList<Object[]>();
+ Schema schema = null;
+ int numColumns = 0;
+
+ public void process(Row row) {
+ if (schema == null) {
+ schema = row.getSchema();
+ numColumns = schema.getColumns().size();
+ }
+
+ Object[] arr = new Object[numColumns];
+ for (int idx = 0; idx < numColumns; ++idx) {
+ arr[idx] = row.getValue(idx);
+ }
+ rows.add(arr);
+ }
+ }
+
private int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
String url = miniHS2.getJdbcURL();
String user = System.getProperty("user.name");
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
index ee92f3e..e3c0955 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
@@ -21,7 +21,11 @@ package org.apache.hadoop.hive.llap;
import com.google.common.base.Preconditions;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
@@ -32,17 +36,23 @@ import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.Schema;
-import org.apache.hadoop.hive.llap.TypeDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -111,27 +121,7 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
try {
StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
rowObj = serde.deserialize(textData);
- List<? extends StructField> colFields = rowOI.getAllStructFieldRefs();
- for (int idx = 0; idx < colFields.size(); ++idx) {
- StructField field = colFields.get(idx);
- Object colValue = rowOI.getStructFieldData(rowObj, field);
- Preconditions.checkState(field.getFieldObjectInspector().getCategory() == Category.PRIMITIVE,
- "Cannot handle non-primitive column type " + field.getFieldObjectInspector().getTypeName());
-
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) field.getFieldObjectInspector();
- // char/varchar special cased here since the row record handles them using Text
- switch (poi.getPrimitiveCategory()) {
- case CHAR:
- value.setValue(idx, ((HiveCharWritable) poi.getPrimitiveWritableObject(colValue)).getPaddedValue());
- break;
- case VARCHAR:
- value.setValue(idx, ((HiveVarcharWritable) poi.getPrimitiveWritableObject(colValue)).getTextValue());
- break;
- default:
- value.setValue(idx, (Writable) poi.getPrimitiveWritableObject(colValue));
- break;
- }
- }
+ setRowFromStruct(value, rowObj, rowOI);
} catch (SerDeException err) {
if (LOG.isDebugEnabled()) {
LOG.debug("Error deserializing row from text: " + textData);
@@ -147,6 +137,96 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
return schema;
}
+ static Object convertPrimitive(Object val, PrimitiveObjectInspector poi) {
+ switch (poi.getPrimitiveCategory()) {
+ // Save char/varchar as string
+ case CHAR:
+ return ((HiveChar) poi.getPrimitiveJavaObject(val)).getPaddedValue();
+ case VARCHAR:
+ return ((HiveVarchar) poi.getPrimitiveJavaObject(val)).toString();
+ case DECIMAL:
+ return ((HiveDecimal) poi.getPrimitiveJavaObject(val)).bigDecimalValue();
+ default:
+ return poi.getPrimitiveJavaObject(val);
+ }
+ }
+
+ static Object convertValue(Object val, ObjectInspector oi) {
+ if (val == null) {
+ return null;
+ }
+
+ Object convertedVal = null;
+ ObjectInspector.Category oiCategory = oi.getCategory();
+ switch (oiCategory) {
+ case PRIMITIVE:
+ convertedVal = convertPrimitive(val, (PrimitiveObjectInspector) oi);
+ break;
+ case LIST:
+ ListObjectInspector loi = (ListObjectInspector) oi;
+ int listSize = loi.getListLength(val);
+ // Per ListObjectInspector.getListLength(), -1 length means null list.
+ if (listSize < 0) {
+ return null;
+ }
+ List<Object> convertedList = new ArrayList<Object>(listSize);
+ ObjectInspector listElementOI = loi.getListElementObjectInspector();
+ for (int idx = 0; idx < listSize; ++idx) {
+ convertedList.add(convertValue(loi.getListElement(val, idx), listElementOI));
+ }
+ convertedVal = convertedList;
+ break;
+ case MAP:
+ MapObjectInspector moi = (MapObjectInspector) oi;
+ int mapSize = moi.getMapSize(val);
+ // Per MapObjectInspector.getMapSize(), -1 length means null map.
+ if (mapSize < 0) {
+ return null;
+ }
+ Map<Object, Object> convertedMap = new LinkedHashMap<Object, Object>(mapSize);
+ ObjectInspector mapKeyOI = moi.getMapKeyObjectInspector();
+ ObjectInspector mapValOI = moi.getMapValueObjectInspector();
+ Map<?, ?> mapCol = moi.getMap(val);
+ for (Object mapKey : mapCol.keySet()) {
+ Object convertedMapKey = convertValue(mapKey, mapKeyOI);
+ Object convertedMapVal = convertValue(mapCol.get(mapKey), mapValOI);
+ convertedMap.put(convertedMapKey, convertedMapVal);
+ }
+ convertedVal = convertedMap;
+ break;
+ case STRUCT:
+ StructObjectInspector soi = (StructObjectInspector) oi;
+ List<Object> convertedRow = new ArrayList<Object>();
+ for (StructField structField : soi.getAllStructFieldRefs()) {
+ Object convertedFieldValue = convertValue(
+ soi.getStructFieldData(val, structField),
+ structField.getFieldObjectInspector());
+ convertedRow.add(convertedFieldValue);
+ }
+ convertedVal = convertedRow;
+ break;
+ default:
+ throw new IllegalArgumentException("Cannot convert type " + oiCategory);
+ }
+
+ return convertedVal;
+ }
+
+ static void setRowFromStruct(Row row, Object structVal, StructObjectInspector soi) {
+ Schema structSchema = row.getSchema();
+ // Add struct field data to the Row
+ List<FieldDesc> fieldDescs = structSchema.getColumns();
+ for (int idx = 0; idx < fieldDescs.size(); ++idx) {
+ FieldDesc fieldDesc = fieldDescs.get(idx);
+ StructField structField = soi.getStructFieldRef(fieldDesc.getName());
+
+ Object convertedFieldValue = convertValue(
+ soi.getStructFieldData(structVal, structField),
+ structField.getFieldObjectInspector());
+ row.setValue(idx, convertedFieldValue);
+ }
+ }
+
protected AbstractSerDe initSerDe(Configuration conf) throws SerDeException {
Properties props = new Properties();
StringBuffer columnsBuffer = new StringBuffer();
@@ -158,7 +238,7 @@ public class LlapRowRecordReader implements RecordReader<NullWritable, Row> {
typesBuffer.append(',');
}
columnsBuffer.append(colDesc.getName());
- typesBuffer.append(colDesc.getTypeDesc().toString());
+ typesBuffer.append(colDesc.getTypeInfo().toString());
isFirst = false;
}
String columns = columnsBuffer.toString();
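For reference outside the diff: convertValue returns plain Java objects at every level (char/varchar flattened to String, decimal to BigDecimal, lists as ArrayList, maps as LinkedHashMap, structs as a List of converted field values). A minimal sketch of that behavior -- it assumes a class in the same org.apache.hadoop.hive.llap package, since convertValue is package-private, and a writable-backed standard object inspector:

    package org.apache.hadoop.hive.llap;

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;

    public class ConvertValueSketch {
      public static void main(String[] args) {
        ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoUtils.getTypeInfoFromTypeString("struct<f1:int,f2:string>"));
        // The standard struct inspector accepts a List as the underlying struct data.
        Object structData = Arrays.asList(new IntWritable(1), new Text("two"));
        List<?> converted = (List<?>) LlapRowRecordReader.convertValue(structData, oi);
        System.out.println(converted); // [1, two] -- an Integer and a String
      }
    }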
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java
----------------------------------------------------------------------
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java b/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java
index 9621978..19f482d 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java
@@ -21,43 +21,45 @@ package org.apache.hadoop.hive.llap;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
public class FieldDesc implements Writable {
private String name;
- private TypeDesc typeDesc;
+ private TypeInfo typeInfo;
public FieldDesc() {
- typeDesc = new TypeDesc();
}
- public FieldDesc(String name, TypeDesc typeDesc) {
+ public FieldDesc(String name, TypeInfo typeInfo) {
this.name = name;
- this.typeDesc = typeDesc;
+ this.typeInfo = typeInfo;
}
public String getName() {
return name;
}
- public TypeDesc getTypeDesc() {
- return typeDesc;
+ public TypeInfo getTypeInfo() {
+ return typeInfo;
}
@Override
public String toString() {
- return getName() + ":" + getTypeDesc().toString();
+ return getName() + ":" + getTypeInfo().toString();
}
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(name);
- typeDesc.write(out);
+ out.writeUTF(typeInfo.toString());
}
@Override
public void readFields(DataInput in) throws IOException {
name = in.readUTF();
- typeDesc.readFields(in);
+ typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(in.readUTF());
}
}
\ No newline at end of file
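With TypeDesc gone, FieldDesc serializes its type as the type string and re-parses it on read, so any type that TypeInfoUtils can parse (including the nested complex types above) round-trips through write/readFields. A quick sketch of that round trip; the field name and type here are illustrative:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.hive.llap.FieldDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    FieldDesc fd = new FieldDesc("c4",
        TypeInfoUtils.getTypeInfoFromTypeString("array<struct<f1:int,f2:string,f3:array<int>>>"));
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    fd.write(new DataOutputStream(bos));
    FieldDesc copy = new FieldDesc();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
    // TypeInfo instances are canonicalized by the factory, so the parsed copy equals the original.
    assert copy.getTypeInfo().equals(fd.getTypeInfo());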
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
----------------------------------------------------------------------
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java b/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
index a84fadc..40a6ef5 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
@@ -17,150 +17,184 @@
*/
package org.apache.hadoop.hive.llap;
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.common.base.Preconditions;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-
public class Row {
private final Schema schema;
- private final Writable[] colValues;
- private final boolean[] nullIndicators;
+ private final Object[] colValues;
private Map<String, Integer> nameToIndexMapping;
public Row(Schema schema) {
this.schema = schema;
- this.colValues = new Writable[schema.getColumns().size()];
- this.nullIndicators = new boolean[schema.getColumns().size()];
+ this.colValues = new Object[schema.getColumns().size()];
this.nameToIndexMapping = new HashMap<String, Integer>(schema.getColumns().size());
List<FieldDesc> colDescs = schema.getColumns();
for (int idx = 0; idx < colDescs.size(); ++idx) {
FieldDesc colDesc = colDescs.get(idx);
nameToIndexMapping.put(colDesc.getName(), idx);
- colValues[idx] = createWritableForType(colDesc.getTypeDesc());
}
}
- public Writable getValue(int colIndex) {
- if (nullIndicators[colIndex]) {
- return null;
- }
+ public Object getValue(int colIndex) {
return colValues[colIndex];
}
- public Writable getValue(String colName) {
+ public Object getValue(String colName) {
Integer idx = nameToIndexMapping.get(colName);
Preconditions.checkArgument(idx != null);
return getValue(idx);
}
+ public Boolean getBoolean(int idx) {
+ return (Boolean) getValue(idx);
+ }
+
+ public Boolean getBoolean(String colName) {
+ return (Boolean) getValue(colName);
+ }
+
+ public Byte getByte(int idx) {
+ return (Byte) getValue(idx);
+ }
+
+ public Byte getByte(String colName) {
+ return (Byte) getValue(colName);
+ }
+
+ public Short getShort(int idx) {
+ return (Short) getValue(idx);
+ }
+
+ public Short getShort(String colName) {
+ return (Short) getValue(colName);
+ }
+
+ public Integer getInt(int idx) {
+ return (Integer) getValue(idx);
+ }
+
+ public Integer getInt(String colName) {
+ return (Integer) getValue(colName);
+ }
+
+ public Long getLong(int idx) {
+ return (Long) getValue(idx);
+ }
+
+ public Long getLong(String colName) {
+ return (Long) getValue(colName);
+ }
+
+ public Float getFloat(int idx) {
+ return (Float) getValue(idx);
+ }
+
+ public Float getFloat(String colName) {
+ return (Float) getValue(colName);
+ }
+
+ public Double getDouble(int idx) {
+ return (Double) getValue(idx);
+ }
+
+ public Double getDouble(String colName) {
+ return (Double) getValue(colName);
+ }
+
+ public String getString(int idx) {
+ return (String) getValue(idx);
+ }
+
+ public String getString(String colName) {
+ return (String) getValue(colName);
+ }
+
+ public Date getDate(int idx) {
+ return (Date) getValue(idx);
+ }
+
+ public Date getDate(String colName) {
+ return (Date) getValue(colName);
+ }
+
+ public Timestamp getTimestamp(int idx) {
+ return (Timestamp) getValue(idx);
+ }
+
+ public Timestamp getTimestamp(String colName) {
+ return (Timestamp) getValue(colName);
+ }
+
+ public byte[] getBytes(int idx) {
+ return (byte[]) getValue(idx);
+ }
+
+ public byte[] getBytes(String colName) {
+ return (byte[]) getValue(colName);
+ }
+
+ public BigDecimal getDecimal(int idx) {
+ return (BigDecimal) getValue(idx);
+ }
+
+ public BigDecimal getDecimal(String colName) {
+ return (BigDecimal) getValue(colName);
+ }
+
+ public List<?> getList(int idx) {
+ return (List<?>) getValue(idx);
+ }
+
+ public List<?> getList(String colName) {
+ return (List<?>) getValue(colName);
+ }
+
+ public Map<?, ?> getMap(int idx) {
+ return (Map<?, ?>) getValue(idx);
+ }
+
+ public Map<?, ?> getMap(String colName) {
+ return (Map<?, ?>) getValue(colName);
+ }
+
+ // Struct value is simply a list of values.
+ // The schema can be used to map the field name to the position in the list.
+ public List<?> getStruct(int idx) {
+ return (List<?>) getValue(idx);
+ }
+
+ public List<?> getStruct(String colName) {
+ return (List<?>) getValue(colName);
+ }
+
public Schema getSchema() {
return schema;
}
- void setValue(int colIdx, Writable value) {
- Preconditions.checkArgument(colIdx <= schema.getColumns().size());
-
- if (value == null) {
- nullIndicators[colIdx] = true;
- } else {
- nullIndicators[colIdx] = false;
- FieldDesc colDesc = schema.getColumns().get(colIdx);
- switch (colDesc.getTypeDesc().getType()) {
- case BOOLEAN:
- ((BooleanWritable) colValues[colIdx]).set(((BooleanWritable) value).get());
- break;
- case TINYINT:
- ((ByteWritable) colValues[colIdx]).set(((ByteWritable) value).get());
- break;
- case SMALLINT:
- ((ShortWritable) colValues[colIdx]).set(((ShortWritable) value).get());
- break;
- case INT:
- ((IntWritable) colValues[colIdx]).set(((IntWritable) value).get());
- break;
- case BIGINT:
- ((LongWritable) colValues[colIdx]).set(((LongWritable) value).get());
- break;
- case FLOAT:
- ((FloatWritable) colValues[colIdx]).set(((FloatWritable) value).get());
- break;
- case DOUBLE:
- ((DoubleWritable) colValues[colIdx]).set(((DoubleWritable) value).get());
- break;
- case STRING:
- // Just handle char/varchar as Text
- case CHAR:
- case VARCHAR:
- ((Text) colValues[colIdx]).set((Text) value);
- break;
- case DATE:
- ((DateWritable) colValues[colIdx]).set((DateWritable) value);
- break;
- case TIMESTAMP:
- ((TimestampWritable) colValues[colIdx]).set((TimestampWritable) value);
- break;
- case BINARY:
- ((BytesWritable) colValues[colIdx]).set(((BytesWritable) value));
- break;
- case DECIMAL:
- ((HiveDecimalWritable) colValues[colIdx]).set((HiveDecimalWritable) value);
- break;
- }
- }
+ void setValue(int colIdx, Object obj) {
+ colValues[colIdx] = obj;
}
- private Writable createWritableForType(TypeDesc typeDesc) {
- switch (typeDesc.getType()) {
- case BOOLEAN:
- return new BooleanWritable();
- case TINYINT:
- return new ByteWritable();
- case SMALLINT:
- return new ShortWritable();
- case INT:
- return new IntWritable();
- case BIGINT:
- return new LongWritable();
- case FLOAT:
- return new FloatWritable();
- case DOUBLE:
- return new DoubleWritable();
- case STRING:
- // Just handle char/varchar as Text
- case CHAR:
- case VARCHAR:
- return new Text();
- case DATE:
- return new DateWritable();
- case TIMESTAMP:
- return new TimestampWritable();
- case BINARY:
- return new BytesWritable();
- case DECIMAL:
- return new HiveDecimalWritable();
- default:
- throw new RuntimeException("Cannot create writable for " + typeDesc.getType());
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("[");
+ for (int idx = 0; idx < schema.getColumns().size(); ++idx) {
+ if (idx > 0) {
+ sb.append(", ");
+ }
+ Object val = getValue(idx);
+ sb.append(val == null ? "null" : val.toString());
}
+ sb.append("]");
+ return sb.toString();
}
}
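Since getStruct returns the struct as a List, field positions can be recovered from the column's StructTypeInfo in the schema. A short sketch, assuming a Row read from the complex1 test table above:

    import java.util.List;
    import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;

    List<?> c3 = row.getStruct("complex1.c3");
    StructTypeInfo c3Type = (StructTypeInfo) row.getSchema().getColumns().get(3).getTypeInfo();
    int f2Pos = c3Type.getAllStructFieldNames().indexOf("f2");
    String f2 = (String) c3.get(f2Pos); // "two" in the first test row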
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java
----------------------------------------------------------------------
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java b/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java
deleted file mode 100644
index dda5928..0000000
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.llap;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import org.apache.hadoop.io.Writable;
-
-public class TypeDesc implements Writable {
- public static enum Type {
- BOOLEAN,
- TINYINT,
- SMALLINT,
- INT,
- BIGINT,
- FLOAT,
- DOUBLE,
- STRING,
- CHAR,
- VARCHAR,
- DATE,
- TIMESTAMP,
- BINARY,
- DECIMAL,
- }
-
- private TypeDesc.Type type;
- private int precision;
- private int scale;
-
- // For types with no type qualifiers
- public TypeDesc(TypeDesc.Type type) {
- this(type, 0, 0);
- }
-
- // For decimal types
- public TypeDesc(TypeDesc.Type type, int precision, int scale) {
- this.type = type;
- this.precision = precision;
- this.scale = scale;
- }
-
- // For char/varchar types
- public TypeDesc(TypeDesc.Type type, int precision) {
- this(type, precision, 0);
- }
-
- // Should be used for serialization only
- public TypeDesc() {
- this(TypeDesc.Type.INT, 0, 0);
- }
-
- public TypeDesc.Type getType() {
- return type;
- }
-
- public int getPrecision() {
- return precision;
- }
-
- public int getScale() {
- return scale;
- }
-
- @Override
- public String toString() {
- switch (type) {
- case DECIMAL:
- return type.name().toLowerCase() + "(" + precision + "," + scale + ")";
- case CHAR:
- case VARCHAR:
- return type.name().toLowerCase() + "(" + precision + ")";
- default:
- return type.name().toLowerCase();
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeUTF(type.name());
- out.writeInt(precision);
- out.writeInt(scale);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- type = TypeDesc.Type.valueOf(in.readUTF());
- precision = in.readInt();
- scale = in.readInt();
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
----------------------------------------------------------------------
diff --git a/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java b/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
index d4e68f4..37e934d 100644
--- a/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
+++ b/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
@@ -23,8 +23,7 @@ import java.util.Random;
import org.apache.commons.lang.RandomStringUtils;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.After;
import org.junit.Before;
@@ -42,16 +41,14 @@ public class TestRow {
Random rand = new Random();
int iterations = 100;
- Text col0 = new Text();
- IntWritable col1 = new IntWritable();
for (int idx = 0; idx < iterations; ++idx) {
// Set the row values
boolean isNullCol0 = (rand.nextDouble() <= 0.25);
- col0.set(RandomStringUtils.random(10));
+ String col0 = RandomStringUtils.random(10);
row.setValue(0, isNullCol0 ? null : col0);
boolean isNullCol1 = (rand.nextDouble() <= 0.25);
- col1.set(rand.nextInt());
+ Integer col1 = Integer.valueOf(rand.nextInt());
row.setValue(1, isNullCol1 ? null : col1);
// Validate the row values
@@ -60,7 +57,6 @@ public class TestRow {
assertTrue(row.getValue("col0") == null);
} else {
assertTrue(row.getValue(0) != null);
- assertTrue(col0 != row.getValue(0));
assertEquals(col0, row.getValue(0));
assertEquals(col0, row.getValue("col0"));
}
@@ -70,7 +66,6 @@ public class TestRow {
assertTrue(row.getValue("col1") == null);
} else {
assertTrue(row.getValue(1) != null);
- assertTrue(col1 != row.getValue(1));
assertEquals(col1, row.getValue(1));
assertEquals(col1, row.getValue("col1"));
}
@@ -81,10 +76,10 @@ public class TestRow {
List<FieldDesc> colDescs = new ArrayList<FieldDesc>();
colDescs.add(new FieldDesc("col0",
- new TypeDesc(TypeDesc.Type.STRING)));
+ TypeInfoFactory.stringTypeInfo));
colDescs.add(new FieldDesc("col1",
- new TypeDesc(TypeDesc.Type.INT)));
+ TypeInfoFactory.intTypeInfo));
Schema schema = new Schema(colDescs);
return schema;
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 9ddbd7e..868eec7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.hive.llap.NotTezEventHelper;
import org.apache.hadoop.hive.llap.Schema;
import org.apache.hadoop.hive.llap.SubmitWorkInfo;
-import org.apache.hadoop.hive.llap.TypeDesc;
import org.apache.hadoop.hive.llap.coordinator.LlapCoordinator;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec;
@@ -82,12 +81,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SplitLocationInfo;
@@ -526,76 +520,13 @@ public class GenericUDTFGetSplits extends GenericUDTF {
}
}
- private TypeDesc convertTypeString(String typeString) throws HiveException {
- TypeDesc typeDesc;
- TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
- Preconditions.checkState(
- typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE,
- "Unsupported non-primitive type " + typeString);
-
- switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
- case BOOLEAN:
- typeDesc = new TypeDesc(TypeDesc.Type.BOOLEAN);
- break;
- case BYTE:
- typeDesc = new TypeDesc(TypeDesc.Type.TINYINT);
- break;
- case SHORT:
- typeDesc = new TypeDesc(TypeDesc.Type.SMALLINT);
- break;
- case INT:
- typeDesc = new TypeDesc(TypeDesc.Type.INT);
- break;
- case LONG:
- typeDesc = new TypeDesc(TypeDesc.Type.BIGINT);
- break;
- case FLOAT:
- typeDesc = new TypeDesc(TypeDesc.Type.FLOAT);
- break;
- case DOUBLE:
- typeDesc = new TypeDesc(TypeDesc.Type.DOUBLE);
- break;
- case STRING:
- typeDesc = new TypeDesc(TypeDesc.Type.STRING);
- break;
- case CHAR:
- CharTypeInfo charTypeInfo = (CharTypeInfo) typeInfo;
- typeDesc = new TypeDesc(TypeDesc.Type.CHAR, charTypeInfo.getLength());
- break;
- case VARCHAR:
- VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) typeInfo;
- typeDesc = new TypeDesc(TypeDesc.Type.VARCHAR,
- varcharTypeInfo.getLength());
- break;
- case DATE:
- typeDesc = new TypeDesc(TypeDesc.Type.DATE);
- break;
- case TIMESTAMP:
- typeDesc = new TypeDesc(TypeDesc.Type.TIMESTAMP);
- break;
- case BINARY:
- typeDesc = new TypeDesc(TypeDesc.Type.BINARY);
- break;
- case DECIMAL:
- DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
- typeDesc = new TypeDesc(TypeDesc.Type.DECIMAL,
- decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
- break;
- default:
- throw new HiveException("Unsupported type " + typeString);
- }
-
- return typeDesc;
- }
-
private Schema convertSchema(Object obj) throws HiveException {
org.apache.hadoop.hive.metastore.api.Schema schema = (org.apache.hadoop.hive.metastore.api.Schema) obj;
List<FieldDesc> colDescs = new ArrayList<FieldDesc>();
for (FieldSchema fs : schema.getFieldSchemas()) {
String colName = fs.getName();
String typeString = fs.getType();
- TypeDesc typeDesc = convertTypeString(typeString);
- colDescs.add(new FieldDesc(colName, typeDesc));
+ colDescs.add(new FieldDesc(colName, TypeInfoUtils.getTypeInfoFromTypeString(typeString)));
}
Schema Schema = new Schema(colDescs);
return Schema;
http://git-wip-us.apache.org/repos/asf/hive/blob/db7b57cd/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index b003eb8..bb79857 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -60,7 +60,6 @@ import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.llap.TypeDesc;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;