You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 06:18:00 UTC
[carbondata] branch master updated: [CARBONDATA-3406] Support
Binary, Boolean, Varchar,
Complex data types read and Dictionary columns read
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 2f338d3 [CARBONDATA-3406] Support Binary, Boolean, Varchar, Complex data types read and Dictionary columns read
2f338d3 is described below
commit 2f338d377bcf7414ba83f8962aa88d62eb0b4ef0
Author: dhatchayani <dh...@gmail.com>
AuthorDate: Thu May 30 17:55:16 2019 +0530
[CARBONDATA-3406] Support Binary, Boolean, Varchar, Complex data types read and Dictionary columns read
1. Support Read for Binary, Boolean, Varchar, Complex data types.
2. Support Read for Dictionary columns.
This closes #3250
---
.../hive/CarbonDictionaryDecodeReadSupport.java | 83 +++++++++++++---------
.../carbondata/hive/CarbonHiveRecordReader.java | 8 +++
.../carbondata/hive/CarbonObjectInspector.java | 11 ++-
3 files changed, 65 insertions(+), 37 deletions(-)
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
index fdccd09..52ece32 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
@@ -36,6 +36,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -45,12 +46,12 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
/**
* This is the class to decode dictionary encoded column data back to its original value.
@@ -156,28 +157,26 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
* @throws IOException
*/
private ArrayWritable createArray(Object obj, CarbonColumn carbonColumn) throws IOException {
- if (obj instanceof GenericArrayData) {
- Object[] objArray = ((GenericArrayData) obj).array();
- List<CarbonDimension> childCarbonDimensions = null;
- CarbonDimension arrayDimension = null;
- if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
- childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
- arrayDimension = childCarbonDimensions.get(0);
- }
- List array = new ArrayList();
- if (objArray != null) {
- for (int i = 0; i < objArray.length; i++) {
- Object curObj = objArray[i];
- Writable newObj = createWritableObject(curObj, arrayDimension);
- array.add(newObj);
- }
- }
- if (array.size() > 0) {
- ArrayWritable subArray = new ArrayWritable(Writable.class,
- (Writable[]) array.toArray(new Writable[array.size()]));
- return new ArrayWritable(Writable.class, new Writable[] { subArray });
+ Object[] objArray = (Object[]) obj;
+ List<CarbonDimension> childCarbonDimensions = null;
+ CarbonDimension arrayDimension = null;
+ if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+ childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+ arrayDimension = childCarbonDimensions.get(0);
+ }
+ List array = new ArrayList();
+ if (objArray != null) {
+ for (int i = 0; i < objArray.length; i++) {
+ Object curObj = objArray[i];
+ Writable newObj = createWritableObject(curObj, arrayDimension);
+ array.add(newObj);
}
}
+ if (array.size() > 0) {
+ ArrayWritable subArray =
+ new ArrayWritable(Writable.class, (Writable[]) array.toArray(new Writable[array.size()]));
+ return new ArrayWritable(Writable.class, new Writable[] { subArray });
+ }
return null;
}
@@ -190,23 +189,21 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
* @throws IOException
*/
private ArrayWritable createStruct(Object obj, CarbonColumn carbonColumn) throws IOException {
- if (obj instanceof GenericInternalRow) {
- Object[] objArray = ((GenericInternalRow) obj).values();
- List<CarbonDimension> childCarbonDimensions = null;
- if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
- childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
- }
+ Object[] objArray = (Object[]) obj;
+ List<CarbonDimension> childCarbonDimensions = null;
+ if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+ childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+ }
- if (null != childCarbonDimensions) {
- Writable[] arr = new Writable[objArray.length];
- for (int i = 0; i < objArray.length; i++) {
+ if (null != childCarbonDimensions) {
+ Writable[] arr = new Writable[objArray.length];
+ for (int i = 0; i < objArray.length; i++) {
- arr[i] = createWritableObject(objArray[i], childCarbonDimensions.get(i));
- }
- return new ArrayWritable(Writable.class, arr);
+ arr[i] = createWritableObject(objArray[i], childCarbonDimensions.get(i));
}
+ return new ArrayWritable(Writable.class, arr);
}
- throw new IOException("DataType not supported in Carbondata");
+ return null;
}
/**
@@ -223,6 +220,12 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
if (obj == null) {
return null;
}
+ if (carbonColumn.hasEncoding(Encoding.DICTIONARY)) {
+ obj = DataTypeUtil.getDataBasedOnDataType(obj.toString(), dataType);
+ if (obj == null) {
+ return null;
+ }
+ }
if (dataType == DataTypes.NULL) {
return null;
} else if (dataType == DataTypes.DOUBLE) {
@@ -233,6 +236,12 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
return new LongWritable((long) obj);
} else if (dataType == DataTypes.SHORT) {
return new ShortWritable((short) obj);
+ } else if (dataType == DataTypes.BOOLEAN) {
+ return new BooleanWritable((boolean) obj);
+ } else if (dataType == DataTypes.VARCHAR) {
+ return new Text(obj.toString());
+ } else if (dataType == DataTypes.BINARY) {
+ return new BytesWritable((byte[]) obj);
} else if (dataType == DataTypes.DATE) {
Calendar c = Calendar.getInstance();
c.setTime(new Date(0));
@@ -243,6 +252,10 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
return new TimestampWritable(new Timestamp((long) obj / 1000));
} else if (dataType == DataTypes.STRING) {
return new Text(obj.toString());
+ } else if (DataTypes.isArrayType(dataType)) {
+ return createArray(obj, carbonColumn);
+ } else if (DataTypes.isStructType(dataType)) {
+ return createStruct(obj, carbonColumn);
} else if (DataTypes.isDecimal(dataType)) {
return new HiveDecimalWritable(HiveDecimal.create(new java.math.BigDecimal(obj.toString())));
} else {
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
index 4ed2b91..e93a794 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
@@ -47,6 +47,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -204,6 +206,12 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
return new LongWritable((long) obj);
case SHORT:
return new ShortWritable((short) obj);
+ case BOOLEAN:
+ return new BooleanWritable((boolean) obj);
+ case VARCHAR:
+ return new Text(obj.toString());
+ case BINARY:
+ return new BytesWritable((byte[]) obj);
case DATE:
return new DateWritable(new Date(Long.parseLong(String.valueOf(obj.toString()))));
case TIMESTAMP:
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
index 6722dcf..75c7056 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
@@ -25,12 +25,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.ArrayWritable;
class CarbonObjectInspector extends SettableStructObjectInspector {
@@ -79,8 +80,14 @@ class CarbonObjectInspector extends SettableStructObjectInspector {
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
} else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
- } else if (((CharTypeInfo) typeInfo).getPrimitiveCategory().name().equals("CHAR")) {
+ } else if (typeInfo.equals(TypeInfoFactory.charTypeInfo)) {
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
+ return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+ } else if (typeInfo instanceof VarcharTypeInfo) {
+ return new WritableHiveVarcharObjectInspector((VarcharTypeInfo) typeInfo);
+ } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
+ return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
} else {
throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
}