Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 06:18:00 UTC

[carbondata] branch master updated: [CARBONDATA-3406] Support Binary, Boolean, Varchar, Complex data types read and Dictionary columns read

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f338d3  [CARBONDATA-3406] Support Binary, Boolean, Varchar, Complex data types read and Dictionary columns read
2f338d3 is described below

commit 2f338d377bcf7414ba83f8962aa88d62eb0b4ef0
Author: dhatchayani <dh...@gmail.com>
AuthorDate: Thu May 30 17:55:16 2019 +0530

    [CARBONDATA-3406] Support Binary, Boolean, Varchar, Complex data types read and Dictionary columns read
    
    1. Support read of the Binary, Boolean, Varchar, and Complex data types.
    2. Support read of dictionary-encoded columns.
    
    This closes #3250
---
 .../hive/CarbonDictionaryDecodeReadSupport.java    | 83 +++++++++++++---------
 .../carbondata/hive/CarbonHiveRecordReader.java    |  8 +++
 .../carbondata/hive/CarbonObjectInspector.java     | 11 ++-
 3 files changed, 65 insertions(+), 37 deletions(-)
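
The common thread across all three files is a value-to-Writable mapping on the
Hive read path that now also covers the new types. A minimal, self-contained
sketch of that mapping idea, using only the plain Hadoop io API (an
illustration, not the CarbonData code itself):

    import org.apache.hadoop.io.BooleanWritable;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Sketch: map a decoded column value to the Hadoop Writable that Hive's
    // object inspectors expect to see.
    final class WritableMappingSketch {
      static Writable toWritable(Object value) {
        if (value == null) {
          return null;                               // nulls pass through
        }
        if (value instanceof Boolean) {              // new: BOOLEAN
          return new BooleanWritable((Boolean) value);
        }
        if (value instanceof byte[]) {               // new: BINARY
          return new BytesWritable((byte[]) value);
        }
        return new Text(value.toString());           // STRING and VARCHAR
      }
    }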

diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
index fdccd09..52ece32 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
@@ -36,6 +36,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
 import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -45,12 +46,12 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
-import org.apache.spark.sql.catalyst.util.GenericArrayData;
 
 /**
  * This is the class to decode dictionary encoded column data back to its original value.
@@ -156,28 +157,26 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
    * @throws IOException
    */
   private ArrayWritable createArray(Object obj, CarbonColumn carbonColumn) throws IOException {
-    if (obj instanceof GenericArrayData) {
-      Object[] objArray = ((GenericArrayData) obj).array();
-      List<CarbonDimension> childCarbonDimensions = null;
-      CarbonDimension arrayDimension = null;
-      if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
-        childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
-        arrayDimension = childCarbonDimensions.get(0);
-      }
-      List array = new ArrayList();
-      if (objArray != null) {
-        for (int i = 0; i < objArray.length; i++) {
-          Object curObj = objArray[i];
-          Writable newObj = createWritableObject(curObj, arrayDimension);
-          array.add(newObj);
-        }
-      }
-      if (array.size() > 0) {
-        ArrayWritable subArray = new ArrayWritable(Writable.class,
-            (Writable[]) array.toArray(new Writable[array.size()]));
-        return new ArrayWritable(Writable.class, new Writable[] { subArray });
+    Object[] objArray = (Object[]) obj;
+    List<CarbonDimension> childCarbonDimensions = null;
+    CarbonDimension arrayDimension = null;
+    if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+      childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+      arrayDimension = childCarbonDimensions.get(0);
+    }
+    List array = new ArrayList();
+    if (objArray != null) {
+      for (int i = 0; i < objArray.length; i++) {
+        Object curObj = objArray[i];
+        Writable newObj = createWritableObject(curObj, arrayDimension);
+        array.add(newObj);
       }
     }
+    if (array.size() > 0) {
+      ArrayWritable subArray =
+          new ArrayWritable(Writable.class, (Writable[]) array.toArray(new Writable[array.size()]));
+      return new ArrayWritable(Writable.class, new Writable[] { subArray });
+    }
     return null;
   }
 
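The rewritten createArray no longer expects Spark's GenericArrayData (hence the
dropped org.apache.spark imports above): the Hive read path now hands over a
plain Object[]. Note the double wrapping the method preserves, sketched here
with assumed shapes rather than the CarbonData API:

    import org.apache.hadoop.io.ArrayWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Sketch: an array<string> value ["a", "b"] becomes a one-element
    // ArrayWritable that wraps the ArrayWritable of the elements, mirroring
    // the subArray construction in createArray.
    final class ArrayShapeSketch {
      static ArrayWritable arrayColumnValue() {
        Writable[] elements = { new Text("a"), new Text("b") };
        ArrayWritable subArray = new ArrayWritable(Writable.class, elements);
        return new ArrayWritable(Writable.class, new Writable[] { subArray });
      }
    }
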
@@ -190,23 +189,21 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
    * @throws IOException
    */
   private ArrayWritable createStruct(Object obj, CarbonColumn carbonColumn) throws IOException {
-    if (obj instanceof GenericInternalRow) {
-      Object[] objArray = ((GenericInternalRow) obj).values();
-      List<CarbonDimension> childCarbonDimensions = null;
-      if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
-        childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
-      }
+    Object[] objArray = (Object[]) obj;
+    List<CarbonDimension> childCarbonDimensions = null;
+    if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+      childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+    }
 
-      if (null != childCarbonDimensions) {
-        Writable[] arr = new Writable[objArray.length];
-        for (int i = 0; i < objArray.length; i++) {
+    if (null != childCarbonDimensions) {
+      Writable[] arr = new Writable[objArray.length];
+      for (int i = 0; i < objArray.length; i++) {
 
-          arr[i] = createWritableObject(objArray[i], childCarbonDimensions.get(i));
-        }
-        return new ArrayWritable(Writable.class, arr);
+        arr[i] = createWritableObject(objArray[i], childCarbonDimensions.get(i));
       }
+      return new ArrayWritable(Writable.class, arr);
     }
-    throw new IOException("DataType not supported in Carbondata");
+    return null;
   }
 
   /**
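
Two things changed in createStruct besides the dropped GenericInternalRow
unwrapping: the struct value now arrives as a plain Object[], and the method
returns null instead of throwing IOException("DataType not supported in
Carbondata") when no child dimensions are found, so such a value surfaces as
NULL rather than failing the task. The field mapping itself is one Writable
per child dimension, roughly (assumed shapes, not the CarbonData API):

    import org.apache.hadoop.io.ArrayWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    // Sketch: a struct<name:string, age:int> value maps field-by-field into
    // a single ArrayWritable.
    final class StructShapeSketch {
      static ArrayWritable structColumnValue() {
        Writable[] fields = { new Text("alice"), new IntWritable(30) };
        return new ArrayWritable(Writable.class, fields);
      }
    }
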
@@ -223,6 +220,12 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
     if (obj == null) {
       return null;
     }
+    if (carbonColumn.hasEncoding(Encoding.DICTIONARY)) {
+      obj = DataTypeUtil.getDataBasedOnDataType(obj.toString(), dataType);
+      if (obj == null) {
+        return null;
+      }
+    }
     if (dataType == DataTypes.NULL) {
       return null;
     } else if (dataType == DataTypes.DOUBLE) {
@@ -233,6 +236,12 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
       return new LongWritable((long) obj);
     } else if (dataType == DataTypes.SHORT) {
       return new ShortWritable((short) obj);
+    } else if (dataType == DataTypes.BOOLEAN) {
+      return new BooleanWritable((boolean) obj);
+    } else if (dataType == DataTypes.VARCHAR) {
+      return new Text(obj.toString());
+    } else if (dataType == DataTypes.BINARY) {
+      return new BytesWritable((byte[]) obj);
     } else if (dataType == DataTypes.DATE) {
       Calendar c = Calendar.getInstance();
       c.setTime(new Date(0));
@@ -243,6 +252,10 @@ public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T
       return new TimestampWritable(new Timestamp((long) obj / 1000));
     } else if (dataType == DataTypes.STRING) {
       return new Text(obj.toString());
+    } else if (DataTypes.isArrayType(dataType)) {
+      return createArray(obj, carbonColumn);
+    } else if (DataTypes.isStructType(dataType)) {
+      return createStruct(obj, carbonColumn);
     } else if (DataTypes.isDecimal(dataType)) {
       return new HiveDecimalWritable(HiveDecimal.create(new java.math.BigDecimal(obj.toString())));
     } else {
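
In createWritableObject, besides the new BOOLEAN, VARCHAR and BINARY branches
and the recursion into createArray/createStruct for complex types, the key
addition is the dictionary pre-step: a column carrying the DICTIONARY encoding
hands over its decoded member as text, which must be parsed back into the
column's actual data type (DataTypeUtil.getDataBasedOnDataType) before the
Writable mapping runs, with a null parse result short-circuiting to NULL. A
simplified, self-contained illustration for an INT column, with a plain parse
standing in for the DataTypeUtil call:

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Writable;

    // Sketch of the dictionary pre-step; in the patch the parse is
    // DataTypeUtil.getDataBasedOnDataType(obj.toString(), dataType).
    final class DictionaryDecodeSketch {
      static Writable toWritable(Object obj, boolean dictionaryEncoded) {
        if (obj == null) {
          return null;
        }
        if (dictionaryEncoded) {
          obj = Integer.parseInt(obj.toString());    // text -> typed value
        }
        return new IntWritable((int) obj);
      }
    }
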
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
index 4ed2b91..e93a794 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
@@ -47,6 +47,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -204,6 +206,12 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
         return new LongWritable((long) obj);
       case SHORT:
         return new ShortWritable((short) obj);
+      case BOOLEAN:
+        return new BooleanWritable((boolean) obj);
+      case VARCHAR:
+        return new Text(obj.toString());
+      case BINARY:
+        return new BytesWritable((byte[]) obj);
       case DATE:
         return new DateWritable(new Date(Long.parseLong(String.valueOf(obj.toString()))));
       case TIMESTAMP:
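
CarbonHiveRecordReader mirrors the same three additions in its type-category
switch. One general Hadoop caveat for consumers of the new BINARY support (not
specific to this patch): a BytesWritable's backing array may be longer than
the payload, so read it via copyBytes() or honor getLength():

    import org.apache.hadoop.io.BytesWritable;

    // getBytes() can return a padded buffer; copyBytes() returns exactly
    // getLength() bytes.
    final class BinaryReadSketch {
      static byte[] payload(BytesWritable w) {
        return w.copyBytes();
      }
    }
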
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
index 6722dcf..75c7056 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonObjectInspector.java
@@ -25,12 +25,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 
 class CarbonObjectInspector extends SettableStructObjectInspector {
@@ -79,8 +80,14 @@ class CarbonObjectInspector extends SettableStructObjectInspector {
       return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
       return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
-    } else if (((CharTypeInfo) typeInfo).getPrimitiveCategory().name().equals("CHAR")) {
+    } else if (typeInfo.equals(TypeInfoFactory.charTypeInfo)) {
       return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
+      return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+    } else if (typeInfo instanceof VarcharTypeInfo) {
+      return new WritableHiveVarcharObjectInspector((VarcharTypeInfo) typeInfo);
+    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
     } else {
       throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
     }
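
The CarbonObjectInspector fix matters beyond the new types: the old code cast
any otherwise-unmatched TypeInfo to CharTypeInfo before inspecting it, which
would raise ClassCastException for boolean, varchar or binary columns. The new
branches compare against the TypeInfoFactory singletons instead, and varchar
gets a per-type inspector because VarcharTypeInfo carries the declared maximum
length. A sketch of that varchar resolution, assuming Hive's serde2 API:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

    // Sketch: build an inspector for varchar(100). The length lives in the
    // VarcharTypeInfo, so this cannot come from the shared primitive
    // inspector singletons.
    final class VarcharInspectorSketch {
      static ObjectInspector varcharInspector() {
        VarcharTypeInfo info = TypeInfoFactory.getVarcharTypeInfo(100);
        return new WritableHiveVarcharObjectInspector(info);
      }
    }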