Posted to commits@carbondata.apache.org by qi...@apache.org on 2020/08/07 12:38:31 UTC

[carbondata] branch master updated: [CARBONDATA-3941] Support binary data type reading from presto

This is an automated email from the ASF dual-hosted git repository.

qiangcai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new c23c32f  [CARBONDATA-3941] Support binary data type reading from presto
c23c32f is described below

commit c23c32f25a8ef12541cd6828e7901bacd1265548
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Thu Aug 6 12:47:41 2020 +0530

    [CARBONDATA-3941] Support binary data type reading from presto
    
    Why is this PR needed?
    When a binary store is queried from Presto, Presto currently gives 0 rows.
    
    What changes were proposed in this PR?
    Presto can support reading the binary (varbinary) data type by using the SliceStreamReader,
    which fills binary byte[] values through its putByteArray() method (a short sketch of the
    resulting write/read round trip follows this message).
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #3882
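
    A minimal Scala sketch of the round trip this change enables, assuming the CarbonData SDK
    writer classes and commons-codec already used in the patch below plus the embedded
    PrestoServer helper from the integration test; the paths, the two-column schema, and the
    table name files1 are illustrative stand-ins, not a fixed API:

        import java.nio.file.{Files, Paths}

        import org.apache.commons.codec.binary.Hex
        import org.apache.carbondata.core.metadata.datatype.DataTypes
        import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}

        // Illustrative locations; the test derives these from its own rootPath.
        val imagePath = "/path/to/sample-image.jpg"
        val writerPath = "/tmp/sdk_output/files1"

        // Hex-encode the raw image bytes so they can travel through the SDK writer's
        // CSV input, mirroring what buildTestDataOtherDataType does in the patch.
        val binaryValue = String.valueOf(Hex.encodeHex(Files.readAllBytes(Paths.get(imagePath))))

        // A BINARY field in the SDK schema maps to varbinary on the Presto side.
        val fields = Array(new Field("age", DataTypes.INT), new Field("id", DataTypes.BINARY))
        val writer = CarbonWriter.builder()
          .outputPath(writerPath)
          .uniqueIdentifier(System.currentTimeMillis())
          .withCsvInput(new Schema(fields))
          .writtenBy("BinaryReadSketch")
          .build()
        writer.write(Array[String]("1", binaryValue))
        writer.close()

        // Reading back through the integration test's PrestoServer helper (the test
        // first creates Presto table sdk_output.files1 over the same store path):
        // a varbinary column arrives as a raw byte array, as the new test asserts.
        val rows: List[Map[String, Any]] = prestoServer.executeQuery("select id from files1")
        val idBytes: Array[Byte] = rows.head("id").asInstanceOf[Array[Byte]]
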
---
 .../carbondata/presto/CarbonVectorBatch.java       |  3 +-
 .../PrestoTestNonTransactionalTableFiles.scala     | 53 +++++++++++++++++++---
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
index 69ad1e9..94543fb 100644
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
@@ -94,7 +94,8 @@ public class CarbonVectorBatch {
       return new FloatStreamReader(batchSize, field.getDataType());
     } else if (dataType == DataTypes.BYTE) {
       return new ByteStreamReader(batchSize, field.getDataType());
-    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR
+        || dataType == DataTypes.BINARY) {
       return new SliceStreamReader(batchSize, field.getDataType());
     } else if (DataTypes.isDecimal(dataType)) {
       if (dataType instanceof DecimalType) {
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
index 7a6f3b6..fd4ac35 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
@@ -17,10 +17,11 @@
 
 package org.apache.carbondata.presto.integrationtest
 
-import java.io.File
+import java.io.{BufferedInputStream, File, FileInputStream}
 import java.sql.SQLException
 import java.util
 
+import org.apache.commons.codec.binary.Hex
 import org.apache.commons.io.FileUtils
 import org.apache.commons.lang.RandomStringUtils
 import org.scalatest.{BeforeAndAfterAll, FunSuiteLike}
@@ -44,6 +45,7 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
                                   + "../../../..").getCanonicalPath
   private val storePath = s"$rootPath/integration/presto/target/store"
   private val writerPath = storePath + "/sdk_output/files"
+  private val writerPathBinary = storePath + "/sdk_output/files1"
   private val prestoServer = new PrestoServer
   private var varcharString = new String
 
@@ -86,6 +88,17 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
         "(format='CARBON') ")
   }
 
+  private def createTableBinary = {
+    prestoServer.execute("drop table if exists sdk_output.files1")
+    prestoServer.execute("drop schema if exists sdk_output")
+    prestoServer.execute("create schema sdk_output")
+    prestoServer
+      .execute(
+        "create table sdk_output.files1(name boolean, age int, id varbinary, height double, salary " +
+        "real) with" +
+        "(format='CARBON') ")
+  }
+
   def buildTestData(rows: Int, options: util.Map[String, String], varcharDataGen: Boolean): Any = {
     buildTestData(rows, options, List("name"), varcharDataGen)
   }
@@ -173,27 +186,36 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
   }
 
   // prepare sdk writer output with other schema
-  def buildTestDataOtherDataType(rows: Int, sortColumns: Array[String]): Any = {
+  def buildTestDataOtherDataType(rows: Int, sortColumns: Array[String], path : String): Any = {
     val fields: Array[Field] = new Array[Field](5)
     // same column name, but name as boolean type
     fields(0) = new Field("name", DataTypes.BOOLEAN)
     fields(1) = new Field("age", DataTypes.INT)
-    fields(2) = new Field("id", DataTypes.BYTE)
+    fields(2) = new Field("id", DataTypes.BINARY)
     fields(3) = new Field("height", DataTypes.DOUBLE)
     fields(4) = new Field("salary", DataTypes.FLOAT)
 
+    val imagePath = rootPath + "/sdk/sdk/src/test/resources/image/carbondatalogo.jpg"
     try {
+      var i = 0
+      val bis = new BufferedInputStream(new FileInputStream(imagePath))
+      var hexValue: Array[Char] = null
+      val originBinary = new Array[Byte](bis.available)
+      while (bis.read(originBinary) != -1) {
+        hexValue = Hex.encodeHex(originBinary)
+      }
+      bis.close()
+      val binaryValue = String.valueOf(hexValue)
       val builder = CarbonWriter.builder()
       val writer =
-        builder.outputPath(writerPath)
+        builder.outputPath(path)
           .uniqueIdentifier(System.currentTimeMillis()).withBlockSize(2).sortBy(sortColumns)
           .withCsvInput(new Schema(fields)).writtenBy("TestNonTransactionalCarbonTable").build()
-      var i = 0
       while (i < rows) {
         writer
           .write(Array[String]("true",
             String.valueOf(i),
-            String.valueOf(i),
+            binaryValue,
             String.valueOf(i.toDouble / 2),
             String.valueOf(i.toFloat / 2)))
         i += 1
@@ -260,7 +282,7 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
 
   test("test reading different schema") {
     buildTestDataSingleFile()
-    buildTestDataOtherDataType(3, null)
+    buildTestDataOtherDataType(3, null, writerPath)
     val exception =
       intercept[SQLException] {
         val actualResult: List[Map[String, Any]] = prestoServer
@@ -271,6 +293,23 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
     cleanTestData()
   }
 
+  test("test reading binary") {
+    FileUtils.deleteDirectory(new File(writerPathBinary))
+    createTableBinary
+    buildTestDataOtherDataType(3, null, writerPathBinary)
+    val actualResult: List[Map[String, Any]] = prestoServer
+      .executeQuery("select id from files1 ")
+    assert(actualResult.size == 3)
+    // check the binary byte Array size, as original hex encoded image byte array size is 118198
+    assert(actualResult.head("id").asInstanceOf[Array[Byte]].length == 118198)
+    // validate some initial bytes
+    assert(actualResult.head("id").asInstanceOf[Array[Byte]](0) == 56)
+    assert(actualResult.head("id").asInstanceOf[Array[Byte]](1) == 57)
+    assert(actualResult.head("id").asInstanceOf[Array[Byte]](2) == 53)
+    assert(actualResult.head("id").asInstanceOf[Array[Byte]](3) == 48)
+    FileUtils.deleteDirectory(new File(writerPathBinary))
+  }
+
   test("test reading without carbon index file") {
     buildTestDataSingleFile()
     deleteFile(writerPath, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)