You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by qi...@apache.org on 2020/08/07 12:38:31 UTC
[carbondata] branch master updated: [CARBONDATA-3941] Support
binary data type reading from presto
This is an automated email from the ASF dual-hosted git repository.
qiangcai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new c23c32f [CARBONDATA-3941] Support binary data type reading from presto
c23c32f is described below
commit c23c32f25a8ef12541cd6828e7901bacd1265548
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Thu Aug 6 12:47:41 2020 +0530
[CARBONDATA-3941] Support binary data type reading from presto
Why is this PR needed?
When a binary store is queried from Presto, Presto currently returns 0 rows.
What changes were proposed in this PR?
Presto can support reading the binary (varbinary) data type by using the SliceStreamReader,
which can store the binary byte[] using its putByteArray() method.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3882
---
.../carbondata/presto/CarbonVectorBatch.java | 3 +-
.../PrestoTestNonTransactionalTableFiles.scala | 53 +++++++++++++++++++---
2 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
index 69ad1e9..94543fb 100644
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java
@@ -94,7 +94,8 @@ public class CarbonVectorBatch {
return new FloatStreamReader(batchSize, field.getDataType());
} else if (dataType == DataTypes.BYTE) {
return new ByteStreamReader(batchSize, field.getDataType());
- } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR
+ || dataType == DataTypes.BINARY) {
return new SliceStreamReader(batchSize, field.getDataType());
} else if (DataTypes.isDecimal(dataType)) {
if (dataType instanceof DecimalType) {
diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
index 7a6f3b6..fd4ac35 100644
--- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
+++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoTestNonTransactionalTableFiles.scala
@@ -17,10 +17,11 @@
package org.apache.carbondata.presto.integrationtest
-import java.io.File
+import java.io.{BufferedInputStream, File, FileInputStream}
import java.sql.SQLException
import java.util
+import org.apache.commons.codec.binary.Hex
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.RandomStringUtils
import org.scalatest.{BeforeAndAfterAll, FunSuiteLike}
@@ -44,6 +45,7 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
+ "../../../..").getCanonicalPath
private val storePath = s"$rootPath/integration/presto/target/store"
private val writerPath = storePath + "/sdk_output/files"
+ private val writerPathBinary = storePath + "/sdk_output/files1"
private val prestoServer = new PrestoServer
private var varcharString = new String
@@ -86,6 +88,17 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
"(format='CARBON') ")
}
+ private def createTableBinary = {
+ prestoServer.execute("drop table if exists sdk_output.files1")
+ prestoServer.execute("drop schema if exists sdk_output")
+ prestoServer.execute("create schema sdk_output")
+ prestoServer
+ .execute(
+ "create table sdk_output.files1(name boolean, age int, id varbinary, height double, salary " +
+ "real) with" +
+ "(format='CARBON') ")
+ }
+
def buildTestData(rows: Int, options: util.Map[String, String], varcharDataGen: Boolean): Any = {
buildTestData(rows, options, List("name"), varcharDataGen)
}
@@ -173,27 +186,36 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
}
// prepare sdk writer output with other schema
- def buildTestDataOtherDataType(rows: Int, sortColumns: Array[String]): Any = {
+ def buildTestDataOtherDataType(rows: Int, sortColumns: Array[String], path : String): Any = {
val fields: Array[Field] = new Array[Field](5)
// same column name, but name as boolean type
fields(0) = new Field("name", DataTypes.BOOLEAN)
fields(1) = new Field("age", DataTypes.INT)
- fields(2) = new Field("id", DataTypes.BYTE)
+ fields(2) = new Field("id", DataTypes.BINARY)
fields(3) = new Field("height", DataTypes.DOUBLE)
fields(4) = new Field("salary", DataTypes.FLOAT)
+ val imagePath = rootPath + "/sdk/sdk/src/test/resources/image/carbondatalogo.jpg"
try {
+ var i = 0
+ val bis = new BufferedInputStream(new FileInputStream(imagePath))
+ var hexValue: Array[Char] = null
+ val originBinary = new Array[Byte](bis.available)
+ while (bis.read(originBinary) != -1) {
+ hexValue = Hex.encodeHex(originBinary)
+ }
+ bis.close()
+ val binaryValue = String.valueOf(hexValue)
val builder = CarbonWriter.builder()
val writer =
- builder.outputPath(writerPath)
+ builder.outputPath(path)
.uniqueIdentifier(System.currentTimeMillis()).withBlockSize(2).sortBy(sortColumns)
.withCsvInput(new Schema(fields)).writtenBy("TestNonTransactionalCarbonTable").build()
- var i = 0
while (i < rows) {
writer
.write(Array[String]("true",
String.valueOf(i),
- String.valueOf(i),
+ binaryValue,
String.valueOf(i.toDouble / 2),
String.valueOf(i.toFloat / 2)))
i += 1
@@ -260,7 +282,7 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
test("test reading different schema") {
buildTestDataSingleFile()
- buildTestDataOtherDataType(3, null)
+ buildTestDataOtherDataType(3, null, writerPath)
val exception =
intercept[SQLException] {
val actualResult: List[Map[String, Any]] = prestoServer
@@ -271,6 +293,23 @@ class PrestoTestNonTransactionalTableFiles extends FunSuiteLike with BeforeAndAf
cleanTestData()
}
+ test("test reading binary") {
+ FileUtils.deleteDirectory(new File(writerPathBinary))
+ createTableBinary
+ buildTestDataOtherDataType(3, null, writerPathBinary)
+ val actualResult: List[Map[String, Any]] = prestoServer
+ .executeQuery("select id from files1 ")
+ assert(actualResult.size == 3)
+ // check the binary byte Array size, as original hex encoded image byte array size is 118198
+ assert(actualResult.head("id").asInstanceOf[Array[Byte]].length == 118198)
+ // validate some initial bytes
+ assert(actualResult.head("id").asInstanceOf[Array[Byte]](0) == 56)
+ assert(actualResult.head("id").asInstanceOf[Array[Byte]](1) == 57)
+ assert(actualResult.head("id").asInstanceOf[Array[Byte]](2) == 53)
+ assert(actualResult.head("id").asInstanceOf[Array[Byte]](3) == 48)
+ FileUtils.deleteDirectory(new File(writerPathBinary))
+ }
+
test("test reading without carbon index file") {
buildTestDataSingleFile()
deleteFile(writerPath, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)