You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2021/10/27 07:54:17 UTC
[hive] branch master updated: HIVE-25553: Support Map data-type
natively in Arrow format (Sruthi Mooriyathvariam,
reviewed by Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9ed1d1e HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan)
9ed1d1e is described below
commit 9ed1d1ed720196d25f6ad65c1964a8a6924ce9d6
Author: Sruthi Mooriyathvariam <wa...@gmail.com>
AuthorDate: Wed Oct 27 13:24:05 2021 +0530
HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan)
This covers the following sub-tasks:
HIVE-25554: Upgrade arrow version to 0.15
HIVE-25555: ArrowColumnarBatchSerDe should store map natively instead of converting to list
a. Upgrading the Arrow dependency to version 0.15.1 (where the map data-type is supported)
b. Modifying ArrowColumnarBatchSerDe and corresponding Serializer/Deserializer to not use list as a workaround for map and use the arrow map data-type instead
c. Taking care of creating non-nullable struct and non-nullable key type for the map data-type in ArrowColumnarBatchSerDe
Signed-off-by: Sankar Hariappan <sa...@apache.org>
Closes #2751
---
data/files/datatypes.txt | 4 +-
.../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 16 +++--
.../java/org/apache/hive/jdbc/TestJdbcDriver2.java | 8 +--
.../hive/jdbc/TestJdbcWithMiniLlapArrow.java | 83 ++++++++++++----------
.../hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java | 83 ++++++++++++----------
.../apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java | 8 +--
pom.xml | 2 +-
.../hive/llap/WritableByteChannelAdapter.java | 2 +-
.../hive/ql/io/arrow/ArrowColumnarBatchSerDe.java | 17 ++---
.../apache/hadoop/hive/ql/io/arrow/Serializer.java | 42 +++++++----
.../hadoop/hive/ql/io/arrow/TestSerializer.java | 18 ++---
11 files changed, 160 insertions(+), 123 deletions(-)
diff --git a/data/files/datatypes.txt b/data/files/datatypes.txt
index 0872a1f..38f8d29 100644
--- a/data/files/datatypes.txt
+++ b/data/files/datatypes.txt
@@ -1,3 +1,3 @@
\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N
--1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N\N\N
-1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.123456YWJjZA==2013-01-01abc123abc123X'01FF'
+-1false-1.11\Nab\N\N\N-1-1-1.0-1110100\N\N\N\N\N\N\N\N\N
+1true1.11121x2ykvbca92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.123456YWJjZA==2013-01-01abc123abc123X'01FF'
\ No newline at end of file
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
index 20682ff..2ec3d48 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
@@ -499,10 +499,12 @@ public abstract class BaseJdbcWithMiniLlap {
assertEquals(0, c5Value.size());
Map<?,?> c6Value = (Map<?,?>) rowValues[5];
- assertEquals(0, c6Value.size());
+ assertEquals(1, c6Value.size());
+ assertEquals(null, c6Value.get(1));
Map<?,?> c7Value = (Map<?,?>) rowValues[6];
- assertEquals(0, c7Value.size());
+ assertEquals(1, c7Value.size());
+ assertEquals("b", c7Value.get("a"));
List<?> c8Value = (List<?>) rowValues[7];
assertEquals(null, c8Value.get(0));
@@ -518,7 +520,10 @@ public abstract class BaseJdbcWithMiniLlap {
assertEquals(0, c13Value.size());
Map<?,?> c14Value = (Map<?,?>) rowValues[13];
- assertEquals(0, c14Value.size());
+ assertEquals(1, c14Value.size());
+ Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ assertEquals(1, mapVal.size());
+ assertEquals(100, mapVal.get(Integer.valueOf(10)));
List<?> c15Value = (List<?>) rowValues[14];
assertEquals(null, c15Value.get(0));
@@ -553,8 +558,9 @@ public abstract class BaseJdbcWithMiniLlap {
assertEquals("y", c6Value.get(Integer.valueOf(2)));
c7Value = (Map<?,?>) rowValues[6];
- assertEquals(1, c7Value.size());
+ assertEquals(2, c7Value.size());
assertEquals("v", c7Value.get("k"));
+ assertEquals("c", c7Value.get("b"));
c8Value = (List<?>) rowValues[7];
assertEquals("a", c8Value.get(0));
@@ -577,7 +583,7 @@ public abstract class BaseJdbcWithMiniLlap {
c14Value = (Map<?,?>) rowValues[13];
assertEquals(2, c14Value.size());
- Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
assertEquals(2, mapVal.size());
assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index b2583c0..c26e832 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -889,15 +889,15 @@ public class TestJdbcDriver2 {
assertEquals(-1.1d, res.getDouble(3), floatCompareDelta);
assertEquals("", res.getString(4));
assertEquals("[]", res.getString(5));
- assertEquals("{}", res.getString(6));
- assertEquals("{}", res.getString(7));
+ assertEquals("{1:null}", res.getString(6));
+ assertEquals("{\"a\":\"b\"}", res.getString(7));
assertEquals("{\"r\":null,\"s\":null,\"t\":null}", res.getString(8));
assertEquals(-1, res.getByte(9));
assertEquals(-1, res.getShort(10));
assertEquals(-1.0f, res.getFloat(11), floatCompareDelta);
assertEquals(-1, res.getLong(12));
assertEquals("[]", res.getString(13));
- assertEquals("{}", res.getString(14));
+ assertEquals("{1:{10:100}}", res.getString(14));
assertEquals("{\"r\":null,\"s\":null}", res.getString(15));
assertEquals("[]", res.getString(16));
assertEquals(null, res.getString(17));
@@ -918,7 +918,7 @@ public class TestJdbcDriver2 {
assertEquals("1", res.getString(4));
assertEquals("[1,2]", res.getString(5));
assertEquals("{1:\"x\",2:\"y\"}", res.getString(6));
- assertEquals("{\"k\":\"v\"}", res.getString(7));
+ assertEquals("{\"k\":\"v\",\"b\":\"c\"}", res.getString(7));
assertEquals("{\"r\":\"a\",\"s\":9,\"t\":2.2}", res.getString(8));
assertEquals(1, res.getByte(9));
assertEquals(1, res.getShort(10));
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java
index 9d0ff2d..209c42b 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapArrow.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.type.Timestamp;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
import org.apache.hadoop.hive.llap.Row;
@@ -123,8 +124,6 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
return new LlapArrowRowInputFormat(Long.MAX_VALUE);
}
- // Currently MAP type is not supported. Add it back when Arrow 1.0 is released.
- // See: SPARK-21187
@Test
@Override
public void testDataTypes() throws Exception {
@@ -184,11 +183,13 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
List<?> c5Value = (List<?>) rowValues[4];
assertEquals(0, c5Value.size());
- //Map<?,?> c6Value = (Map<?,?>) rowValues[5];
- //assertEquals(0, c6Value.size());
+ Map<?,?> c6Value = (Map<?,?>) rowValues[5];
+ assertEquals(1, c6Value.size());
+ assertEquals(null, c6Value.get(1));
- //Map<?,?> c7Value = (Map<?,?>) rowValues[6];
- //assertEquals(0, c7Value.size());
+ Map<?,?> c7Value = (Map<?,?>) rowValues[6];
+ assertEquals(1, c7Value.size());
+ assertEquals("b", c7Value.get("a"));
List<?> c8Value = (List<?>) rowValues[7];
assertEquals(null, c8Value.get(0));
@@ -203,15 +204,18 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
List<?> c13Value = (List<?>) rowValues[12];
assertEquals(0, c13Value.size());
- //Map<?,?> c14Value = (Map<?,?>) rowValues[13];
- //assertEquals(0, c14Value.size());
+ Map<?,?> c14Value = (Map<?,?>) rowValues[13];
+ assertEquals(1, c14Value.size());
+ Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ assertEquals(1, mapVal.size());
+ assertEquals(100, mapVal.get(Integer.valueOf(10)));
List<?> c15Value = (List<?>) rowValues[14];
assertEquals(null, c15Value.get(0));
assertEquals(null, c15Value.get(1));
- //List<?> c16Value = (List<?>) rowValues[15];
- //assertEquals(0, c16Value.size());
+ List<?> c16Value = (List<?>) rowValues[15];
+ assertEquals(0, c16Value.size());
assertEquals(null, rowValues[16]);
assertEquals(null, rowValues[17]);
@@ -233,14 +237,15 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
assertEquals(Integer.valueOf(1), c5Value.get(0));
assertEquals(Integer.valueOf(2), c5Value.get(1));
- //c6Value = (Map<?,?>) rowValues[5];
- //assertEquals(2, c6Value.size());
- //assertEquals("x", c6Value.get(Integer.valueOf(1)));
- //assertEquals("y", c6Value.get(Integer.valueOf(2)));
+ c6Value = (Map<?,?>) rowValues[5];
+ assertEquals(2, c6Value.size());
+ assertEquals("x", c6Value.get(Integer.valueOf(1)));
+ assertEquals("y", c6Value.get(Integer.valueOf(2)));
- //c7Value = (Map<?,?>) rowValues[6];
- //assertEquals(1, c7Value.size());
- //assertEquals("v", c7Value.get("k"));
+ c7Value = (Map<?,?>) rowValues[6];
+ assertEquals(2, c7Value.size());
+ assertEquals("v", c7Value.get("k"));
+ assertEquals("c", c7Value.get("b"));
c8Value = (List<?>) rowValues[7];
assertEquals("a", c8Value.get(0));
@@ -261,15 +266,15 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
assertEquals("c", listVal.get(0));
assertEquals("d", listVal.get(1));
- //c14Value = (Map<?,?>) rowValues[13];
- //assertEquals(2, c14Value.size());
- //Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
- //assertEquals(2, mapVal.size());
- //assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
- //assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
- //mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2));
- //assertEquals(1, mapVal.size());
- //assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21)));
+ c14Value = (Map<?,?>) rowValues[13];
+ assertEquals(2, c14Value.size());
+ mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ assertEquals(2, mapVal.size());
+ assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
+ assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
+ mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2));
+ assertEquals(1, mapVal.size());
+ assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21)));
c15Value = (List<?>) rowValues[14];
assertEquals(Integer.valueOf(1), c15Value.get(0));
@@ -278,19 +283,19 @@ public class TestJdbcWithMiniLlapArrow extends BaseJdbcWithMiniLlap {
assertEquals(Integer.valueOf(2), listVal.get(0));
assertEquals("x", listVal.get(1));
- //c16Value = (List<?>) rowValues[15];
- //assertEquals(2, c16Value.size());
- //listVal = (List<?>) c16Value.get(0);
- //assertEquals(2, listVal.size());
- //mapVal = (Map<?,?>) listVal.get(0);
- //assertEquals(0, mapVal.size());
- //assertEquals(Integer.valueOf(1), listVal.get(1));
- //listVal = (List<?>) c16Value.get(1);
- //mapVal = (Map<?,?>) listVal.get(0);
- //assertEquals(2, mapVal.size());
- //assertEquals("b", mapVal.get("a"));
- //assertEquals("d", mapVal.get("c"));
- //assertEquals(Integer.valueOf(2), listVal.get(1));
+ c16Value = (List<?>) rowValues[15];
+ assertEquals(2, c16Value.size());
+ listVal = (List<?>) c16Value.get(0);
+ assertEquals(2, listVal.size());
+ mapVal = (Map<?,?>) listVal.get(0);
+ assertEquals(0, mapVal.size());
+ assertEquals(Integer.valueOf(1), listVal.get(1));
+ listVal = (List<?>) c16Value.get(1);
+ mapVal = (Map<?,?>) listVal.get(0);
+ assertEquals(2, mapVal.size());
+ assertEquals("b", mapVal.get("a"));
+ assertEquals("d", mapVal.get("c"));
+ assertEquals(Integer.valueOf(2), listVal.get(1));
assertEquals(Timestamp.valueOf("2012-04-22 09:00:00.123456"), rowValues[16]);
assertEquals(new BigDecimal("123456789.123456"), rowValues[17]);
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java
index 9025703..f7cb406 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.type.Timestamp;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.io.NullWritable;
@@ -64,8 +65,6 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
return new LlapArrowRowInputFormat(Long.MAX_VALUE);
}
- // Currently MAP type is not supported. Add it back when Arrow 1.0 is released.
- // See: SPARK-21187
@Override
public void testDataTypes() throws Exception {
createDataTypesTable("datatypes");
@@ -124,11 +123,13 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
List<?> c5Value = (List<?>) rowValues[4];
assertEquals(0, c5Value.size());
- //Map<?,?> c6Value = (Map<?,?>) rowValues[5];
- //assertEquals(0, c6Value.size());
+ Map<?,?> c6Value = (Map<?,?>) rowValues[5];
+ assertEquals(1, c6Value.size());
+ assertEquals(null, c6Value.get(1));
- //Map<?,?> c7Value = (Map<?,?>) rowValues[6];
- //assertEquals(0, c7Value.size());
+ Map<?,?> c7Value = (Map<?,?>) rowValues[6];
+ assertEquals(1, c7Value.size());
+ assertEquals("b", c7Value.get("a"));
List<?> c8Value = (List<?>) rowValues[7];
assertEquals(null, c8Value.get(0));
@@ -143,15 +144,18 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
List<?> c13Value = (List<?>) rowValues[12];
assertEquals(0, c13Value.size());
- //Map<?,?> c14Value = (Map<?,?>) rowValues[13];
- //assertEquals(0, c14Value.size());
+ Map<?,?> c14Value = (Map<?,?>) rowValues[13];
+ assertEquals(1, c14Value.size());
+ Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ assertEquals(1, mapVal.size());
+ assertEquals(100, mapVal.get(Integer.valueOf(10)));
List<?> c15Value = (List<?>) rowValues[14];
assertEquals(null, c15Value.get(0));
assertEquals(null, c15Value.get(1));
- //List<?> c16Value = (List<?>) rowValues[15];
- //assertEquals(0, c16Value.size());
+ List<?> c16Value = (List<?>) rowValues[15];
+ assertEquals(0, c16Value.size());
assertEquals(null, rowValues[16]);
assertEquals(null, rowValues[17]);
@@ -173,14 +177,15 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
assertEquals(Integer.valueOf(1), c5Value.get(0));
assertEquals(Integer.valueOf(2), c5Value.get(1));
- //c6Value = (Map<?,?>) rowValues[5];
- //assertEquals(2, c6Value.size());
- //assertEquals("x", c6Value.get(Integer.valueOf(1)));
- //assertEquals("y", c6Value.get(Integer.valueOf(2)));
+ c6Value = (Map<?,?>) rowValues[5];
+ assertEquals(2, c6Value.size());
+ assertEquals("x", c6Value.get(Integer.valueOf(1)));
+ assertEquals("y", c6Value.get(Integer.valueOf(2)));
- //c7Value = (Map<?,?>) rowValues[6];
- //assertEquals(1, c7Value.size());
- //assertEquals("v", c7Value.get("k"));
+ c7Value = (Map<?,?>) rowValues[6];
+ assertEquals(2, c7Value.size());
+ assertEquals("v", c7Value.get("k"));
+ assertEquals("c", c7Value.get("b"));
c8Value = (List<?>) rowValues[7];
assertEquals("a", c8Value.get(0));
@@ -201,15 +206,15 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
assertEquals("c", listVal.get(0));
assertEquals("d", listVal.get(1));
- //c14Value = (Map<?,?>) rowValues[13];
- //assertEquals(2, c14Value.size());
- //Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
- //assertEquals(2, mapVal.size());
- //assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
- //assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
- //mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2));
- //assertEquals(1, mapVal.size());
- //assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21)));
+ c14Value = (Map<?,?>) rowValues[13];
+ assertEquals(2, c14Value.size());
+ mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
+ assertEquals(2, mapVal.size());
+ assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
+ assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
+ mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2));
+ assertEquals(1, mapVal.size());
+ assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21)));
c15Value = (List<?>) rowValues[14];
assertEquals(Integer.valueOf(1), c15Value.get(0));
@@ -218,19 +223,19 @@ public class TestJdbcWithMiniLlapVectorArrow extends BaseJdbcWithMiniLlap {
assertEquals(Integer.valueOf(2), listVal.get(0));
assertEquals("x", listVal.get(1));
- //c16Value = (List<?>) rowValues[15];
- //assertEquals(2, c16Value.size());
- //listVal = (List<?>) c16Value.get(0);
- //assertEquals(2, listVal.size());
- //mapVal = (Map<?,?>) listVal.get(0);
- //assertEquals(0, mapVal.size());
- //assertEquals(Integer.valueOf(1), listVal.get(1));
- //listVal = (List<?>) c16Value.get(1);
- //mapVal = (Map<?,?>) listVal.get(0);
- //assertEquals(2, mapVal.size());
- //assertEquals("b", mapVal.get("a"));
- //assertEquals("d", mapVal.get("c"));
- //assertEquals(Integer.valueOf(2), listVal.get(1));
+ c16Value = (List<?>) rowValues[15];
+ assertEquals(2, c16Value.size());
+ listVal = (List<?>) c16Value.get(0);
+ assertEquals(2, listVal.size());
+ mapVal = (Map<?,?>) listVal.get(0);
+ assertEquals(0, mapVal.size());
+ assertEquals(Integer.valueOf(1), listVal.get(1));
+ listVal = (List<?>) c16Value.get(1);
+ mapVal = (Map<?,?>) listVal.get(0);
+ assertEquals(2, mapVal.size());
+ assertEquals("b", mapVal.get("a"));
+ assertEquals("d", mapVal.get("c"));
+ assertEquals(Integer.valueOf(2), listVal.get(1));
assertEquals(Timestamp.valueOf("2012-04-22 09:00:00.123456"), rowValues[16]);
assertEquals(new BigDecimal("123456789.123456"), rowValues[17]);
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java
index 386560f..b43a1b7 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java
@@ -817,15 +817,15 @@ public class cbo_rp_TestJdbcDriver2 {
assertEquals(-1.1d, res.getDouble(3), floatCompareDelta);
assertEquals("", res.getString(4));
assertEquals("[]", res.getString(5));
- assertEquals("{}", res.getString(6));
- assertEquals("{}", res.getString(7));
+ assertEquals("{1:null}", res.getString(6));
+ assertEquals("{\"a\":\"b\"}", res.getString(7));
assertEquals("{\"r\":null,\"s\":null,\"t\":null}", res.getString(8));
assertEquals(-1, res.getByte(9));
assertEquals(-1, res.getShort(10));
assertEquals(-1.0f, res.getFloat(11), floatCompareDelta);
assertEquals(-1, res.getLong(12));
assertEquals("[]", res.getString(13));
- assertEquals("{}", res.getString(14));
+ assertEquals("{1:{10:100}}", res.getString(14));
assertEquals("{\"r\":null,\"s\":null}", res.getString(15));
assertEquals("[]", res.getString(16));
assertEquals(null, res.getString(17));
@@ -846,7 +846,7 @@ public class cbo_rp_TestJdbcDriver2 {
assertEquals("1", res.getString(4));
assertEquals("[1,2]", res.getString(5));
assertEquals("{1:\"x\",2:\"y\"}", res.getString(6));
- assertEquals("{\"k\":\"v\"}", res.getString(7));
+ assertEquals("{\"k\":\"v\",\"b\":\"c\"}", res.getString(7));
assertEquals("{\"r\":\"a\",\"s\":9,\"t\":2.2}", res.getString(8));
assertEquals(1, res.getByte(9));
assertEquals(1, res.getShort(10));
diff --git a/pom.xml b/pom.xml
index 47c5d53..23d24c0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -111,7 +111,7 @@
<antlr.version>3.5.2</antlr.version>
<apache-directory-server.version>1.5.7</apache-directory-server.version>
<!-- Include arrow for LlapOutputFormatService -->
- <arrow.version>0.11.0</arrow.version>
+ <arrow.version>0.15.1</arrow.version>
<avatica.version>1.12.0</avatica.version>
<avro.version>1.8.2</avro.version>
<bcprov-jdk15on.version>1.64</bcprov-jdk15on.version>
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java b/ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java
index b07ce5b..0bb06f5 100644
--- a/ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java
+++ b/ql/src/java/org/apache/hadoop/hive/llap/WritableByteChannelAdapter.java
@@ -93,7 +93,7 @@ public class WritableByteChannelAdapter implements WritableByteChannel {
int size = src.remaining();
//Down the semaphore or block until available
takeWriteResources(1);
- ByteBuf buf = allocator.buffer(size);
+ ByteBuf buf = allocator.getAsByteBufAllocator().buffer(size);
buf.writeBytes(src);
chc.writeAndFlush(buf).addListener(writeListener);
return size;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
index 2ecf4d9..fdef3b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
@@ -184,14 +184,14 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
- final StructTypeInfo mapStructTypeInfo = new StructTypeInfo();
- mapStructTypeInfo.setAllStructFieldNames(Lists.newArrayList("keys", "values"));
- mapStructTypeInfo.setAllStructFieldTypeInfos(
- Lists.newArrayList(keyTypeInfo, valueTypeInfo));
- final ListTypeInfo mapListStructTypeInfo = new ListTypeInfo();
- mapListStructTypeInfo.setListElementTypeInfo(mapStructTypeInfo);
- return toField(name, mapListStructTypeInfo);
+ final List<Field> mapFields = Lists.newArrayList();
+ mapFields.add(toField(name+"_keys", keyTypeInfo));
+ mapFields.add(toField(name+"_values", valueTypeInfo));
+
+ FieldType struct = new FieldType(false, new ArrowType.Struct(), null);
+ List<Field> childrenOfList = Lists.newArrayList(new Field(name, struct, mapFields));
+ return new Field(name, FieldType.nullable(MinorType.LIST.getType()), childrenOfList);
default:
throw new IllegalArgumentException();
}
@@ -199,7 +199,7 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
static ListTypeInfo toStructListTypeInfo(MapTypeInfo mapTypeInfo) {
final StructTypeInfo structTypeInfo = new StructTypeInfo();
- structTypeInfo.setAllStructFieldNames(Lists.newArrayList("keys", "values"));
+ structTypeInfo.setAllStructFieldNames(Lists.newArrayList("key", "value"));
structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList(
mapTypeInfo.getMapKeyTypeInfo(), mapTypeInfo.getMapValueTypeInfo()));
final ListTypeInfo structListTypeInfo = new ListTypeInfo();
@@ -217,6 +217,7 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
structListVector.childCount = mapVector.childCount;
structListVector.isRepeating = mapVector.isRepeating;
structListVector.noNulls = mapVector.noNulls;
+ System.arraycopy(mapVector.isNull, 0, structListVector.isNull, 0, mapVector.childCount);
System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.childCount);
System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.childCount);
return structListVector;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 96812d3..c00885e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -276,7 +276,7 @@ public class Serializer {
case STRUCT:
return ArrowType.Struct.INSTANCE;
case MAP:
- return ArrowType.List.INSTANCE;
+ return new ArrowType.Map(false);
case UNION:
default:
throw new IllegalArgumentException();
@@ -290,10 +290,14 @@ public class Serializer {
writePrimitive(arrowVector, hiveVector, typeInfo, size, vectorizedRowBatch, isNative);
break;
case LIST:
- writeList((ListVector) arrowVector, (ListColumnVector) hiveVector, (ListTypeInfo) typeInfo, size, vectorizedRowBatch, isNative);
+ // the flag 'isMapDataType'=false, for all the list types except for the case when map is converted
+ // as a list of structs.
+ writeList((ListVector) arrowVector, (ListColumnVector) hiveVector, (ListTypeInfo) typeInfo, size, vectorizedRowBatch, isNative, false);
break;
case STRUCT:
- writeStruct((NonNullableStructVector) arrowVector, (StructColumnVector) hiveVector, (StructTypeInfo) typeInfo, size, vectorizedRowBatch, isNative);
+ // the flag 'isMapDataType'=false, for all the struct types except for the case when map is converted
+ // as a list of structs.
+ writeStruct((NonNullableStructVector) arrowVector, (StructColumnVector) hiveVector, (StructTypeInfo) typeInfo, size, vectorizedRowBatch, isNative, false);
break;
case UNION:
writeUnion(arrowVector, hiveVector, typeInfo, size, vectorizedRowBatch, isNative);
@@ -311,7 +315,8 @@ public class Serializer {
final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo);
final ListColumnVector structListVector = hiveVector == null ? null : toStructListVector(hiveVector);
- write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative);
+ // Map is converted as a list of structs and thus we call the writeList() method with the flag 'isMapDataType'=true
+ writeList(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative, true);
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
int selectedIndex = rowIndex;
@@ -341,12 +346,11 @@ public class Serializer {
}
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector,
- StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
+ StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
final List<String> fieldNames = typeInfo.getAllStructFieldNames();
final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
final ColumnVector[] hiveFieldVectors = hiveVector == null ? null : hiveVector.fields;
final int fieldSize = fieldTypeInfos.size();
-
// This is to handle following scenario -
// if any struct value itself is NULL, we get structVector.isNull[i]=true
// but we don't get the same for it's child fields which later causes exceptions while setting to arrow vectors
@@ -366,9 +370,12 @@ public class Serializer {
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
final ColumnVector hiveFieldVector = hiveVector == null ? null : hiveFieldVectors[fieldIndex];
final String fieldName = fieldNames.get(fieldIndex);
- final FieldVector arrowFieldVector =
- arrowVector.addOrGet(fieldName,
- toFieldType(fieldTypeInfos.get(fieldIndex)), FieldVector.class);
+
+ // If the call is coming from writeMap(), then the structs within the list type should be non-nullable.
+ FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(fieldTypeInfo), null))
+ : (toFieldType(fieldTypeInfos.get(fieldIndex)));
+ final FieldVector arrowFieldVector = arrowVector.addOrGet(fieldName, elementFieldType, FieldVector.class);
+
arrowFieldVector.setInitialCapacity(size);
arrowFieldVector.allocateNew();
write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
@@ -421,12 +428,17 @@ public class Serializer {
}
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo, int size,
- VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
+ VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
final int OFFSET_WIDTH = 4;
final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
final ColumnVector hiveElementVector = hiveVector == null ? null : hiveVector.child;
+
+ // If the call is coming from writeMap(), then the List type should be non-nullable.
+ FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(elementTypeInfo), null))
+ : (toFieldType(elementTypeInfo));
+
final FieldVector arrowElementVector =
- (FieldVector) arrowVector.addOrGetVector(toFieldType(elementTypeInfo)).getVector();
+ (FieldVector) arrowVector.addOrGetVector(elementFieldType).getVector();
VectorizedRowBatch correctedVrb = vectorizedRowBatch;
int correctedSize = hiveVector == null ? 0 : hiveVector.childCount;
@@ -437,7 +449,13 @@ public class Serializer {
arrowElementVector.setInitialCapacity(correctedSize);
arrowElementVector.allocateNew();
- write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
+ // If the flag 'isMapDataType' is set to True, it means that the call is coming from writeMap() and it has to call
+ // writeStruct() with the same flag value, as the map is converted as a list of structs.
+ if (isMapDataType) {
+ writeStruct((NonNullableStructVector) arrowElementVector, (StructColumnVector) hiveElementVector, (StructTypeInfo) elementTypeInfo, correctedSize, correctedVrb, isNative, isMapDataType);
+ } else {
+ write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
+ }
int nextOffset = 0;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestSerializer.java b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestSerializer.java
index aed958b..f8daada 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestSerializer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestSerializer.java
@@ -53,7 +53,7 @@ public class TestSerializer {
List<String> fieldNames = Arrays.asList(new String[] { "a" });
Serializer converter = new Serializer(new HiveConf(), "attemptId", typeInfos, fieldNames);
ArrowWrapperWritable writable = converter.emptyBatch();
- Assert.assertEquals("Schema<a: List<$data$: Struct<keys: Utf8, values: Utf8>>>",
+ Assert.assertEquals("Schema<a: Map(false)<entries: Struct<key: Utf8 not null, value: Utf8 not null> not null>>",
writable.getVectorSchemaRoot().getSchema().toString());
}
@@ -65,8 +65,9 @@ public class TestSerializer {
Serializer converter = new Serializer(new HiveConf(), "attemptId", typeInfos, fieldNames);
ArrowWrapperWritable writable = converter.emptyBatch();
Assert.assertEquals(
- "Schema<a: Struct<b: List<$data$: Int(8, true)>, c: List<$data$: Struct<keys: Utf8, values: Utf8>>, " +
- "d: Struct<e: List<$data$: Int(8, true)>, f: List<$data$: Struct<keys: Utf8, values: Utf8>>>>>",
+ "Schema<a: Struct<b: List<$data$: Int(8, true)>, c: Map(false)<entries: Struct<key: Utf8 not null, " +
+ "value: Utf8 not null> not null>, d: Struct<e: List<$data$: Int(8, true)>, f: Map(false)<entries: " +
+ "Struct<key: Utf8 not null, value: Utf8 not null> not null>>>>",
writable.getVectorSchemaRoot().getSchema().toString());
}
@@ -78,8 +79,9 @@ public class TestSerializer {
Serializer converter = new Serializer(new HiveConf(), "attemptId", typeInfos, fieldNames);
ArrowWrapperWritable writable = converter.emptyBatch();
Assert.assertEquals(
- "Schema<a: List<$data$: Struct<keys: List<$data$: Int(8, true)>, values: " +
- "Struct<b: List<$data$: Int(8, true)>, c: List<$data$: Struct<keys: Utf8, values: Utf8>>>>>>",
+ "Schema<a: Map(false)<entries: Struct<key: List<$data$: Int(8, true)> not null, value: Struct<b: " +
+ "List<$data$: Int(8, true)>, c: Map(false)<entries: Struct<key: Utf8 not null, value: Utf8 not null> " +
+ "not null>> not null> not null>>",
writable.getVectorSchemaRoot().getSchema().toString());
}
@@ -91,9 +93,9 @@ public class TestSerializer {
Serializer converter = new Serializer(new HiveConf(), "attemptId", typeInfos, fieldNames);
ArrowWrapperWritable writable = converter.emptyBatch();
Assert.assertEquals(
- "Schema<a: Struct<b: List<$data$: List<$data$: Int(8, true)>>, c: List<$data$: List<$data$: " +
- "Struct<keys: Utf8, values: Utf8>>>, d: List<$data$: Struct<e: List<$data$: Int(8, true)>, " +
- "f: List<$data$: Struct<keys: Utf8, values: Utf8>>>>>>",
+ "Schema<a: Struct<b: List<$data$: List<$data$: Int(8, true)>>, c: List<$data$: Map(false)<entries: " +
+ "Struct<key: Utf8 not null, value: Utf8 not null> not null>>, d: List<$data$: Struct<e: List<$data$: " +
+ "Int(8, true)>, f: Map(false)<entries: Struct<key: Utf8 not null, value: Utf8 not null> not null>>>>>",
writable.getVectorSchemaRoot().getSchema().toString());
}
}