You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/25 19:18:47 UTC

hive git commit: HIVE-11827: STORED AS AVRO fails SELECT COUNT(*) when empty (Yongzhi via Xuefu)

Repository: hive
Updated Branches:
  refs/heads/branch-1 e76295a90 -> 6fa9b5b71


HIVE-11827: STORED AS AVRO fails SELECT COUNT(*) when empty (Yongzhi via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6fa9b5b7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6fa9b5b7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6fa9b5b7

Branch: refs/heads/branch-1
Commit: 6fa9b5b71008fcecd6a2d755697aa1e360275c3c
Parents: e76295a
Author: Xuefu Zhang <xz...@Cloudera.com>
Authored: Fri Sep 25 10:18:28 2015 -0700
Committer: Xuefu Zhang <xz...@Cloudera.com>
Committed: Fri Sep 25 10:18:28 2015 -0700

----------------------------------------------------------------------
 .../queries/clientpositive/avrocountemptytbl.q  |  8 +++
 .../clientpositive/avrocountemptytbl.q.out      | 58 ++++++++++++++++++++
 .../hadoop/hive/serde2/avro/AvroSerdeUtils.java | 24 +++++++-
 3 files changed, 89 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6fa9b5b7/ql/src/test/queries/clientpositive/avrocountemptytbl.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avrocountemptytbl.q b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
new file mode 100644
index 0000000..9ecfb05
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
@@ -0,0 +1,8 @@
+drop table if exists emptyavro;
+create table emptyavro (a int) stored as avro;
+select count(*) from emptyavro;
+insert into emptyavro select count(*) from emptyavro;
+select count(*) from emptyavro;
+insert into emptyavro select key from src where key = 100 limit 1;
+select * from emptyavro;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/6fa9b5b7/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avrocountemptytbl.q.out b/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
new file mode 100644
index 0000000..72e8489
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avrocountemptytbl.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: drop table if exists emptyavro
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists emptyavro
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table emptyavro (a int) stored as avro
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: create table emptyavro (a int) stored as avro
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emptyavro
+PREHOOK: query: select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+0
+PREHOOK: query: insert into emptyavro select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: insert into emptyavro select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+POSTHOOK: Output: default@emptyavro
+POSTHOOK: Lineage: emptyavro.a EXPRESSION [(emptyavro)emptyavro.null, ]
+PREHOOK: query: select count(*) from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+1
+PREHOOK: query: insert into emptyavro select key from src where key = 100 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@emptyavro
+POSTHOOK: query: insert into emptyavro select key from src where key = 100 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@emptyavro
+POSTHOOK: Lineage: emptyavro.a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from emptyavro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emptyavro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emptyavro
+#### A masked pattern was here ####
+0
+100

http://git-wip-us.apache.org/repos/asf/hive/blob/6fa9b5b7/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 4edf654..903ac95 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -27,6 +27,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.mapred.JobConf;
 
 import java.io.File;
@@ -38,6 +41,7 @@ import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.Buffer;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Properties;
 
@@ -105,8 +109,26 @@ public class AvroSerdeUtils {
 
     // Try pulling directly from URL
     schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
-    if(schemaString == null || schemaString.equals(SCHEMA_NONE))
+    if (schemaString == null) {
+      final String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
+      final String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+      final String columnCommentProperty = properties.getProperty(AvroSerDe.LIST_COLUMN_COMMENTS);
+      if (columnNameProperty == null || columnNameProperty.isEmpty()
+        || columnTypeProperty == null || columnTypeProperty.isEmpty() ) {
+        throw new AvroSerdeException(EXCEPTION_MESSAGE);
+      }
+      // Get column names and types
+      List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
+      List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+
+      Schema schema = AvroSerDe.getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty);
+      properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
+      if (conf != null)
+        conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
+      return schema;
+    } else if(schemaString.equals(SCHEMA_NONE)) {
       throw new AvroSerdeException(EXCEPTION_MESSAGE);
+    }
 
     try {
       Schema s = getSchemaFromFS(schemaString, conf);