You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/06/21 21:37:18 UTC

hive git commit: HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR)

Repository: hive
Updated Branches:
  refs/heads/master 6adab1c2a -> 24e16cc57


HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24e16cc5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24e16cc5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24e16cc5

Branch: refs/heads/master
Commit: 24e16cc57293ea6771cd55009f8cfd29870a39ee
Parents: 6adab1c
Author: Aihua Xu <ai...@apache.org>
Authored: Thu Jun 14 13:35:49 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Thu Jun 21 14:36:07 2018 -0700

----------------------------------------------------------------------
 .../hcatalog/pig/AbstractHCatStorerTest.java    |  2 +-
 .../pig/TestHCatLoaderComplexSchema.java        |  3 ++
 .../hive/hcatalog/pig/TestHCatStorer.java       |  4 +-
 .../apache/hadoop/hive/ql/io/IOConstants.java   |  1 +
 .../ql/io/JsonFileStorageFormatDescriptor.java  | 51 ++++++++++++++++++++
 ...he.hadoop.hive.ql.io.StorageFormatDescriptor |  1 +
 .../hive/ql/io/TestStorageFormatDescriptor.java |  3 ++
 .../test/queries/clientpositive/json_serde1.q   |  9 ++--
 .../results/clientpositive/json_serde1.q.out    | 44 ++++++++++++++++-
 9 files changed, 109 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
index 97277b5..a5cf3a5 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
@@ -54,7 +54,7 @@ import org.slf4j.LoggerFactory;
 public abstract class AbstractHCatStorerTest extends HCatBaseTest {
   static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class);
   static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
-  String storageFormat;
+  protected String storageFormat;
 
   public AbstractHCatStorerTest() {
     storageFormat = getStorageFormat();

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
index 8f06d39..37e670c 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
@@ -75,6 +75,9 @@ public class TestHCatLoaderComplexSchema {
         put(IOConstants.PARQUETFILE, new HashSet<String>() {{
           add("testMapNullKey");
         }});
+        put(IOConstants.JSONFILE, new HashSet<String>() {{
+          add("testMapNullKey");
+        }});
       }};
 
   private String storageFormat;

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index 477ea66..cb02139 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -86,8 +86,6 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
         }
       };
 
-  private String storageFormat;
-
   @Parameterized.Parameters
   public static Collection<Object[]> generateParameters() {
     return StorageFormats.names();
@@ -99,7 +97,7 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
 
   @Override
   String getStorageFormat() {
-    return null;
+    return this.storageFormat;
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
index f60d296..2be864e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
@@ -35,6 +35,7 @@ public final class IOConstants {
   public static final String PARQUETFILE = "PARQUETFILE";
   public static final String AVRO = "AVRO";
   public static final String AVROFILE = "AVROFILE";
+  public static final String JSONFILE = "JSONFILE";
 
   /**
    * The desired TABLE column names and types for input format schema evolution.

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
new file mode 100644
index 0000000..00c6178
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.util.Set;
+
+import org.apache.hadoop.hive.serde2.JsonSerDe;
+
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * A storage format descriptor class to support "STORED AS JSONFILE" syntax.
+ * Pairs the text file input and output formats with {@link JsonSerDe}.
+ */
+public class JsonFileStorageFormatDescriptor extends AbstractStorageFormatDescriptor {
+  @Override
+  public Set<String> getNames() {
+    return ImmutableSet.of(IOConstants.JSONFILE);
+  }
+
+  @Override
+  public String getInputFormat() {
+    return IOConstants.TEXTFILE_INPUT;
+  }
+
+  @Override
+  public String getOutputFormat() {
+    return IOConstants.TEXTFILE_OUTPUT;
+  }
+
+  @Override
+  public String getSerde() {
+    return JsonSerDe.class.getName();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
----------------------------------------------------------------------
diff --git a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
index d858a95..c28a302 100644
--- a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
+++ b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
@@ -4,3 +4,4 @@ org.apache.hadoop.hive.ql.io.RCFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.ORCFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.ParquetFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.AvroStorageFormatDescriptor
+org.apache.hadoop.hive.ql.io.JsonFileStorageFormatDescriptor
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
index 72acaad..86d3703 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
@@ -40,5 +40,8 @@ public class TestStorageFormatDescriptor {
         (new ParquetFileStorageFormatDescriptor()).getNames());
     Assert.assertEquals(Sets.newHashSet(IOConstants.AVRO, IOConstants.AVROFILE),
       (new AvroStorageFormatDescriptor()).getNames());
+    Assert.assertEquals(Sets.newHashSet(IOConstants.JSONFILE),
+        (new JsonFileStorageFormatDescriptor()).getNames());
+
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/queries/clientpositive/json_serde1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/json_serde1.q b/ql/src/test/queries/clientpositive/json_serde1.q
index b805925..fcbf1c0 100644
--- a/ql/src/test/queries/clientpositive/json_serde1.q
+++ b/ql/src/test/queries/clientpositive/json_serde1.q
@@ -1,9 +1,8 @@
 --! qt:dataset:src
 
-add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
-
 drop table if exists json_serde1_1;
 drop table if exists json_serde1_2;
+drop table if exists json_serde1_3;
 
 create table json_serde1_1 (a array<string>,b map<string,int>)
   row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
@@ -17,7 +16,7 @@ create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
 
 insert into table json_serde1_2
   select
@@ -33,5 +32,9 @@ insert into table json_serde1_2
 
 select * from json_serde1_2;
 
+create table json_serde1_3 (c1 int, c2 string) stored as jsonfile;
+show create table json_serde1_3;
+
 drop table json_serde1_1;
 drop table json_serde1_2;
+drop table json_serde1_3;

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/results/clientpositive/json_serde1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/json_serde1.q.out b/ql/src/test/results/clientpositive/json_serde1.q.out
index e14d674..341a494 100644
--- a/ql/src/test/results/clientpositive/json_serde1.q.out
+++ b/ql/src/test/results/clientpositive/json_serde1.q.out
@@ -6,6 +6,10 @@ PREHOOK: query: drop table if exists json_serde1_2
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table if exists json_serde1_2
 POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists json_serde1_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists json_serde1_3
+POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>)
   row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
 PREHOOK: type: CREATETABLE
@@ -42,7 +46,7 @@ PREHOOK: query: create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@json_serde1_2
@@ -50,7 +54,7 @@ POSTHOOK: query: create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@json_serde1_2
@@ -95,6 +99,34 @@ POSTHOOK: Input: default@json_serde1_2
 #### A masked pattern was here ####
 [3,2,1]	{1:"2001-01-01",2:null}	{"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
 [3,2,1]	{1:"2001-01-01",2:null}	{"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
+PREHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde1_3
+PREHOOK: query: show create table json_serde1_3
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@json_serde1_3
+POSTHOOK: query: show create table json_serde1_3
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@json_serde1_3
+CREATE TABLE `json_serde1_3`(
+  `c1` int COMMENT 'from deserializer', 
+  `c2` string COMMENT 'from deserializer')
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.JsonSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+#### A masked pattern was here ####
 PREHOOK: query: drop table json_serde1_1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@json_serde1_1
@@ -111,3 +143,11 @@ POSTHOOK: query: drop table json_serde1_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@json_serde1_2
 POSTHOOK: Output: default@json_serde1_2
+PREHOOK: query: drop table json_serde1_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde1_3
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: drop table json_serde1_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde1_3
+POSTHOOK: Output: default@json_serde1_3