You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/06/21 21:37:18 UTC
hive git commit: HIVE-19899: Support stored as JsonFile (Aihua Xu,
reviewed by Yongzhi Chen, BELUGA BEHR)
Repository: hive
Updated Branches:
refs/heads/master 6adab1c2a -> 24e16cc57
HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24e16cc5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24e16cc5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24e16cc5
Branch: refs/heads/master
Commit: 24e16cc57293ea6771cd55009f8cfd29870a39ee
Parents: 6adab1c
Author: Aihua Xu <ai...@apache.org>
Authored: Thu Jun 14 13:35:49 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Thu Jun 21 14:36:07 2018 -0700
----------------------------------------------------------------------
.../hcatalog/pig/AbstractHCatStorerTest.java | 2 +-
.../pig/TestHCatLoaderComplexSchema.java | 3 ++
.../hive/hcatalog/pig/TestHCatStorer.java | 4 +-
.../apache/hadoop/hive/ql/io/IOConstants.java | 1 +
.../ql/io/JsonFileStorageFormatDescriptor.java | 51 ++++++++++++++++++++
...he.hadoop.hive.ql.io.StorageFormatDescriptor | 1 +
.../hive/ql/io/TestStorageFormatDescriptor.java | 3 ++
.../test/queries/clientpositive/json_serde1.q | 9 ++--
.../results/clientpositive/json_serde1.q.out | 44 ++++++++++++++++-
9 files changed, 109 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
index 97277b5..a5cf3a5 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
@@ -54,7 +54,7 @@ import org.slf4j.LoggerFactory;
public abstract class AbstractHCatStorerTest extends HCatBaseTest {
static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class);
static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
- String storageFormat;
+ protected String storageFormat;
public AbstractHCatStorerTest() {
storageFormat = getStorageFormat();
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
index 8f06d39..37e670c 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
@@ -75,6 +75,9 @@ public class TestHCatLoaderComplexSchema {
put(IOConstants.PARQUETFILE, new HashSet<String>() {{
add("testMapNullKey");
}});
+ put(IOConstants.JSONFILE, new HashSet<String>() {{
+ add("testMapNullKey");
+ }});
}};
private String storageFormat;
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index 477ea66..cb02139 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -86,8 +86,6 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
}
};
- private String storageFormat;
-
@Parameterized.Parameters
public static Collection<Object[]> generateParameters() {
return StorageFormats.names();
@@ -99,7 +97,7 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
@Override
String getStorageFormat() {
- return null;
+ return this.storageFormat;
}
@Test
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
index f60d296..2be864e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
@@ -35,6 +35,7 @@ public final class IOConstants {
public static final String PARQUETFILE = "PARQUETFILE";
public static final String AVRO = "AVRO";
public static final String AVROFILE = "AVROFILE";
+ public static final String JSONFILE = "JSONFILE";
/**
* The desired TABLE column names and types for input format schema evolution.
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
new file mode 100644
index 0000000..00c6178
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.util.Set;
+
+import org.apache.hadoop.hive.serde2.JsonSerDe;
+
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * A storage format descriptor class to support "STORED AS JSONFILE" syntax.
+ * Registers the JSONFILE name, the TEXTFILE input/output formats and {@link JsonSerDe} as serde.
+ */
+public class JsonFileStorageFormatDescriptor extends AbstractStorageFormatDescriptor {
+ @Override
+ public Set<String> getNames() {
+ return ImmutableSet.of(IOConstants.JSONFILE);
+ }
+
+ @Override
+ public String getInputFormat() {
+ return IOConstants.TEXTFILE_INPUT;
+ }
+
+ @Override
+ public String getOutputFormat() {
+ return IOConstants.TEXTFILE_OUTPUT;
+ }
+
+ @Override
+ public String getSerde() {
+ return JsonSerDe.class.getName();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
----------------------------------------------------------------------
diff --git a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
index d858a95..c28a302 100644
--- a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
+++ b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
@@ -4,3 +4,4 @@ org.apache.hadoop.hive.ql.io.RCFileStorageFormatDescriptor
org.apache.hadoop.hive.ql.io.ORCFileStorageFormatDescriptor
org.apache.hadoop.hive.ql.io.ParquetFileStorageFormatDescriptor
org.apache.hadoop.hive.ql.io.AvroStorageFormatDescriptor
+org.apache.hadoop.hive.ql.io.JsonFileStorageFormatDescriptor
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
index 72acaad..86d3703 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
@@ -40,5 +40,8 @@ public class TestStorageFormatDescriptor {
(new ParquetFileStorageFormatDescriptor()).getNames());
Assert.assertEquals(Sets.newHashSet(IOConstants.AVRO, IOConstants.AVROFILE),
(new AvroStorageFormatDescriptor()).getNames());
+ Assert.assertEquals(Sets.newHashSet(IOConstants.JSONFILE),
+ (new JsonFileStorageFormatDescriptor()).getNames());
+
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/queries/clientpositive/json_serde1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/json_serde1.q b/ql/src/test/queries/clientpositive/json_serde1.q
index b805925..fcbf1c0 100644
--- a/ql/src/test/queries/clientpositive/json_serde1.q
+++ b/ql/src/test/queries/clientpositive/json_serde1.q
@@ -1,9 +1,8 @@
--! qt:dataset:src
-add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
-
drop table if exists json_serde1_1;
drop table if exists json_serde1_2;
+drop table if exists json_serde1_3;
create table json_serde1_1 (a array<string>,b map<string,int>)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
@@ -17,7 +16,7 @@ create table json_serde1_2 (
a array<int>,
b map<int,date>,
c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
insert into table json_serde1_2
select
@@ -33,5 +32,9 @@ insert into table json_serde1_2
select * from json_serde1_2;
+create table json_serde1_3 (c1 int, c2 string) stored as jsonfile;
+show create table json_serde1_3;
+
drop table json_serde1_1;
drop table json_serde1_2;
+drop table json_serde1_3;
http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/results/clientpositive/json_serde1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/json_serde1.q.out b/ql/src/test/results/clientpositive/json_serde1.q.out
index e14d674..341a494 100644
--- a/ql/src/test/results/clientpositive/json_serde1.q.out
+++ b/ql/src/test/results/clientpositive/json_serde1.q.out
@@ -6,6 +6,10 @@ PREHOOK: query: drop table if exists json_serde1_2
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table if exists json_serde1_2
POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists json_serde1_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists json_serde1_3
+POSTHOOK: type: DROPTABLE
PREHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
PREHOOK: type: CREATETABLE
@@ -42,7 +46,7 @@ PREHOOK: query: create table json_serde1_2 (
a array<int>,
b map<int,date>,
c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde1_2
@@ -50,7 +54,7 @@ POSTHOOK: query: create table json_serde1_2 (
a array<int>,
b map<int,date>,
c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde1_2
@@ -95,6 +99,34 @@ POSTHOOK: Input: default@json_serde1_2
#### A masked pattern was here ####
[3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
[3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
+PREHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde1_3
+PREHOOK: query: show create table json_serde1_3
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@json_serde1_3
+POSTHOOK: query: show create table json_serde1_3
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@json_serde1_3
+CREATE TABLE `json_serde1_3`(
+ `c1` int COMMENT 'from deserializer',
+ `c2` string COMMENT 'from deserializer')
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.JsonSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+ 'bucketing_version'='2',
+#### A masked pattern was here ####
PREHOOK: query: drop table json_serde1_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@json_serde1_1
@@ -111,3 +143,11 @@ POSTHOOK: query: drop table json_serde1_2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@json_serde1_2
POSTHOOK: Output: default@json_serde1_2
+PREHOOK: query: drop table json_serde1_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde1_3
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: drop table json_serde1_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde1_3
+POSTHOOK: Output: default@json_serde1_3