You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mi...@apache.org on 2017/08/15 22:50:58 UTC
hive git commit: HIVE-17181: HCatOutputFormat should expose complete
output-schema (including partition-keys) for dynamic-partitioning MR jobs
(Mithun Radhakrishnan, reviewed by Thejas M Nair)
Repository: hive
Updated Branches:
refs/heads/master 683543011 -> 0b81d5d81
HIVE-17181: HCatOutputFormat should expose complete output-schema (including partition-keys) for dynamic-partitioning MR jobs (Mithun Radhakrishnan, reviewed by Thejas M Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b81d5d8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b81d5d8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b81d5d8
Branch: refs/heads/master
Commit: 0b81d5d81add5a5a6764ef57809c7b7a914486e2
Parents: 6835430
Author: Mithun RK <mi...@apache.org>
Authored: Fri Aug 4 15:38:03 2017 -0700
Committer: Mithun RK <mi...@apache.org>
Committed: Tue Aug 15 15:50:03 2017 -0700
----------------------------------------------------------------------
.../mapreduce/HCatBaseOutputFormat.java | 15 ++++++-
.../mapreduce/TestHCatOutputFormat.java | 43 ++++++++++++++++++--
2 files changed, 53 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0b81d5d8/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
index 3e2ed97..6c09e6f 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
@@ -43,8 +43,9 @@ public abstract class HCatBaseOutputFormat extends OutputFormat<WritableComparab
/**
* Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
* on the specified job context.
+ * Note: This is the record-schema for the table. It does not include the table's partition columns.
* @param conf the Configuration object
- * @return the table schema
+ * @return the table schema, excluding partition columns
* @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
*/
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
@@ -53,6 +54,18 @@ public abstract class HCatBaseOutputFormat extends OutputFormat<WritableComparab
}
/**
+ * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
+ * on the specified job context.
+ * Note: This is the complete table-schema, including the record-schema *and* the partitioning schema.
+ * @param conf the Configuration object
+ * @return the table schema, including the record-schema and partitioning schema.
+ * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
+ */
+ public static HCatSchema getTableSchemaWithPartitionColumns(Configuration conf) throws IOException {
+ return getJobInfo(conf).getTableInfo().getAllColumns();
+ }
+
+ /**
* Check for validity of the output-specification for the job.
* @param context information about the job
* @throws IOException when output should not be attempted
http://git-wip-us.apache.org/repos/asf/hive/blob/0b81d5d8/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
index d96b385..a4c3b17 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -110,7 +111,6 @@ public class TestHCatOutputFormat extends TestCase {
sd.setCols(Lists.newArrayList(new FieldSchema("data_column", serdeConstants.STRING_TYPE_NAME, "")));
tbl.setSd(sd);
- //sd.setLocation("hdfs://tmp");
sd.setInputFormat(RCFileInputFormat.class.getName());
sd.setOutputFormat(RCFileOutputFormat.class.getName());
sd.setParameters(new HashMap<String, String>());
@@ -138,7 +138,7 @@ public class TestHCatOutputFormat extends TestCase {
public void testSetOutput() throws Exception {
Configuration conf = new Configuration();
- Job job = new Job(conf, "test outputformat");
+ Job job = Job.getInstance(conf, "test outputformat");
Map<String, String> partitionValues = new HashMap<String, String>();
partitionValues.put("colname", "p1");
@@ -157,7 +157,7 @@ public class TestHCatOutputFormat extends TestCase {
publishTest(job);
}
- public void publishTest(Job job) throws Exception {
+ private void publishTest(Job job) throws Exception {
HCatOutputFormat hcof = new HCatOutputFormat();
TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
@@ -172,6 +172,41 @@ public class TestHCatOutputFormat extends TestCase {
StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
- assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
+ assertTrue(part.getSd().getLocation().contains("p1"));
+ }
+
+ public void testGetTableSchema() throws Exception {
+
+ Configuration conf = new Configuration();
+ Job job = Job.getInstance(conf, "test getTableSchema");
+ HCatOutputFormat.setOutput(
+ job,
+ OutputJobInfo.create(
+ dbName,
+ tblName,
+ new HashMap<String, String>() {{put("colname", "col_value");}}
+ )
+ );
+
+ HCatSchema rowSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
+ assertEquals("Row-schema should have exactly one column.",
+ 1, rowSchema.getFields().size());
+ assertEquals("Row-schema must contain the data column.",
+ "data_column", rowSchema.getFields().get(0).getName());
+ assertEquals("Data column should have been STRING type.",
+ serdeConstants.STRING_TYPE_NAME, rowSchema.getFields().get(0).getTypeString());
+
+ HCatSchema tableSchema = HCatOutputFormat.getTableSchemaWithPartitionColumns(job.getConfiguration());
+ assertEquals("Table-schema should have exactly 2 columns.",
+ 2, tableSchema.getFields().size());
+ assertEquals("Table-schema must contain the data column.",
+ "data_column", tableSchema.getFields().get(0).getName());
+ assertEquals("Data column should have been STRING type.",
+ serdeConstants.STRING_TYPE_NAME, tableSchema.getFields().get(0).getTypeString());
+ assertEquals("Table-schema must contain the partition column.",
+ "colname", tableSchema.getFields().get(1).getName());
+ assertEquals("Partition column should have been STRING type.",
+ serdeConstants.STRING_TYPE_NAME, tableSchema.getFields().get(1).getTypeString());
+
}
}