Posted to commits@hive.apache.org by mi...@apache.org on 2017/08/15 22:50:58 UTC

hive git commit: HIVE-17181: HCatOutputFormat should expose complete output-schema (including partition-keys) for dynamic-partitioning MR jobs (Mithun Radhakrishnan, reviewed by Thejas M Nair)

Repository: hive
Updated Branches:
  refs/heads/master 683543011 -> 0b81d5d81


HIVE-17181: HCatOutputFormat should expose complete output-schema (including partition-keys) for dynamic-partitioning MR jobs (Mithun Radhakrishnan, reviewed by Thejas M Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b81d5d8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b81d5d8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b81d5d8

Branch: refs/heads/master
Commit: 0b81d5d81add5a5a6764ef57809c7b7a914486e2
Parents: 6835430
Author: Mithun RK <mi...@apache.org>
Authored: Fri Aug 4 15:38:03 2017 -0700
Committer: Mithun RK <mi...@apache.org>
Committed: Tue Aug 15 15:50:03 2017 -0700

----------------------------------------------------------------------
 .../mapreduce/HCatBaseOutputFormat.java         | 15 ++++++-
 .../mapreduce/TestHCatOutputFormat.java         | 43 ++++++++++++++++++--
 2 files changed, 53 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
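For context, this change matters most to MapReduce jobs that write with dynamic partitioning: their output records must carry the partition-key values as ordinary columns, so the driver needs the table's complete schema, not just the record-schema. The snippet below is a minimal, hypothetical driver-side sketch (the "default.web_logs" table and its "grid" partition column are made up for illustration and are not part of this commit); it shows where the new getTableSchemaWithPartitionColumns() call would slot into the usual HCatOutputFormat setup.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hive.hcatalog.data.DefaultHCatRecord;
    import org.apache.hive.hcatalog.data.schema.HCatSchema;
    import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
    import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

    public class DynamicPartitionWriteDriver {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "dynamic-partition write");

        // Passing null partition-values requests dynamic partitioning:
        // the partition keys are resolved per record at write time.
        // ("default"/"web_logs" are hypothetical database/table names.)
        HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "web_logs", null));

        // getTableSchema() returns the record-schema only (no partition columns).
        // New in this patch: the complete schema, record columns plus partition columns.
        HCatSchema fullSchema =
            HCatOutputFormat.getTableSchemaWithPartitionColumns(job.getConfiguration());

        // Dynamically partitioned records must include the partition-key values,
        // so the full schema is what the job declares as its output schema.
        HCatOutputFormat.setSchema(job, fullSchema);

        job.setOutputFormatClass(HCatOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);
        // ... set mapper/reducer classes and submit as usual.
      }
    }
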


http://git-wip-us.apache.org/repos/asf/hive/blob/0b81d5d8/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
index 3e2ed97..6c09e6f 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java
@@ -43,8 +43,9 @@ public abstract class HCatBaseOutputFormat extends OutputFormat<WritableComparab
   /**
    * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
    * on the specified job context.
+   * Note: This is the record-schema for the table. It does not include the table's partition columns.
    * @param conf the Configuration object
-   * @return the table schema
+   * @return the table schema, excluding partition columns
    * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
    */
   public static HCatSchema getTableSchema(Configuration conf) throws IOException {
@@ -53,6 +54,18 @@ public abstract class HCatBaseOutputFormat extends OutputFormat<WritableComparab
   }
 
   /**
+   * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
+   * on the specified job context.
+   * Note: This is the complete table-schema, including the record-schema *and* the partitioning schema.
+   * @param conf the Configuration object
+   * @return the table schema, including the record-schema and partitioning schema.
+   * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
+   */
+  public static HCatSchema getTableSchemaWithPartitionColumns(Configuration conf) throws IOException {
+    return getJobInfo(conf).getTableInfo().getAllColumns();
+  }
+
+  /**
    * Check for validity of the output-specification for the job.
    * @param context information about the job
    * @throws IOException when output should not be attempted

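On the task side, the same accessor lets a mapper (or reducer) populate the partition-key field directly on the outgoing record. A rough sketch follows, reusing the hypothetical web_logs table from the driver sketch above; the "message" data column, the "grid" partition column, and the extractGrid() helper are all illustrative assumptions, not part of this commit.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hive.hcatalog.data.DefaultHCatRecord;
    import org.apache.hive.hcatalog.data.HCatRecord;
    import org.apache.hive.hcatalog.data.schema.HCatSchema;
    import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;

    public class DynamicPartitionMapper
        extends Mapper<LongWritable, Text, WritableComparable<?>, HCatRecord> {

      private HCatSchema fullSchema;

      @Override
      protected void setup(Context context) throws IOException {
        // Complete output schema: data columns plus partition columns.
        fullSchema =
            HCatOutputFormat.getTableSchemaWithPartitionColumns(context.getConfiguration());
      }

      @Override
      protected void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        HCatRecord record = new DefaultHCatRecord(fullSchema.size());
        record.set("message", fullSchema, value.toString());  // data column (hypothetical)
        record.set("grid", fullSchema, extractGrid(value));   // partition key, per record
        context.write(NullWritable.get(), record);
      }

      // Illustrative helper: derive the partition value from the input line.
      private static String extractGrid(Text line) {
        return line.toString().split("\t", 2)[0];
      }
    }
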
http://git-wip-us.apache.org/repos/asf/hive/blob/0b81d5d8/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
index d96b385..a4c3b17 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -110,7 +111,6 @@ public class TestHCatOutputFormat extends TestCase {
     sd.setCols(Lists.newArrayList(new FieldSchema("data_column", serdeConstants.STRING_TYPE_NAME, "")));
     tbl.setSd(sd);
 
-    //sd.setLocation("hdfs://tmp");
     sd.setInputFormat(RCFileInputFormat.class.getName());
     sd.setOutputFormat(RCFileOutputFormat.class.getName());
     sd.setParameters(new HashMap<String, String>());
@@ -138,7 +138,7 @@ public class TestHCatOutputFormat extends TestCase {
 
   public void testSetOutput() throws Exception {
     Configuration conf = new Configuration();
-    Job job = new Job(conf, "test outputformat");
+    Job job = Job.getInstance(conf, "test outputformat");
 
     Map<String, String> partitionValues = new HashMap<String, String>();
     partitionValues.put("colname", "p1");
@@ -157,7 +157,7 @@ public class TestHCatOutputFormat extends TestCase {
     publishTest(job);
   }
 
-  public void publishTest(Job job) throws Exception {
+  private void publishTest(Job job) throws Exception {
     HCatOutputFormat hcof = new HCatOutputFormat();
     TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
         job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
@@ -172,6 +172,41 @@ public class TestHCatOutputFormat extends TestCase {
 
     StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
     assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue");
-    assertTrue(part.getSd().getLocation().indexOf("p1") != -1);
+    assertTrue(part.getSd().getLocation().contains("p1"));
+  }
+
+  public void testGetTableSchema() throws Exception {
+
+    Configuration conf = new Configuration();
+    Job job = Job.getInstance(conf, "test getTableSchema");
+    HCatOutputFormat.setOutput(
+        job,
+        OutputJobInfo.create(
+            dbName,
+            tblName,
+            new HashMap<String, String>() {{put("colname", "col_value");}}
+        )
+    );
+
+    HCatSchema rowSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
+    assertEquals("Row-schema should have exactly one column.",
+        1, rowSchema.getFields().size());
+    assertEquals("Row-schema must contain the data column.",
+        "data_column", rowSchema.getFields().get(0).getName());
+    assertEquals("Data column should have been STRING type.",
+        serdeConstants.STRING_TYPE_NAME, rowSchema.getFields().get(0).getTypeString());
+
+    HCatSchema tableSchema = HCatOutputFormat.getTableSchemaWithPartitionColumns(job.getConfiguration());
+    assertEquals("Table-schema should have exactly 2 columns.",
+        2, tableSchema.getFields().size());
+    assertEquals("Table-schema must contain the data column.",
+        "data_column", tableSchema.getFields().get(0).getName());
+    assertEquals("Data column should have been STRING type.",
+        serdeConstants.STRING_TYPE_NAME, tableSchema.getFields().get(0).getTypeString());
+    assertEquals("Table-schema must contain the partition column.",
+        "colname", tableSchema.getFields().get(1).getName());
+    assertEquals("Partition column should have been STRING type.",
+        serdeConstants.STRING_TYPE_NAME, tableSchema.getFields().get(1).getTypeString());
+
   }
 }