You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2013/04/09 23:47:31 UTC

svn commit: r1466255 - in /hive/trunk/hcatalog: core/src/main/java/org/apache/hcatalog/mapreduce/ core/src/test/java/org/apache/hcatalog/fileformats/ core/src/test/java/org/apache/hcatalog/mapreduce/ hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/

Author: khorgath
Date: Tue Apr  9 21:47:31 2013
New Revision: 1466255

URL: http://svn.apache.org/r1466255
Log:
HCATALOG-632 Fixing ORC File usage with HCatalog

Added:
    hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/
    hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java
    hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java
    hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java
Modified:
    hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java
    hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
    hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
    hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
    hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
    hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java

Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java Tue Apr  9 21:47:31 2013
@@ -19,9 +19,9 @@
 
 package org.apache.hcatalog.mapreduce;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -95,19 +95,24 @@ class FileOutputFormatContainer extends 
         context.getConfiguration().set("mapred.output.value.class",
             sd.getSerializedClass().getName());
 
-        // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
-        // (That's because records can't be written until the values of the dynamic partitions are deduced.
-        // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
-        RecordWriter<WritableComparable<?>, HCatRecord> rw =
-            new FileRecordWriterContainer(
-                HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ?
-                    null :
-                    getBaseOutputFormat()
-                        .getRecordWriter(null,
-                            new JobConf(context.getConfiguration()),
-                            FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"),
-                            InternalUtil.createReporter(context)),
-                context);
+        RecordWriter<WritableComparable<?>, HCatRecord> rw;
+        if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){
+            // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
+            // (That's because records can't be written until the values of the dynamic partitions are deduced.
+            // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
+            rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context);
+        } else {
+            Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
+            Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"));
+
+            rw = new FileRecordWriterContainer(
+                      getBaseOutputFormat().getRecordWriter(
+                              parentDir.getFileSystem(context.getConfiguration()),
+                              new JobConf(context.getConfiguration()),
+                              childPath.toString(),
+                              InternalUtil.createReporter(context)),
+                      context);
+        }
         return rw;
     }
 

Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java Tue Apr  9 21:47:31 2013
@@ -218,10 +218,14 @@ class FileRecordWriterContainer extends 
                 //setupTask()
                 baseOutputCommitter.setupTask(currTaskContext);
 
+                Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
+                Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));
+                
                 org.apache.hadoop.mapred.RecordWriter baseRecordWriter =
-                    baseOF.getRecordWriter(null,
+                    baseOF.getRecordWriter(
+                        parentDir.getFileSystem(currTaskContext.getConfiguration()),
                         currTaskContext.getJobConf(),
-                        FileOutputFormat.getUniqueFile(currTaskContext, "part", ""),
+                        childPath.toString(),
                         InternalUtil.createReporter(currTaskContext));
 
                 baseDynamicWriters.put(dynKey, baseRecordWriter);

Added: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java (added)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java Tue Apr  9 21:47:31 2013
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.fileformats;
+
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hcatalog.mapreduce.TestHCatDynamicPartitioned;
+import org.junit.BeforeClass;
+
+public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned {
+
+    @BeforeClass
+    public static void generateInputData() throws Exception {
+        tableName = "testOrcDynamicPartitionedTable";
+        generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
+        generateDataColumns();
+    }
+
+    @Override
+    protected String inputFormat() { 
+        return OrcInputFormat.class.getName();
+    }
+  
+    @Override
+    protected String outputFormat() { 
+        return OrcOutputFormat.class.getName(); 
+    }
+  
+    @Override
+    protected String serdeClass() { 
+        return OrcSerde.class.getName(); 
+    }
+
+}

Modified: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java (original)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java Tue Apr  9 21:47:31 2013
@@ -40,8 +40,8 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -75,10 +75,6 @@ public abstract class HCatMapReduceTest 
     protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
     protected static String tableName = "testHCatMapReduceTable";
 
-    protected String inputFormat = RCFileInputFormat.class.getName();
-    protected String outputFormat = RCFileOutputFormat.class.getName();
-    protected String serdeClass = ColumnarSerDe.class.getName();
-
     private static List<HCatRecord> writeRecords = new ArrayList<HCatRecord>();
     private static List<HCatRecord> readRecords = new ArrayList<HCatRecord>();
 
@@ -88,6 +84,18 @@ public abstract class HCatMapReduceTest 
 
     private static FileSystem fs;
 
+    protected String inputFormat() { 
+        return RCFileInputFormat.class.getName();
+    }
+    
+    protected String outputFormat() { 
+        return RCFileOutputFormat.class.getName(); 
+    }
+    
+    protected String serdeClass() { 
+        return ColumnarSerDe.class.getName(); 
+    }
+    
     @BeforeClass
     public static void setUpOneTime() throws Exception {
         fs = new LocalFileSystem();
@@ -142,9 +150,9 @@ public abstract class HCatMapReduceTest 
         sd.getSerdeInfo().setName(tbl.getTableName());
         sd.getSerdeInfo().setParameters(new HashMap<String, String>());
         sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
-        sd.getSerdeInfo().setSerializationLib(serdeClass);
-        sd.setInputFormat(inputFormat);
-        sd.setOutputFormat(outputFormat);
+        sd.getSerdeInfo().setSerializationLib(serdeClass());
+        sd.setInputFormat(inputFormat());
+        sd.setOutputFormat(outputFormat());
 
         Map<String, String> tableParams = new HashMap<String, String>();
         tbl.setParameters(tableParams);

Modified: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java (original)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java Tue Apr  9 21:47:31 2013
@@ -50,8 +50,8 @@ public class TestHCatDynamicPartitioned 
     private static List<HCatRecord> writeRecords;
     private static List<HCatFieldSchema> dataColumns;
     private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class);
-    private static final int NUM_RECORDS = 20;
-    private static final int NUM_PARTITIONS = 5;
+    protected static final int NUM_RECORDS = 20;
+    protected static final int NUM_PARTITIONS = 5;
 
     @BeforeClass
     public static void generateInputData() throws Exception {
@@ -60,14 +60,14 @@ public class TestHCatDynamicPartitioned 
         generateDataColumns();
     }
 
-    private static void generateDataColumns() throws HCatException {
+    protected static void generateDataColumns() throws HCatException {
         dataColumns = new ArrayList<HCatFieldSchema>();
         dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
         dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
         dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")));
     }
 
-    private static void generateWriteRecords(int max, int mod, int offset) {
+    protected static void generateWriteRecords(int max, int mod, int offset) {
         writeRecords = new ArrayList<HCatRecord>();
 
         for (int i = 0; i < max; i++) {

Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java (original)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java Tue Apr  9 21:47:31 2013
@@ -65,8 +65,14 @@ public class TestHCatLoader extends Test
     private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass
     private static boolean setupHasRun = false;
 
+    
     private static Map<Integer, Pair<Integer, String>> basicInputData;
 
+    protected String storageFormat() {
+        return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
+            "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')";
+    }
+
     private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
         driver.run("drop table " + tablename);
     }
@@ -77,8 +83,7 @@ public class TestHCatLoader extends Test
         if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
             createTable = createTable + "partitioned by (" + partitionedBy + ") ";
         }
-        createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
-            "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') ";
+        createTable = createTable + "stored as " +storageFormat();
         int retCode = driver.run(createTable).getResponseCode();
         if (retCode != 0) {
             throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]");

Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java (original)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java Tue Apr  9 21:47:31 2013
@@ -46,9 +46,14 @@ public class TestHCatStorerMulti extends
     private static final String BASIC_TABLE = "junit_unparted_basic";
     private static final String PARTITIONED_TABLE = "junit_parted_basic";
     private static Driver driver;
-
+    
     private static Map<Integer, Pair<Integer, String>> basicInputData;
 
+    protected String storageFormat() {
+        return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
+            "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')";
+    }
+
     private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
         driver.run("drop table " + tablename);
     }
@@ -59,8 +64,7 @@ public class TestHCatStorerMulti extends
         if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
             createTable = createTable + "partitioned by (" + partitionedBy + ") ";
         }
-        createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
-            "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') ";
+        createTable = createTable + "stored as " + storageFormat();
         int retCode = driver.run(createTable).getResponseCode();
         if (retCode != 0) {
             throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]");

Added: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java (added)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java Tue Apr  9 21:47:31 2013
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.pig;
+
+public class TestOrcHCatLoader extends TestHCatLoader {
+
+    @Override
+    protected String storageFormat() {
+        return "orc";
+    }
+
+}
+

Added: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java (added)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java Tue Apr  9 21:47:31 2013
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.pig;
+
+public class TestOrcHCatStorer extends TestHCatStorerMulti {
+
+    @Override
+    protected String storageFormat() {
+        return "orc";
+    }
+}
+