You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2013/04/09 23:47:31 UTC
svn commit: r1466255 - in /hive/trunk/hcatalog:
core/src/main/java/org/apache/hcatalog/mapreduce/
core/src/test/java/org/apache/hcatalog/fileformats/
core/src/test/java/org/apache/hcatalog/mapreduce/
hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/
Author: khorgath
Date: Tue Apr 9 21:47:31 2013
New Revision: 1466255
URL: http://svn.apache.org/r1466255
Log:
HCATALOG-632 Fixing ORC File usage with HCatalog
Added:
hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/
hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java
hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java
hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java
Modified:
hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java
hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java
Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java Tue Apr 9 21:47:31 2013
@@ -19,9 +19,9 @@
package org.apache.hcatalog.mapreduce;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
@@ -95,19 +95,24 @@ class FileOutputFormatContainer extends
context.getConfiguration().set("mapred.output.value.class",
sd.getSerializedClass().getName());
- // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
- // (That's because records can't be written until the values of the dynamic partitions are deduced.
- // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
- RecordWriter<WritableComparable<?>, HCatRecord> rw =
- new FileRecordWriterContainer(
- HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ?
- null :
- getBaseOutputFormat()
- .getRecordWriter(null,
- new JobConf(context.getConfiguration()),
- FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"),
- InternalUtil.createReporter(context)),
- context);
+ RecordWriter<WritableComparable<?>, HCatRecord> rw;
+ if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){
+ // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
+ // (That's because records can't be written until the values of the dynamic partitions are deduced.
+ // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
+ rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context);
+ } else {
+ Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
+ Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"));
+
+ rw = new FileRecordWriterContainer(
+ getBaseOutputFormat().getRecordWriter(
+ parentDir.getFileSystem(context.getConfiguration()),
+ new JobConf(context.getConfiguration()),
+ childPath.toString(),
+ InternalUtil.createReporter(context)),
+ context);
+ }
return rw;
}
Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java Tue Apr 9 21:47:31 2013
@@ -218,10 +218,14 @@ class FileRecordWriterContainer extends
//setupTask()
baseOutputCommitter.setupTask(currTaskContext);
+ Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
+ Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));
+
org.apache.hadoop.mapred.RecordWriter baseRecordWriter =
- baseOF.getRecordWriter(null,
+ baseOF.getRecordWriter(
+ parentDir.getFileSystem(currTaskContext.getConfiguration()),
currTaskContext.getJobConf(),
- FileOutputFormat.getUniqueFile(currTaskContext, "part", ""),
+ childPath.toString(),
InternalUtil.createReporter(currTaskContext));
baseDynamicWriters.put(dynKey, baseRecordWriter);
Added: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java (added)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java Tue Apr 9 21:47:31 2013
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.fileformats;
+
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hcatalog.mapreduce.TestHCatDynamicPartitioned;
+import org.junit.BeforeClass;
+
+public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned {
+
+ @BeforeClass
+ public static void generateInputData() throws Exception {
+ tableName = "testOrcDynamicPartitionedTable";
+ generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
+ generateDataColumns();
+ }
+
+ @Override
+ protected String inputFormat() {
+ return OrcInputFormat.class.getName();
+ }
+
+ @Override
+ protected String outputFormat() {
+ return OrcOutputFormat.class.getName();
+ }
+
+ @Override
+ protected String serdeClass() {
+ return OrcSerde.class.getName();
+ }
+
+}
Modified: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java (original)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java Tue Apr 9 21:47:31 2013
@@ -40,8 +40,8 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -75,10 +75,6 @@ public abstract class HCatMapReduceTest
protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
protected static String tableName = "testHCatMapReduceTable";
- protected String inputFormat = RCFileInputFormat.class.getName();
- protected String outputFormat = RCFileOutputFormat.class.getName();
- protected String serdeClass = ColumnarSerDe.class.getName();
-
private static List<HCatRecord> writeRecords = new ArrayList<HCatRecord>();
private static List<HCatRecord> readRecords = new ArrayList<HCatRecord>();
@@ -88,6 +84,18 @@ public abstract class HCatMapReduceTest
private static FileSystem fs;
+ protected String inputFormat() {
+ return RCFileInputFormat.class.getName();
+ }
+
+ protected String outputFormat() {
+ return RCFileOutputFormat.class.getName();
+ }
+
+ protected String serdeClass() {
+ return ColumnarSerDe.class.getName();
+ }
+
@BeforeClass
public static void setUpOneTime() throws Exception {
fs = new LocalFileSystem();
@@ -142,9 +150,9 @@ public abstract class HCatMapReduceTest
sd.getSerdeInfo().setName(tbl.getTableName());
sd.getSerdeInfo().setParameters(new HashMap<String, String>());
sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
- sd.getSerdeInfo().setSerializationLib(serdeClass);
- sd.setInputFormat(inputFormat);
- sd.setOutputFormat(outputFormat);
+ sd.getSerdeInfo().setSerializationLib(serdeClass());
+ sd.setInputFormat(inputFormat());
+ sd.setOutputFormat(outputFormat());
Map<String, String> tableParams = new HashMap<String, String>();
tbl.setParameters(tableParams);
Modified: hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java (original)
+++ hive/trunk/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java Tue Apr 9 21:47:31 2013
@@ -50,8 +50,8 @@ public class TestHCatDynamicPartitioned
private static List<HCatRecord> writeRecords;
private static List<HCatFieldSchema> dataColumns;
private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class);
- private static final int NUM_RECORDS = 20;
- private static final int NUM_PARTITIONS = 5;
+ protected static final int NUM_RECORDS = 20;
+ protected static final int NUM_PARTITIONS = 5;
@BeforeClass
public static void generateInputData() throws Exception {
@@ -60,14 +60,14 @@ public class TestHCatDynamicPartitioned
generateDataColumns();
}
- private static void generateDataColumns() throws HCatException {
+ protected static void generateDataColumns() throws HCatException {
dataColumns = new ArrayList<HCatFieldSchema>();
dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")));
}
- private static void generateWriteRecords(int max, int mod, int offset) {
+ protected static void generateWriteRecords(int max, int mod, int offset) {
writeRecords = new ArrayList<HCatRecord>();
for (int i = 0; i < max; i++) {
Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java (original)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java Tue Apr 9 21:47:31 2013
@@ -65,8 +65,14 @@ public class TestHCatLoader extends Test
private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass
private static boolean setupHasRun = false;
+
private static Map<Integer, Pair<Integer, String>> basicInputData;
+ protected String storageFormat() {
+ return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
+ "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')";
+ }
+
private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
driver.run("drop table " + tablename);
}
@@ -77,8 +83,7 @@ public class TestHCatLoader extends Test
if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
createTable = createTable + "partitioned by (" + partitionedBy + ") ";
}
- createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
- "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') ";
+ createTable = createTable + "stored as " +storageFormat();
int retCode = driver.run(createTable).getResponseCode();
if (retCode != 0) {
throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]");
Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java?rev=1466255&r1=1466254&r2=1466255&view=diff
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java (original)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java Tue Apr 9 21:47:31 2013
@@ -46,9 +46,14 @@ public class TestHCatStorerMulti extends
private static final String BASIC_TABLE = "junit_unparted_basic";
private static final String PARTITIONED_TABLE = "junit_parted_basic";
private static Driver driver;
-
+
private static Map<Integer, Pair<Integer, String>> basicInputData;
+ protected String storageFormat() {
+ return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
+ "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')";
+ }
+
private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
driver.run("drop table " + tablename);
}
@@ -59,8 +64,7 @@ public class TestHCatStorerMulti extends
if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
createTable = createTable + "partitioned by (" + partitionedBy + ") ";
}
- createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," +
- "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') ";
+ createTable = createTable + "stored as " + storageFormat();
int retCode = driver.run(createTable).getResponseCode();
if (retCode != 0) {
throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]");
Added: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java (added)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java Tue Apr 9 21:47:31 2013
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.pig;
+
+public class TestOrcHCatLoader extends TestHCatLoader {
+
+ @Override
+ protected String storageFormat() {
+ return "orc";
+ }
+
+}
+
Added: hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java?rev=1466255&view=auto
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java (added)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java Tue Apr 9 21:47:31 2013
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.pig;
+
+public class TestOrcHCatStorer extends TestHCatStorerMulti {
+
+ @Override
+ protected String storageFormat() {
+ return "orc";
+ }
+}
+