You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ga...@apache.org on 2012/03/29 23:17:52 UTC
svn commit: r1307159 - in /incubator/hcatalog/trunk: ./
src/java/org/apache/hcatalog/cli/SemanticAnalysis/
src/java/org/apache/hcatalog/common/ src/java/org/apache/hcatalog/mapreduce/
src/java/org/apache/hcatalog/pig/drivers/ src/java/org/apache/hcatal...
Author: gates
Date: Thu Mar 29 23:17:50 2012
New Revision: 1307159
URL: http://svn.apache.org/viewvc?rev=1307159&view=rev
Log:
HCATALOG-268 Remove remnants of storage drivers.
Added:
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/LoadFuncBasedInputDriver.java.broken
- copied unchanged from r1306643, incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/LoadFuncBasedInputDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/PigStorageInputDriver.java.broken
- copied unchanged from r1306643, incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/PigStorageInputDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/StoreFuncBasedOutputDriver.java.broken
- copied unchanged from r1306643, incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/StoreFuncBasedOutputDriver.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken
- copied unchanged from r1306643, incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken
- copied unchanged from r1306643, incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java
Removed:
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/LoadFuncBasedInputDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/PigStorageInputDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/drivers/StoreFuncBasedOutputDriver.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/ErrorType.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InitializeInput.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InternalUtil.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/StorerInfo.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java
incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/pig/MyPigStorage.java
incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java
incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java
incubator/hcatalog/trunk/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Thu Mar 29 23:17:50 2012
@@ -77,6 +77,8 @@ Release 0.4.0 - Unreleased
HCAT-2 Support nested schema conversion between Hive an Pig (julienledem via hashutosh)
IMPROVEMENTS
+ HCAT-268 Remove remnants of storage drivers. (rohini via gates)
+
HCAT-306 Need more end-to-end tests (gates)
HCAT-130 Documentation improvements (gates and lefty via gates)
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java Thu Mar 29 23:17:50 2012
@@ -24,36 +24,26 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
-import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.Privilege;
import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.mapreduce.HCatStorageHandler;
-import org.apache.hcatalog.rcfile.RCFileInputDriver;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
final class CreateTableHook extends HCatSemanticAnalyzerBase {
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/ErrorType.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/ErrorType.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/ErrorType.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/ErrorType.java Thu Mar 29 23:17:50 2012
@@ -34,7 +34,7 @@ public enum ErrorType {
ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"),
ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"),
ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"),
- ERROR_INIT_STORAGE_DRIVER (2005, "Error initializing output storage driver instance"),
+ ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"),
ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"),
ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"),
ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"),
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java Thu Mar 29 23:17:50 2012
@@ -25,8 +25,6 @@ public final class HCatConstants {
public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat";
- public static final String HCAT_RCFILE_ISD_CLASS = "org.apache.hcatalog.rcfile.RCFileInputDriver";
- public static final String HCAT_RCFILE_OSD_CLASS = "org.apache.hcatalog.rcfile.RCFileOutputDriver";
public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName();
public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName();
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java Thu Mar 29 23:17:50 2012
@@ -55,7 +55,7 @@ class DefaultOutputFormatContainer exten
}
/**
- * Get the record writer for the job. Uses the Table's default OutputStorageDriver
+ * Get the record writer for the job. Uses the storagehandler's OutputFormat
* to get the record writer.
* @param context the information about the current task.
* @return a RecordWriter to write the output for the job.
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileOutputStorageDriver.java Thu Mar 29 23:17:50 2012
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hcatalog.mapreduce;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.security.AccessControlException;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * Base class for File-based OutputStorageDrivers to extend. Provides subclasses
- * the convenience of not having to rewrite mechanisms such as, dynamic
- * partitioning, partition registration, success file, etc.
- */
-public abstract class FileOutputStorageDriver extends HCatOutputStorageDriver {
-
- /** The directory under which data is initially written for a partitioned table */
- protected static final String DYNTEMP_DIR_NAME = "_DYN";
-
- /** The directory under which data is initially written for a non partitioned table */
- protected static final String TEMP_DIR_NAME = "_TEMP";
- private OutputFormat<WritableComparable<?>, ? super Writable> outputFormat;
-
-
- @Override
- public void initialize(JobContext jobContext, Properties hcatProperties) throws IOException {
- super.initialize(jobContext, hcatProperties);
- }
-
- @Override
- public final String getOutputLocation(JobContext jobContext,
- String tableLocation, List<String> partitionCols, Map<String, String> partitionValues, String dynHash) throws IOException {
- String parentPath = tableLocation;
- // For dynamic partitioned writes without all keyvalues specified,
- // we create a temp dir for the associated write job
- if (dynHash != null){
- parentPath = new Path(tableLocation, DYNTEMP_DIR_NAME+dynHash).toString();
- }
-
- // For non-partitioned tables, we send them to the temp dir
- if((dynHash == null) && ( partitionValues == null || partitionValues.size() == 0 )) {
- return new Path(tableLocation, TEMP_DIR_NAME).toString();
- }
-
- List<String> values = new ArrayList<String>();
- for(String partitionCol : partitionCols) {
- values.add(partitionValues.get(partitionCol));
- }
-
- String partitionLocation = FileUtils.makePartName(partitionCols, values);
-
- Path path = new Path(parentPath, partitionLocation);
- return path.toString();
- }
-
- @Override
- public final Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException{
- return new Path(new FileOutputCommitter(new Path(outputLoc), context).getWorkPath(), org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getUniqueFile(context, "part", ""));
- }
-
- /**
- * Any initialization of file paths, set permissions and group on freshly created files
- * This is called at RecordWriter instantiation time which can be at write-time for
- * a dynamic partitioning usecase
- * @param context
- * @throws IOException
- */
- static void prepareOutputLocation(HCatOutputStorageDriver osd, TaskAttemptContext context) throws IOException {
- OutputJobInfo info = HCatBaseOutputFormat.getJobInfo(context);
-// Path workFile = osd.getWorkFilePath(context,info.getLocation());
- Path workFile = osd.getWorkFilePath(context,context.getConfiguration().get("mapred.output.dir"));
- Path tblPath = new Path(info.getTableInfo().getTable().getSd().getLocation());
- FileSystem fs = tblPath.getFileSystem(context.getConfiguration());
- FileStatus tblPathStat = fs.getFileStatus(tblPath);
-
-// LOG.info("Attempting to set permission ["+tblPathStat.getPermission()+"] on ["+
-// workFile+"], location=["+info.getLocation()+"] , mapred.locn =["+
-// context.getConfiguration().get("mapred.output.dir")+"]");
-//
-// FileStatus wFileStatus = fs.getFileStatus(workFile);
-// LOG.info("Table : "+tblPathStat.getPath());
-// LOG.info("Working File : "+wFileStatus.getPath());
-
- fs.setPermission(workFile, tblPathStat.getPermission());
- try{
- fs.setOwner(workFile, null, tblPathStat.getGroup());
- } catch(AccessControlException ace){
- // log the messages before ignoring. Currently, logging is not built in HCat.
- }
- }
-
- @Override
- OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) {
- //broken
- return new FileOutputFormatContainer(null);
- }
-}
-
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java Thu Mar 29 23:17:50 2012
@@ -123,7 +123,7 @@ class FileRecordWriterContainer extends
}
/**
- * @return the storageDriver
+ * @return the storagehandler
*/
public HCatStorageHandler getStorageHandler() {
return storageHandler;
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java Thu Mar 29 23:17:50 2012
@@ -23,17 +23,8 @@ import java.util.ArrayList;
import java.util.LinkedList;
import java.util.Map;
import java.util.HashMap;
-import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Properties;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.conf.Configuration;
@@ -41,9 +32,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
@@ -54,9 +43,7 @@ import org.apache.hadoop.mapreduce.TaskA
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
-import org.apache.hcatalog.common.ErrorType;
import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
@@ -109,7 +96,7 @@ public abstract class HCatBaseInputForma
* underlying InputFormat's splits
* @param jobContext the job context object
* @return the splits, an HCatInputSplit wrapper over the storage
- * driver InputSplits
+ * handler InputSplits
* @throws IOException or InterruptedException
*/
@Override
@@ -183,7 +170,7 @@ public abstract class HCatBaseInputForma
* @param split the split
* @param taskContext the task attempt context
* @return the record reader instance, either an HCatRecordReader(later) or
- * the underlying storage driver's RecordReader
+ * the underlying storage handler's RecordReader
* @throws IOException or InterruptedException
*/
@Override
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java Thu Mar 29 23:17:50 2012
@@ -23,9 +23,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.io.WritableComparable;
@@ -98,9 +95,8 @@ public abstract class HCatBaseOutputForm
}
/**
- * Gets the output storage driver instance.
+ * Configure the output storage handler
* @param jobContext the job context
- * @return the output driver instance
* @throws IOException
*/
@SuppressWarnings("unchecked")
@@ -110,9 +106,9 @@ public abstract class HCatBaseOutputForm
}
/**
- * Gets the output storage driver instance, with allowing specification of missing dynamic partvals
+ * Configure the output storage handler with allowing specification of missing dynamic partvals
* @param jobContext the job context
- * @return the output driver instance
+ * @param dynamicPartVals
* @throws IOException
*/
@SuppressWarnings("unchecked")
@@ -130,7 +126,7 @@ public abstract class HCatBaseOutputForm
List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
if (dynamicPartVals.size() != dynamicPartKeys.size()){
throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
- "Unable to instantiate dynamic partitioning storage driver, mismatch between"
+ "Unable to configure dynamic partitioning for storage handler, mismatch between"
+ " number of partition values obtained["+dynamicPartVals.size()
+ "] and number of partition values required["+dynamicPartKeys.size()+"]");
}
@@ -153,16 +149,17 @@ public abstract class HCatBaseOutputForm
if (e instanceof HCatException){
throw (HCatException)e;
}else{
- throw new HCatException(ErrorType.ERROR_INIT_STORAGE_DRIVER, e);
+ throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
}
}
}
/**
- * Gets the output storage driver instance, with allowing specification
+ * Configure the output storage handler, with allowing specification
* of partvals from which it picks the dynamic partvals
* @param context the job context
* @param jobInfo the output job info
+ * @param fullPartSpec
* @throws IOException
*/
@@ -191,7 +188,7 @@ public abstract class HCatBaseOutputForm
// So, find out positions of partition columns in schema provided by user.
// We also need to update the output Schema with these deletions.
- // Note that, output storage drivers never sees partition columns in data
+ // Note that, output storage handlers never sees partition columns in data
// or schema.
HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields());
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java Thu Mar 29 23:17:50 2012
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hcatalog.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatSchema;
-
-/** The abstract class to be implemented by underlying storage drivers to enable data access from HCat through
- * HCatInputFormat.
- */
-public abstract class HCatInputStorageDriver {
-
- public void initialize(JobContext context, Properties storageDriverArgs) throws IOException {
- // trivial do nothing
- }
-
- /**
- * Returns the InputFormat to use with this Storage Driver.
- * @param hcatProperties the properties containing parameters required for initialization of InputFormat
- * @return the InputFormat instance
- */
- public abstract InputFormat<? extends WritableComparable, ? extends Writable> getInputFormat(Properties hcatProperties);
-
-
- /**
- * Converts to HCatRecord format usable by HCatInputFormat to convert to required valuetype.
- * Implementers of StorageDriver should look to overwriting this function so as to convert their
- * value type to HCatRecord. Default implementation is provided for StorageDriver implementations
- * on top of an underlying InputFormat that already uses HCatRecord as a tuple
- * @param baseValue the underlying value to convert to HCatRecord
- */
- public abstract HCatRecord convertToHCatRecord(WritableComparable baseKey, Writable baseValue) throws IOException;
-
- /**
- * Set the data location for the input.
- * @param jobContext the job context object
- * @param location the data location
- * @throws IOException Signals that an I/O exception has occurred.
- *
- * Default implementation for FileInputFormat based Input Formats. Override
- * this for other input formats.
- */
- public void setInputPath(JobContext jobContext, String location) throws IOException{
-
- // ideally we should just call FileInputFormat.setInputPaths() here - but
- // that won't work since FileInputFormat.setInputPaths() needs
- // a Job object instead of a JobContext which we are handed here
-
- int length = location.length();
- int curlyOpen = 0;
- int pathStart = 0;
- boolean globPattern = false;
- List<String> pathStrings = new ArrayList<String>();
-
- for (int i=0; i<length; i++) {
- char ch = location.charAt(i);
- switch(ch) {
- case '{' : {
- curlyOpen++;
- if (!globPattern) {
- globPattern = true;
- }
- break;
- }
- case '}' : {
- curlyOpen--;
- if (curlyOpen == 0 && globPattern) {
- globPattern = false;
- }
- break;
- }
- case ',' : {
- if (!globPattern) {
- pathStrings.add(location.substring(pathStart, i));
- pathStart = i + 1 ;
- }
- break;
- }
- }
- }
- pathStrings.add(location.substring(pathStart, length));
-
- Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0]));
-
- Configuration conf = jobContext.getConfiguration();
-
- FileSystem fs = FileSystem.get(conf);
- Path path = paths[0].makeQualified(fs);
- StringBuilder str = new StringBuilder(StringUtils.escapeString(path.toString()));
- for(int i = 1; i < paths.length;i++) {
- str.append(StringUtils.COMMA_STR);
- path = paths[i].makeQualified(fs);
- str.append(StringUtils.escapeString(path.toString()));
- }
-
- conf.set("mapred.input.dir", str.toString());
- }
-
- /**
- * Set the schema of the data as originally published in HCat. The storage driver might validate that this matches with
- * the schema it has (like Zebra) or it will use this to create a HCatRecord matching the output schema.
- * @param jobContext the job context object
- * @param hcatSchema the schema published in HCat for this data
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setOriginalSchema(JobContext jobContext, HCatSchema hcatSchema) throws IOException;
-
- /**
- * Set the consolidated schema for the HCatRecord data returned by the storage driver. All tuples returned by the RecordReader should
- * have this schema. Nulls should be inserted for columns not present in the data.
- * @param jobContext the job context object
- * @param hcatSchema the schema to use as the consolidated schema
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setOutputSchema(JobContext jobContext, HCatSchema hcatSchema) throws IOException;
-
- /**
- * Sets the partition key values for the current partition. The storage driver is passed this so that the storage
- * driver can add the partition key values to the output HCatRecord if the partition key values are not present on disk.
- * @param jobContext the job context object
- * @param partitionValues the partition values having a map with partition key name as key and the HCatKeyValue as value
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setPartitionValues(JobContext jobContext, Map<String,String> partitionValues) throws IOException;
-
-}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java Thu Mar 29 23:17:50 2012
@@ -62,7 +62,7 @@ public class HCatOutputFormat extends HC
/**
* Set the info about the output to write for the Job. This queries the metadata server
- * to find the StorageDriver to use for the table. Throws error if partition is already published.
+ * to find the StorageHandler to use for the table. Throws error if partition is already published.
* @param job the job object
* @param outputJobInfo the table output info
* @throws IOException the exception in communicating with the metadata server
@@ -221,7 +221,7 @@ public class HCatOutputFormat extends HC
}
/**
- * Get the record writer for the job. Uses the Table's default OutputStorageDriver
+ * Get the record writer for the job. Uses the StorageHandler's default OutputFormat
* to get the record writer.
* @param context the information about the current task.
* @return a RecordWriter to write the output for the job.
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatOutputStorageDriver.java Thu Mar 29 23:17:50 2012
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package org.apache.hcatalog.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.JobStatus.State;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatSchema;
-
-
-/** The abstract class to be implemented by underlying storage drivers to enable data access from HCat through
- * HCatOutputFormat.
- */
-public abstract class HCatOutputStorageDriver {
-
-
- /**
- * Initialize the storage driver with specified properties, default implementation does nothing.
- * @param context the job context object
- * @param hcatProperties the properties for the storage driver
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public void initialize(JobContext context, Properties hcatProperties) throws IOException {
- }
-
- /**
- * Returns the OutputFormat to use with this Storage Driver.
- * @return the OutputFormat instance
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract OutputFormat<? extends WritableComparable<?>, ? extends Writable> getOutputFormat() throws IOException;
-
- /**
- * Set the data location for the output.
- * @param jobContext the job context object
- * @param location the data location
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setOutputPath(JobContext jobContext, String location) throws IOException;
-
- /**
- * Set the schema for the data being written out.
- * @param jobContext the job context object
- * @param schema the data schema
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setSchema(JobContext jobContext, HCatSchema schema) throws IOException;
-
- /**
- * Sets the partition key values for the partition being written.
- * @param jobContext the job context object
- * @param partitionValues the partition values
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract void setPartitionValues(JobContext jobContext, Map<String, String> partitionValues) throws IOException;
-
- /**
- * Generate the key for the underlying outputformat. The value given to HCatOutputFormat is passed as the
- * argument. The key given to HCatOutputFormat is ignored.
- * @param value the value given to HCatOutputFormat
- * @return a key instance
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract WritableComparable<?> generateKey(HCatRecord value) throws IOException;
-
- /**
- * Convert the given HCatRecord value to the actual value type.
- * @param value the HCatRecord value to convert
- * @return a value instance
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public abstract Writable convertValue(HCatRecord value) throws IOException;
-
- /**
- * Gets the location to use for the specified partition values.
- * The storage driver can override as required.
- * @param jobContext the job context object
- * @param tableLocation the location of the table
- * @param partitionValues the partition values
- * @param dynHash A unique hash value that represents the dynamic partitioning job used
- * @return the location String.
- * @throws IOException Signals that an I/O exception has occurred.
- */
- public String getOutputLocation(JobContext jobContext,
- String tableLocation, List<String> partitionCols, Map<String, String> partitionValues, String dynHash) throws IOException {
- return null;
- }
-
- /** Storage drivers wrapping other OutputFormats should override this method.
- */
- public Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException{
- return null;
- }
-
- /**
- * Implementation that calls the underlying output committer's setupJob,
- * used in lieu of underlying committer's setupJob when using dynamic partitioning
- * The default implementation should be overridden by underlying implementations
- * that do not use FileOutputCommitter.
- * The reason this function exists is so as to allow a storage driver implementor to
- * override underlying OutputCommitter's setupJob implementation to allow for
- * being called multiple times in a job, to make it idempotent.
- * This should be written in a manner that is callable multiple times
- * from individual tasks without stepping on each others' toes
- *
- * @param context
- * @throws InterruptedException
- * @throws IOException
- */
- public void setupOutputCommitterJob(TaskAttemptContext context)
- throws IOException, InterruptedException{
- getOutputFormat().getOutputCommitter(context).setupJob(context);
- }
-
- /**
- * Implementation that calls the underlying output committer's cleanupJob,
- * used in lieu of underlying committer's cleanupJob when using dynamic partitioning
- * This should be written in a manner that is okay to call after having had
- * multiple underlying outputcommitters write to task dirs inside it.
- * While the base MR cleanupJob should have sufficed normally, this is provided
- * in order to let people implementing setupOutputCommitterJob to cleanup properly
- *
- * @param context
- * @throws IOException
- */
- public void cleanupOutputCommitterJob(TaskAttemptContext context)
- throws IOException, InterruptedException{
- getOutputFormat().getOutputCommitter(context).cleanupJob(context);
- }
-
- /**
- * Implementation that calls the underlying output committer's abortJob,
- * used in lieu of underlying committer's abortJob when using dynamic partitioning
- * This should be written in a manner that is okay to call after having had
- * multiple underlying outputcommitters write to task dirs inside it.
- * While the base MR cleanupJob should have sufficed normally, this is provided
- * in order to let people implementing setupOutputCommitterJob to abort properly
- *
- * @param context
- * @param state
- * @throws IOException
- */
- public void abortOutputCommitterJob(TaskAttemptContext context, State state)
- throws IOException, InterruptedException{
- getOutputFormat().getOutputCommitter(context).abortJob(context,state);
- }
-
- /**
- * return an instance of OutputFormatContainer containing the passed outputFormat. DefaultOutputFormatContainer is returned by default.
- * @param outputFormat format the returned container will contain
- * @return
- */
-
- //TODO broken this entire class will disappear anyway
- OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) {
- return new DefaultOutputFormatContainer(null);
- }
-
-}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InitializeInput.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InitializeInput.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InitializeInput.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InitializeInput.java Thu Mar 29 23:17:50 2012
@@ -67,7 +67,7 @@ public class InitializeInput {
private static final Log LOG = LogFactory.getLog(InitializeInput.class);
- /** The prefix for keys used for storage driver arguments */
+ /** The prefix for keys used for storage handler arguments */
static final String HCAT_KEY_PREFIX = "hcat.";
private static HiveConf hiveConf;
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java Thu Mar 29 23:17:50 2012
@@ -20,9 +20,7 @@ package org.apache.hcatalog.mapreduce;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import java.io.Serializable;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Properties;
/** The class used to serialize and store the information read from the metadata server */
@@ -47,9 +45,6 @@ public class InputJobInfo implements Ser
/** implementation specific job properties */
private Properties properties;
- /** job properties */
- private Map<String,String> jobProperties;
-
/**
* Initializes a new InputJobInfo
* for reading data from a table.
@@ -132,8 +127,8 @@ public class InputJobInfo implements Ser
}
/**
- * Set/Get Property information to be passed down to *StorageDriver implementation
- * put implementation specific storage driver configurations here
+ * Set/Get Property information to be passed down to *StorageHandler implementation
+ * put implementation specific storage handler configurations here
* @return the implementation specific job properties
*/
public Properties getProperties() {
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InternalUtil.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InternalUtil.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InternalUtil.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/InternalUtil.java Thu Mar 29 23:17:50 2012
@@ -38,7 +38,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
@@ -64,7 +63,7 @@ class InternalUtil {
}
- return new StorerInfo(null, null,
+ return new StorerInfo(
sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(),
properties.get(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE),
hcatProperties);
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java Thu Mar 29 23:17:50 2012
@@ -28,7 +28,7 @@ import org.apache.hcatalog.data.HCatReco
* behavior necessary to work with HCatalog (ie metastore updates, hcatalog delegation tokens, etc).
* Containers are also used to provide storage specific implementations of some HCatalog features (ie dynamic partitioning).
* Hence users wishing to create storage specific implementations of HCatalog features should implement this class and override
- * HCatOutputStorageDriver.getOutputFormatContainer() to return the implementation.
+ * HCatStorageHandler.getOutputFormatContainer(OutputFormat outputFormat) to return the implementation.
* By default DefaultOutputFormatContainer is used, which only implements the bare minimum features; HCatalog features
* such as partitioning aren't supported.
*/
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java Thu Mar 29 23:17:50 2012
@@ -205,8 +205,8 @@ public class OutputJobInfo implements Se
}
/**
- * Set/Get Property information to be passed down to *StorageDriver implementation
- * put implementation specific storage driver configurations here
+ * Set/Get Property information to be passed down to *StorageHandler implementation
+ * put implementation specific storage handler configurations here
* @return the implementation specific job properties
*/
public Properties getProperties() {
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/StorerInfo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/StorerInfo.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/StorerInfo.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/StorerInfo.java Thu Mar 29 23:17:50 2012
@@ -25,15 +25,7 @@ public class StorerInfo implements Seria
/** The serialization version */
private static final long serialVersionUID = 1L;
- //TODO remove this
- /** The name of the input storage driver class */
- private String inputSDClass;
-
- //TODO remove this
- /** The name of the output storage driver class */
- private String outputSDClass;
-
- /** The properties for the storage driver */
+ /** The properties for the storage handler */
private Properties properties;
private String ofClass;
@@ -44,31 +36,16 @@ public class StorerInfo implements Seria
private String storageHandlerClass;
-
- //TODO remove this
- /**
- * Initialize the storage driver
- * @param inputSDClass
- * @param outputSDClass
- * @param properties
- */
- public StorerInfo(String inputSDClass, String outputSDClass, Properties properties) {
- super();
- this.inputSDClass = inputSDClass;
- this.outputSDClass = outputSDClass;
- this.properties = properties;
- }
-
/**
- * Initialize the storage driver
- * @param inputSDClass
- * @param outputSDClass
+ * Initialize the storer info
+ * @param ifClass
+ * @param ofClass
+ * @param serdeClass
+ * @param storageHandlerClass
* @param properties
*/
- public StorerInfo(String inputSDClass, String outputSDClass, String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) {
+ public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) {
super();
- this.inputSDClass = inputSDClass;
- this.outputSDClass = outputSDClass;
this.ifClass =ifClass;
this.ofClass = ofClass;
this.serdeClass = serdeClass;
@@ -76,21 +53,7 @@ public class StorerInfo implements Seria
this.properties = properties;
}
- /**
- * @return the inputSDClass
- */
- public String getInputSDClass() {
- return inputSDClass;
- }
-
- /**
- * @return the outputSDClass
- */
- public String getOutputSDClass() {
- return outputSDClass;
- }
-
-public String getIfClass() {
+ public String getIfClass() {
return ifClass;
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileInputDriver.java Thu Mar 29 23:17:50 2012
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hcatalog.rcfile;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Map.Entry;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarStruct;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hcatalog.common.HCatUtil;
-import org.apache.hcatalog.data.DefaultHCatRecord;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatFieldSchema;
-import org.apache.hcatalog.data.schema.HCatSchema;
-import org.apache.hcatalog.mapreduce.HCatInputStorageDriver;
-
-public class RCFileInputDriver extends HCatInputStorageDriver{
-
-
- private SerDe serde;
- private static final Log LOG = LogFactory.getLog(RCFileInputDriver.class);
- private List<HCatFieldSchema> colsInData;
- private StructObjectInspector oi;
- private Map<String,String> partValues;
- private List<HCatFieldSchema> outCols;
- private List<? extends StructField> structFields;
- private Map<String,Integer> namePosMapping;
-
- @Override
- public InputFormat<? extends WritableComparable, ? extends Writable> getInputFormat(Properties hcatProperties) {
- return new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
- }
-
- @Override
- public void setInputPath(JobContext jobContext, String location) throws IOException {
-
- super.setInputPath(jobContext, location);
- }
-
- @Override
- public void setOriginalSchema(JobContext jobContext, HCatSchema dataSchema) throws IOException {
-
- colsInData = dataSchema.getFields();
- namePosMapping = new HashMap<String, Integer>(colsInData.size());
- int index =0;
- for(HCatFieldSchema field : dataSchema.getFields()){
- namePosMapping.put(field.getName(), index++);
- }
- }
-
- @Override
- public void setOutputSchema(JobContext jobContext, HCatSchema desiredSchema) throws IOException {
-
- // Finds out which column ids need to be projected and set them up for RCFile.
- outCols = desiredSchema.getFields();
- ArrayList<Integer> prjColumns = new ArrayList<Integer>();
- for(HCatFieldSchema prjCol : outCols){
- Integer pos = namePosMapping.get(prjCol.getName().toLowerCase());
- if(pos != null) {
- prjColumns.add(pos);
- }
- }
-
- Collections.sort(prjColumns);
- ColumnProjectionUtils.setReadColumnIDs(jobContext.getConfiguration(), prjColumns);
- }
-
- @Override
- public void setPartitionValues(JobContext jobContext, Map<String, String> partitionValues)
- throws IOException {
- partValues = partitionValues;
- }
-
- @Override
- public HCatRecord convertToHCatRecord(WritableComparable ignored, Writable bytesRefArray) throws IOException {
-
- // Deserialize bytesRefArray into struct and then convert that struct to
- // HCatRecord.
- ColumnarStruct struct;
- try {
- struct = (ColumnarStruct)serde.deserialize(bytesRefArray);
- } catch (SerDeException e) {
- LOG.error(e.toString(), e);
- throw new IOException(e);
- }
-
- List<Object> outList = new ArrayList<Object>(outCols.size());
-
- String colName;
- Integer index;
-
- for(HCatFieldSchema col : outCols){
-
- colName = col.getName().toLowerCase();
- index = namePosMapping.get(colName);
-
- if(index != null){
- StructField field = structFields.get(index);
- outList.add( getTypedObj(oi.getStructFieldData(struct, field), field.getFieldObjectInspector()));
- }
-
- else {
- outList.add(partValues.get(colName));
- }
-
- }
- return new DefaultHCatRecord(outList);
- }
-
- private Object getTypedObj(Object data, ObjectInspector oi) throws IOException{
-
- // The real work-horse method. We are gobbling up all the laziness benefits
- // of Hive-RCFile by deserializing everything and creating crisp HCatRecord
- // with crisp Java objects inside it. We have to do it because higher layer
- // may not know how to do it.
-
- if (data == null) {
- return null;
- }
-
- switch(oi.getCategory()){
-
- case PRIMITIVE:
- return ((PrimitiveObjectInspector)oi).getPrimitiveJavaObject(data);
-
- case MAP:
- MapObjectInspector moi = (MapObjectInspector)oi;
- Map<?,?> lazyMap = moi.getMap(data);
- ObjectInspector keyOI = moi.getMapKeyObjectInspector();
- ObjectInspector valOI = moi.getMapValueObjectInspector();
- Map<Object,Object> typedMap = new HashMap<Object,Object>(lazyMap.size());
- for(Entry<?,?> e : lazyMap.entrySet()){
- typedMap.put(getTypedObj(e.getKey(), keyOI), getTypedObj(e.getValue(), valOI));
- }
- return typedMap;
-
- case LIST:
- ListObjectInspector loi = (ListObjectInspector)oi;
- List<?> lazyList = loi.getList(data);
- ObjectInspector elemOI = loi.getListElementObjectInspector();
- List<Object> typedList = new ArrayList<Object>(lazyList.size());
- Iterator<?> itr = lazyList.listIterator();
- while(itr.hasNext()){
- typedList.add(getTypedObj(itr.next(),elemOI));
- }
- return typedList;
-
- case STRUCT:
- StructObjectInspector soi = (StructObjectInspector)oi;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- List<Object> typedStruct = new ArrayList<Object>(fields.size());
- for(StructField field : fields){
- typedStruct.add( getTypedObj(soi.getStructFieldData(data, field), field.getFieldObjectInspector()));
- }
- return typedStruct;
-
-
- default:
- throw new IOException("Don't know how to deserialize: "+oi.getCategory());
-
- }
- }
-
- @Override
- public void initialize(JobContext context,Properties hcatProperties)
- throws IOException {
-
- super.initialize(context, hcatProperties);
-
- // Columnar Serde needs to know names and types of columns it needs to read.
- List<FieldSchema> fields = HCatUtil.getFieldSchemaList(colsInData);
- hcatProperties.setProperty(Constants.LIST_COLUMNS,MetaStoreUtils.
- getColumnNamesFromFieldSchema(fields));
- hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES, MetaStoreUtils.
- getColumnTypesFromFieldSchema(fields));
-
- // It seems RCFile reads and writes nulls differently as compared to default hive.
- // setting these props to match LazySimpleSerde
- hcatProperties.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
- hcatProperties.setProperty(Constants.SERIALIZATION_FORMAT, "1");
-
- try {
- serde = new ColumnarSerDe();
- serde.initialize(context.getConfiguration(), hcatProperties);
- oi = (StructObjectInspector) serde.getObjectInspector();
- structFields = oi.getAllStructFieldRefs();
-
- } catch (SerDeException e) {
- throw new IOException(e);
- }
- }
-}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/rcfile/RCFileOutputDriver.java Thu Mar 29 23:17:50 2012
@@ -1,220 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hcatalog.rcfile;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hcatalog.common.HCatUtil;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatFieldSchema;
-import org.apache.hcatalog.data.schema.HCatSchema;
-import org.apache.hcatalog.mapreduce.FileOutputStorageDriver;
-
-/**
- * The storage driver for writing RCFile data through HCatOutputFormat.
- */
- public class RCFileOutputDriver extends FileOutputStorageDriver {
-
- /** The serde for serializing the HCatRecord to bytes writable */
- private SerDe serde;
-
- /** The object inspector for the given schema */
- private StructObjectInspector objectInspector;
-
- /** The schema for the output data */
- private HCatSchema outputSchema;
-
- /** The cached RCFile output format instance */
- private OutputFormat outputFormat = null;
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#convertValue(org.apache.hcatalog.data.HCatRecord)
- */
- @Override
- public Writable convertValue(HCatRecord value) throws IOException {
- try {
-
- return serde.serialize(value.getAll(), objectInspector);
- } catch(SerDeException e) {
- throw new IOException(e);
- }
- }
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#generateKey(org.apache.hcatalog.data.HCatRecord)
- */
- @Override
- public WritableComparable<?> generateKey(HCatRecord value) throws IOException {
- //key is not used for RCFile output
- return null;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#getOutputFormat(java.util.Properties)
- */
- @SuppressWarnings("unchecked")
- @Override
- public OutputFormat<? extends WritableComparable<?>, ? extends Writable> getOutputFormat() throws IOException {
- if( outputFormat == null ) {
- outputFormat = new RCFileMapReduceOutputFormat();
- }
-
- return outputFormat;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setOutputPath(org.apache.hadoop.mapreduce.JobContext, java.lang.String)
- */
- @Override
- public void setOutputPath(JobContext jobContext, String location) throws IOException {
- //Not calling FileOutputFormat.setOutputPath since that requires a Job instead of JobContext
- jobContext.getConfiguration().set("mapred.output.dir", location);
- }
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setPartitionValues(org.apache.hadoop.mapreduce.JobContext, java.util.Map)
- */
- @Override
- public void setPartitionValues(JobContext jobContext, Map<String, String> partitionValues)
- throws IOException {
- //default implementation of HCatOutputStorageDriver.getPartitionLocation will use the partition
- //values to generate the data location, so partition values not used here
- }
-
- /* (non-Javadoc)
- * @see org.apache.hcatalog.mapreduce.HCatOutputStorageDriver#setSchema(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.hive.metastore.api.Schema)
- */
- @Override
- public void setSchema(JobContext jobContext, HCatSchema schema) throws IOException {
- outputSchema = schema;
- RCFileMapReduceOutputFormat.setColumnNumber(
- jobContext.getConfiguration(), schema.getFields().size());
- }
-
- @Override
- public void initialize(JobContext context,Properties hcatProperties) throws IOException {
-
- super.initialize(context, hcatProperties);
-
- List<FieldSchema> fields = HCatUtil.getFieldSchemaList(outputSchema.getFields());
- hcatProperties.setProperty(Constants.LIST_COLUMNS,
- MetaStoreUtils.getColumnNamesFromFieldSchema(fields));
- hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES,
- MetaStoreUtils.getColumnTypesFromFieldSchema(fields));
-
- // setting these props to match LazySimpleSerde
- hcatProperties.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
- hcatProperties.setProperty(Constants.SERIALIZATION_FORMAT, "1");
-
- try {
- serde = new ColumnarSerDe();
- serde.initialize(context.getConfiguration(), hcatProperties);
- objectInspector = createStructObjectInspector();
-
- } catch (SerDeException e) {
- throw new IOException(e);
- }
- }
-
- public StructObjectInspector createStructObjectInspector() throws IOException {
-
- if( outputSchema == null ) {
- throw new IOException("Invalid output schema specified");
- }
-
- List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
- List<String> fieldNames = new ArrayList<String>();
-
- for(HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) {
- TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString());
-
- fieldNames.add(hcatFieldSchema.getName());
- fieldInspectors.add(getObjectInspector(type));
- }
-
- StructObjectInspector structInspector = ObjectInspectorFactory.
- getStandardStructObjectInspector(fieldNames, fieldInspectors);
- return structInspector;
- }
-
- public ObjectInspector getObjectInspector(TypeInfo type) throws IOException {
-
- switch(type.getCategory()) {
-
- case PRIMITIVE :
- PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
- return PrimitiveObjectInspectorFactory.
- getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory());
-
- case MAP :
- MapTypeInfo mapType = (MapTypeInfo) type;
- MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(
- getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
- return mapInspector;
-
- case LIST :
- ListTypeInfo listType = (ListTypeInfo) type;
- ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(
- getObjectInspector(listType.getListElementTypeInfo()));
- return listInspector;
-
- case STRUCT :
- StructTypeInfo structType = (StructTypeInfo) type;
- List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
-
- List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
- for(TypeInfo fieldType : fieldTypes) {
- fieldInspectors.add(getObjectInspector(fieldType));
- }
-
- StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
- structType.getAllStructFieldNames(), fieldInspectors);
- return structInspector;
-
- default :
- throw new IOException("Unknown field schema type");
- }
- }
-
-}
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java Thu Mar 29 23:17:50 2012
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hcatalog.utils;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hcatalog.rcfile.RCFileInputDriver;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
-import org.apache.thrift.TException;
-
-/**
- * A utility program to annotate partitions of a pre-created table
- * with input storage driver and output storage driver information
- */
-public class PartitionStorageDriverAnnotator {
-
- /**
- * @param args
- * @throws MetaException
- * @throws TException
- * @throws NoSuchObjectException
- * @throws InvalidOperationException
- */
- public static void main(String[] args) throws MetaException, NoSuchObjectException,
- TException, InvalidOperationException {
- String thrifturi = null;
- String database = "default";
- String table = null;
- String isd = null;
- String osd = null;
- Map<String, String> m = new HashMap<String, String>();
- for(int i = 0; i < args.length; i++) {
- if(args[i].equals("-u")) {
- thrifturi = args[i+1];
- } else if(args[i].equals("-t")) {
- table = args[i+1];
- } else if (args[i].equals("-i")) {
- isd = args[i+1];
- } else if (args[i].equals("-o")) {
- osd = args[i+1];
- } else if (args[i].equals("-p")) {
- String[] kvps = args[i+1].split(";");
- for(String kvp: kvps) {
- String[] kv = kvp.split("=");
- if(kv.length != 2) {
- System.err.println("ERROR: key value property pairs must be specified as key1=val1;key2=val2;..;keyn=valn");
- System.exit(1);
- }
- m.put(kv[0], kv[1]);
- }
- } else if(args[i].equals("-d")) {
- database = args[i+1];
- } else {
- System.err.println("ERROR: Unknown option: " + args[i]);
- usage();
- }
- i++; // to skip the value for an option
- }
- if(table == null || thrifturi == null) {
- System.err.println("ERROR: thrift uri and table name are mandatory");
- usage();
- }
- HiveConf hiveConf = new HiveConf(PartitionStorageDriverAnnotator.class);
- hiveConf.set("hive.metastore.local", "false");
- hiveConf.set("hive.metastore.uris", thrifturi);
-
- HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf,null);
- List<Partition> parts = hmsc.listPartitions(database, table, Short.MAX_VALUE);
-
- m.put("hcat.isd", isd != null ? isd : RCFileInputDriver.class.getName());
- m.put("hcat.osd", osd != null ? osd : RCFileOutputDriver.class.getName());
-
- for(Partition p: parts) {
- p.setParameters(m);
- hmsc.alter_partition(database, table, p);
- }
- }
-
- /**
- *
- */
- private static void usage() {
- System.err.println("Usage: java -cp testudf.jar:<hcatjar> org.apache.hcat.utils.PartitionStorageDriverAnnotator -u <thrift uri> -t <partitioned tablename>" +
- " [-i input driver classname (Default rcfiledriver)] [-o output driver classname (default rcfiledriver)] " +
- " [-p key1=val1;key2=val2;..;keyn=valn (list of key=value property pairs to associate with each partition)]" +
- " [-d database (if this not supplied the default database is used)]");
- System.exit(1);
- }
-
-}
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/cli/TestSemanticAnalysis.java Thu Mar 29 23:17:50 2012
@@ -20,7 +20,6 @@ package org.apache.hcatalog.cli;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import junit.framework.TestCase;
@@ -43,10 +42,7 @@ import org.apache.hadoop.hive.ql.process
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
-import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.listener.NotificationListener;
-import org.apache.hcatalog.rcfile.RCFileInputDriver;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
import org.apache.thrift.TException;
public class TestSemanticAnalysis extends TestCase{
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java Thu Mar 29 23:17:50 2012
@@ -53,13 +53,10 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
-import org.apache.hcatalog.rcfile.RCFileInputDriver;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
/**
* Test for HCatOutputFormat. Writes a partition using HCatOutputFormat and reads
@@ -72,8 +69,6 @@ public abstract class HCatMapReduceTest
protected String inputFormat = RCFileInputFormat.class.getName();
protected String outputFormat = RCFileOutputFormat.class.getName();
- protected String inputSD = RCFileInputDriver.class.getName();
- protected String outputSD = RCFileOutputDriver.class.getName();
protected String serdeClass = ColumnarSerDe.class.getName();
private static List<HCatRecord> writeRecords = new ArrayList<HCatRecord>();
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java Thu Mar 29 23:17:50 2012
@@ -42,8 +42,6 @@ import org.apache.hadoop.hive.serde.Cons
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.util.StringUtils;
-import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.rcfile.RCFileOutputDriver;
public class TestHCatOutputFormat extends TestCase {
private HiveMetaStoreClient client;
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/pig/MyPigStorage.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/pig/MyPigStorage.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/pig/MyPigStorage.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/pig/MyPigStorage.java Thu Mar 29 23:17:50 2012
@@ -18,10 +18,7 @@
package org.apache.hcatalog.pig;
import java.io.IOException;
-import java.util.Properties;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hcatalog.pig.drivers.PigStorageInputDriver;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.Tuple;
Modified: incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java (original)
+++ incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java Thu Mar 29 23:17:50 2012
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.hbase.HBas
import org.apache.hcatalog.common.HCatConstants;
/**
- * Constants class for constants used in HBase storage driver.
+ * Constants class for constants used in HBase storage handler.
*/
class HBaseConstants {
@@ -35,8 +35,8 @@ class HBaseConstants {
/** key used to define the column mapping of hbase to hcatalog schema */
public static final String PROPERTY_COLUMN_MAPPING_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+"."+ HBaseSerDe.HBASE_COLUMNS_MAPPING;
- /** key used to define wether bulk storage driver will be used or not */
- public static final String PROPERTY_OSD_BULK_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.output.bulkMode";
+ /** key used to define whether bulk storage output format will be used or not */
+ public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.output.bulkMode";
/** key used to define the hbase table snapshot. */
public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot";
Modified: incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java (original)
+++ incubator/hcatalog/trunk/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java Thu Mar 29 23:17:50 2012
@@ -556,7 +556,7 @@ public class HBaseHCatStorageHandler ext
public static boolean isBulkMode(OutputJobInfo outputJobInfo) {
//Default is false
String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties()
- .getProperty(HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY,
+ .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY,
"false");
return "true".equals(bulkMode);
}
Modified: incubator/hcatalog/trunk/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java?rev=1307159&r1=1307158&r2=1307159&view=diff
==============================================================================
--- incubator/hcatalog/trunk/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java (original)
+++ incubator/hcatalog/trunk/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java Thu Mar 29 23:17:50 2012
@@ -352,7 +352,7 @@ public class TestHBaseBulkOutputFormat e
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
"(key int, english string, spanish string) STORED BY " +
"'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
- "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY+"'='true',"+
+ "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY+"'='true',"+
"'hbase.columns.mapping'=':key,"+familyName+":english,"+familyName+":spanish')" ;
assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
@@ -446,7 +446,7 @@ public class TestHBaseBulkOutputFormat e
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
"(key int, english string, spanish string) STORED BY " +
"'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
- "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY+"'='true',"+
+ "TBLPROPERTIES ('"+HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY+"'='true',"+
"'hbase.columns.mapping'=':key,"+familyName+":english,"+familyName+":spanish')" ;
assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
@@ -525,7 +525,7 @@ public class TestHBaseBulkOutputFormat e
String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
"(key int, english string, spanish string) STORED BY " +
"'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" +
- "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY + "'='true'," +
+ "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," +
"'hbase.columns.mapping'=':key," + familyName + ":english," + familyName
+ ":spanish')";
@@ -578,7 +578,7 @@ public class TestHBaseBulkOutputFormat e
ResultScanner scanner = table.getScanner(scan);
assertFalse(scanner.iterator().hasNext());
- // verify that the input storage driver returns empty results.
+ // verify that the storage handler input format returns empty results.
Path outputDir = new Path(getTestDir(),
"mapred/testHBaseTableBulkIgnoreAbortedTransactions");
FileSystem fs = getFileSystem();