Posted to commits@hive.apache.org by zs...@apache.org on 2010/01/21 08:31:27 UTC
svn commit: r901581 [1/10] - in /hadoop/hive/trunk: ./
cli/src/java/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/common/io/
contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/...
Author: zshao
Date: Thu Jan 21 07:29:29 2010
New Revision: 901581
URL: http://svn.apache.org/viewvc?rev=901581&view=rev
Log:
HIVE-1081. Automated source code cleanup - Part 1. (Carl Steinbach via zshao)
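
This cleanup applies a consistent set of style conventions across the files listed below: wildcard imports are expanded into explicit imports, fields that are never reassigned are marked final, overriding methods gain @Override annotations, single-statement if bodies are wrapped in braces, and long lines are re-wrapped. As a minimal illustrative sketch only (the class, package, and contents below are hypothetical and not part of the commit), code in the post-cleanup style looks like:

    // Illustrative only -- not part of this commit. Shows the conventions
    // applied throughout the change.
    package org.apache.hadoop.hive.example; // hypothetical package

    import java.util.ArrayList;             // explicit imports instead of java.util.*
    import java.util.List;

    public class StyleExample {
      // fields that are never reassigned are declared final
      private final List<String> names = new ArrayList<String>();

      public void add(String name) {
        // single-statement if bodies are wrapped in braces
        if (name != null) {
          names.add(name);
        }
      }

      @Override                             // overriding methods are annotated
      public String toString() {
        return "StyleExample(" + names.size() + " names)";
      }
    }
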
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayInputStream.java
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextInputFormat.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextOutputFormat.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/genericudf/example/GenericUDFDBOutput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/GenericMR.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Mapper.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Output.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Reducer.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/IdentityMapper.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/WordCountReduce.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/TypedBytesSerDe.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogDeserializer.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/s3/S3LogStruct.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFExplode2.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/Type.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesInput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesOutput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordInput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordOutput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordWriter.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesWritable.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesWritableInput.java
hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesWritableOutput.java
hadoop/hive/trunk/contrib/src/test/org/apache/hadoop/hive/contrib/mr/TestGenericMR.java
hadoop/hive/trunk/contrib/src/test/org/apache/hadoop/hive/contrib/serde2/TestRegexSerDe.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWIAuth.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWIContextListener.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWIException.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWIServer.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWISessionItem.java
hadoop/hive/trunk/hwi/src/java/org/apache/hadoop/hive/hwi/HWISessionManager.java
hadoop/hive/trunk/hwi/src/test/org/apache/hadoop/hive/hwi/TestHWIServer.java
hadoop/hive/trunk/hwi/src/test/org/apache/hadoop/hive/hwi/TestHWISessionManager.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveCallableStatement.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveConnection.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveDataSource.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveDatabaseMetaData.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveDriver.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HivePreparedStatement.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSet.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSetMetaData.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveStatement.java
hadoop/hive/trunk/jdbc/src/java/org/apache/hadoop/hive/jdbc/JdbcSessionState.java
hadoop/hive/trunk/jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/AlterHandler.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
hadoop/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
hadoop/hive/trunk/service/src/java/org/apache/hadoop/hive/service/HiveClient.java
hadoop/hive/trunk/service/src/java/org/apache/hadoop/hive/service/HiveInterface.java
hadoop/hive/trunk/service/src/java/org/apache/hadoop/hive/service/HiveServer.java
hadoop/hive/trunk/service/src/test/org/apache/hadoop/hive/service/TestHiveServer.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Jan 21 07:29:29 2010
@@ -19,6 +19,9 @@
HIVE-990. Incorporate CheckStyle into Hive's build.xml.
(Carl Steinbach via zshao)
+ HIVE-1081. Automated source code cleanup - Part 1.
+ (Carl Steinbach via zshao)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java (original)
+++ hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java Thu Jan 21 07:29:29 2010
@@ -18,25 +18,37 @@
package org.apache.hadoop.hive.cli;
-import jline.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+
+import jline.ArgumentCompletor;
+import jline.ConsoleReader;
+import jline.History;
+import jline.SimpleCompletor;
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.fs.FsShell;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.Driver;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.shims.ShimLoader;
public class CliDriver {
@@ -44,25 +56,26 @@
public final static String prompt = "hive";
public final static String prompt2 = " "; // when ';' is not yet seen
- private LogHelper console;
- private Configuration conf;
+ private final LogHelper console;
+ private final Configuration conf;
public CliDriver() {
SessionState ss = SessionState.get();
- conf = (ss != null) ? ss.getConf() : new Configuration ();
+ conf = (ss != null) ? ss.getConf() : new Configuration();
Log LOG = LogFactory.getLog("CliDriver");
console = new LogHelper(LOG);
}
-
+
public int processCmd(String cmd) {
SessionState ss = SessionState.get();
-
+
String cmd_trimmed = cmd.trim();
String[] tokens = cmd_trimmed.split("\\s+");
String cmd_1 = cmd_trimmed.substring(tokens[0].length()).trim();
int ret = 0;
-
- if (cmd_trimmed.toLowerCase().equals("quit") || cmd_trimmed.toLowerCase().equals("exit")) {
+
+ if (cmd_trimmed.toLowerCase().equals("quit")
+ || cmd_trimmed.toLowerCase().equals("exit")) {
// if we have come this far - either the previous commands
// are all successful or this is command line. in either case
@@ -73,49 +86,53 @@
String shell_cmd = cmd_trimmed.substring(1);
- //shell_cmd = "/bin/bash -c \'" + shell_cmd + "\'";
+ // shell_cmd = "/bin/bash -c \'" + shell_cmd + "\'";
try {
Process executor = Runtime.getRuntime().exec(shell_cmd);
- StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, ss.out);
- StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, ss.err);
-
+ StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(),
+ null, ss.out);
+ StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(),
+ null, ss.err);
+
outPrinter.start();
errPrinter.start();
-
+
ret = executor.waitFor();
if (ret != 0) {
console.printError("Command failed with exit code = " + ret);
}
- }
- catch (Exception e) {
- console.printError("Exception raised from Shell command " + e.getLocalizedMessage(),
- org.apache.hadoop.util.StringUtils.stringifyException(e));
+ } catch (Exception e) {
+ console.printError("Exception raised from Shell command "
+ + e.getLocalizedMessage(), org.apache.hadoop.util.StringUtils
+ .stringifyException(e));
ret = 1;
}
} else if (tokens[0].toLowerCase().equals("list")) {
SessionState.ResourceType t;
- if(tokens.length < 2 || (t = SessionState.find_resource_type(tokens[1])) == null) {
- console.printError("Usage: list [" +
- StringUtils.join(SessionState.ResourceType.values(),"|") +
- "] [<value> [<value>]*]" );
+ if (tokens.length < 2
+ || (t = SessionState.find_resource_type(tokens[1])) == null) {
+ console.printError("Usage: list ["
+ + StringUtils.join(SessionState.ResourceType.values(), "|")
+ + "] [<value> [<value>]*]");
ret = 1;
} else {
List<String> filter = null;
- if(tokens.length >=3) {
- System.arraycopy(tokens, 2, tokens, 0, tokens.length-2);
+ if (tokens.length >= 3) {
+ System.arraycopy(tokens, 2, tokens, 0, tokens.length - 2);
filter = Arrays.asList(tokens);
}
Set<String> s = ss.list_resource(t, filter);
- if(s != null && !s.isEmpty())
+ if (s != null && !s.isEmpty()) {
ss.out.println(StringUtils.join(s, "\n"));
+ }
}
} else {
CommandProcessor proc = CommandProcessorFactory.get(tokens[0]);
- if(proc != null) {
- if(proc instanceof Driver) {
+ if (proc != null) {
+ if (proc instanceof Driver) {
Driver qp = (Driver) proc;
PrintStream out = ss.out;
long start = System.currentTimeMillis();
@@ -125,11 +142,11 @@
qp.close();
return ret;
}
-
+
Vector<String> res = new Vector<String>();
try {
while (qp.getResults(res)) {
- for (String r:res) {
+ for (String r : res) {
out.println(r);
}
res.clear();
@@ -138,11 +155,12 @@
}
}
} catch (IOException e) {
- console.printError("Failed with exception " + e.getClass().getName() + ":" + e.getMessage(),
- "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ console.printError("Failed with exception "
+ + e.getClass().getName() + ":" + e.getMessage(), "\n"
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
ret = 1;
}
-
+
int cret = qp.close();
if (ret == 0) {
ret = cret;
@@ -150,7 +168,7 @@
long end = System.currentTimeMillis();
if (end > start) {
- double timeTaken = (double)(end-start)/1000.0;
+ double timeTaken = (end - start) / 1000.0;
console.printInfo("Time taken: " + timeTaken + " seconds", null);
}
@@ -165,24 +183,26 @@
public int processLine(String line) {
int lastRet = 0, ret = 0;
-
- String command="";
- for(String oneCmd: line.split(";")) {
-
- if (StringUtils.endsWith(oneCmd, "\\")){
- command+=StringUtils.chop(oneCmd)+";";
+
+ String command = "";
+ for (String oneCmd : line.split(";")) {
+
+ if (StringUtils.endsWith(oneCmd, "\\")) {
+ command += StringUtils.chop(oneCmd) + ";";
continue;
} else {
- command+=oneCmd;
+ command += oneCmd;
}
- if(StringUtils.isBlank(command))
+ if (StringUtils.isBlank(command)) {
continue;
-
+ }
+
ret = processCmd(command);
- command="";
+ command = "";
lastRet = ret;
- boolean ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS);
- if(ret != 0 && !ignoreErrors) {
+ boolean ignoreErrors = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.CLIIGNOREERRORS);
+ if (ret != 0 && !ignoreErrors) {
return ret;
}
}
@@ -193,7 +213,7 @@
String line;
StringBuffer qsb = new StringBuffer();
- while((line = r.readLine()) != null) {
+ while ((line = r.readLine()) != null) {
qsb.append(line + "\n");
}
@@ -203,15 +223,16 @@
public static void main(String[] args) throws Exception {
OptionsProcessor oproc = new OptionsProcessor();
- if(! oproc.process_stage1(args)) {
+ if (!oproc.process_stage1(args)) {
System.exit(1);
}
- // NOTE: It is critical to do this here so that log4j is reinitialized before
+ // NOTE: It is critical to do this here so that log4j is reinitialized
+ // before
// any of the other core hive classes are loaded
SessionState.initHiveLog4j();
- CliSessionState ss = new CliSessionState (new HiveConf(SessionState.class));
+ CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class));
ss.in = System.in;
try {
ss.out = new PrintStream(System.out, true, "UTF-8");
@@ -220,24 +241,25 @@
System.exit(3);
}
-
- if(! oproc.process_stage2(ss)) {
+ if (!oproc.process_stage2(ss)) {
System.exit(2);
}
// set all properties specified via command line
HiveConf conf = ss.getConf();
- for(Map.Entry<Object, Object> item: ss.cmdProperties.entrySet()) {
+ for (Map.Entry<Object, Object> item : ss.cmdProperties.entrySet()) {
conf.set((String) item.getKey(), (String) item.getValue());
}
-
- if(!ShimLoader.getHadoopShims().usesJobShell()) {
- // hadoop-20 and above - we need to augment classpath using hiveconf components
+
+ if (!ShimLoader.getHadoopShims().usesJobShell()) {
+ // hadoop-20 and above - we need to augment classpath using hiveconf
+ // components
// see also: code in ExecDriver.java
ClassLoader loader = conf.getClassLoader();
String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
if (StringUtils.isNotBlank(auxJars)) {
- loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
+ loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars,
+ ","));
}
conf.setClassLoader(loader);
Thread.currentThread().setContextClassLoader(loader);
@@ -245,44 +267,46 @@
SessionState.start(ss);
- CliDriver cli = new CliDriver ();
+ CliDriver cli = new CliDriver();
- if(ss.execString != null) {
+ if (ss.execString != null) {
System.exit(cli.processLine(ss.execString));
}
try {
- if(ss.fileName != null) {
- System.exit(cli.processReader(new BufferedReader(new FileReader(ss.fileName))));
+ if (ss.fileName != null) {
+ System.exit(cli.processReader(new BufferedReader(new FileReader(
+ ss.fileName))));
}
} catch (FileNotFoundException e) {
- System.err.println("Could not open input file for reading. ("+e.getMessage()+")");
+ System.err.println("Could not open input file for reading. ("
+ + e.getMessage() + ")");
System.exit(3);
}
ConsoleReader reader = new ConsoleReader();
reader.setBellEnabled(false);
- //reader.setDebug(new PrintWriter(new FileWriter("writer.debug", true)));
+ // reader.setDebug(new PrintWriter(new FileWriter("writer.debug", true)));
List<SimpleCompletor> completors = new LinkedList<SimpleCompletor>();
- completors.add(new SimpleCompletor(new String[] { "set", "from",
- "create", "load",
- "describe", "quit", "exit" }));
+ completors.add(new SimpleCompletor(new String[] { "set", "from", "create",
+ "load", "describe", "quit", "exit" }));
reader.addCompletor(new ArgumentCompletor(completors));
-
+
String line;
final String HISTORYFILE = ".hivehistory";
- String historyFile = System.getProperty("user.home") + File.separator + HISTORYFILE;
+ String historyFile = System.getProperty("user.home") + File.separator
+ + HISTORYFILE;
reader.setHistory(new History(new File(historyFile)));
int ret = 0;
String prefix = "";
String curPrompt = prompt;
- while ((line = reader.readLine(curPrompt+"> ")) != null) {
+ while ((line = reader.readLine(curPrompt + "> ")) != null) {
if (!prefix.equals("")) {
prefix += '\n';
}
- if(line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
+ if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
line = prefix + line;
ret = cli.processLine(line);
prefix = "";
Modified: hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java (original)
+++ hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java Thu Jan 21 07:29:29 2010
@@ -20,14 +20,13 @@
import java.util.Properties;
-import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.session.SessionState;
public class CliSessionState extends SessionState {
/**
* -e option if any that the session has been invoked with
- */
+ */
public String execString;
/**
@@ -40,12 +39,11 @@
*/
public Properties cmdProperties = new Properties();
-
public CliSessionState() {
super();
}
- public CliSessionState (HiveConf conf) {
+ public CliSessionState(HiveConf conf) {
super(conf);
}
Modified: hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java (original)
+++ hadoop/hive/trunk/cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java Thu Jan 21 07:29:29 2010
@@ -18,160 +18,162 @@
package org.apache.hadoop.hive.cli;
-import java.io.*;
-import java.util.*;
-
-import org.apache.commons.cli2.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.commons.cli2.Argument;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.WriteableCommandLine;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.PropertyOption;
import org.apache.commons.cli2.resource.ResourceConstants;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.logging.*;
-
-import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
public class OptionsProcessor {
- protected static final Log l4j = LogFactory.getLog(OptionsProcessor.class.getName());
-
- private Parser parser = new Parser();
- private Option confOptions, isSilentOption, execOption, fileOption, isHelpOption;
+ protected static final Log l4j = LogFactory.getLog(OptionsProcessor.class
+ .getName());
- /**
- * shameless cloned from hadoop streaming
- * take in multiple -hiveconf x=y parameters
+ private final Parser parser = new Parser();
+ private final Option confOptions, isSilentOption, execOption, fileOption,
+ isHelpOption;
+
+ /**
+ * shameless cloned from hadoop streaming take in multiple -hiveconf x=y
+ * parameters
*/
- class MultiPropertyOption extends PropertyOption{
- private String optionString;
- MultiPropertyOption(){
- super();
- }
-
- MultiPropertyOption(final String optionString,
- final String description,
- final int id){
- super(optionString, description, id);
+ class MultiPropertyOption extends PropertyOption {
+ private String optionString;
+
+ MultiPropertyOption() {
+ super();
+ }
+
+ MultiPropertyOption(final String optionString, final String description,
+ final int id) {
+ super(optionString, description, id);
this.optionString = optionString;
}
+ @Override
public boolean canProcess(final WriteableCommandLine commandLine,
- final String argument) {
+ final String argument) {
boolean ret = (argument != null) && argument.startsWith(optionString);
-
+
return ret;
- }
+ }
+ @Override
public void process(final WriteableCommandLine commandLine,
- final ListIterator arguments) throws OptionException {
+ final ListIterator arguments) throws OptionException {
final String arg = (String) arguments.next();
if (!canProcess(commandLine, arg)) {
- throw new OptionException(this,
- ResourceConstants.UNEXPECTED_TOKEN, arg);
+ throw new OptionException(this, ResourceConstants.UNEXPECTED_TOKEN, arg);
}
-
- ArrayList properties = new ArrayList();
- String next = "";
- while(arguments.hasNext()){
+
+ ArrayList properties = new ArrayList();
+ String next = "";
+ while (arguments.hasNext()) {
next = (String) arguments.next();
- if (!next.startsWith("-")){
-
- if(next.indexOf("=") == -1) {
+ if (!next.startsWith("-")) {
+
+ if (next.indexOf("=") == -1) {
throw new OptionException(this, ResourceConstants.UNEXPECTED_TOKEN,
- "argument: '" + next + "' is not of the form x=y");
+ "argument: '" + next + "' is not of the form x=y");
}
properties.add(next);
- }else{
+ } else {
arguments.previous();
- break;
+ break;
}
- }
+ }
// add to any existing values (support specifying args multiple times)
- List<String> oldVal = (List<String>)commandLine.getValue(this);
- if (oldVal == null){
+ List<String> oldVal = (List<String>) commandLine.getValue(this);
+ if (oldVal == null) {
commandLine.addValue(this, properties);
- } else{
- oldVal.addAll(properties);
+ } else {
+ oldVal.addAll(properties);
}
}
}
- private Option createBoolOption(DefaultOptionBuilder builder, String longName,
- String shortName, String desc){
+ private Option createBoolOption(DefaultOptionBuilder builder,
+ String longName, String shortName, String desc) {
builder.reset();
- if(longName == null) {
+ if (longName == null) {
return builder.withShortName(shortName).withDescription(desc).create();
} else {
- return builder.withShortName(shortName).withLongName(longName).withDescription(desc).create();
+ return builder.withShortName(shortName).withLongName(longName)
+ .withDescription(desc).create();
}
}
-
- private Option createOptionWithArg(DefaultOptionBuilder builder, String longName,
- String shortName, String desc, Argument arg) {
+ private Option createOptionWithArg(DefaultOptionBuilder builder,
+ String longName, String shortName, String desc, Argument arg) {
builder.reset();
- DefaultOptionBuilder dob =
- builder.withShortName(shortName).
- withArgument(arg).
- withDescription(desc);
+ DefaultOptionBuilder dob = builder.withShortName(shortName).withArgument(
+ arg).withDescription(desc);
- if(longName != null)
+ if (longName != null) {
dob = dob.withLongName(longName);
+ }
return dob.create();
}
-
public OptionsProcessor() {
- DefaultOptionBuilder builder =
- new DefaultOptionBuilder("-","-", false);
+ DefaultOptionBuilder builder = new DefaultOptionBuilder("-", "-", false);
ArgumentBuilder argBuilder = new ArgumentBuilder();
-
- //-e
- execOption = createOptionWithArg(builder, "exec", "e", "execute the following command",
- argBuilder.withMinimum(1).withMaximum(1).create());
-
- //-f
- fileOption = createOptionWithArg(builder, "file", "f", "execute commands from the following file",
- argBuilder.withMinimum(1).withMaximum(1).create());
+ // -e
+ execOption = createOptionWithArg(builder, "exec", "e",
+ "execute the following command", argBuilder.withMinimum(1).withMaximum(
+ 1).create());
+
+ // -f
+ fileOption = createOptionWithArg(builder, "file", "f",
+ "execute commands from the following file", argBuilder.withMinimum(1)
+ .withMaximum(1).create());
// -S
isSilentOption = createBoolOption(builder, "silent", "S", "silent mode");
// -help
isHelpOption = createBoolOption(builder, "help", "h", "help");
-
+
// -hiveconf var=val
- confOptions = new MultiPropertyOption("-hiveconf", "(n=v) Optional. Add or override Hive/Hadoop properties.", 'D');
+ confOptions = new MultiPropertyOption("-hiveconf",
+ "(n=v) Optional. Add or override Hive/Hadoop properties.", 'D');
new PropertyOption();
- Group allOptions = new GroupBuilder().
- withOption(confOptions).
- withOption(isSilentOption).
- withOption(isHelpOption).
- withOption(execOption).
- withOption(fileOption).
- create();
+ Group allOptions = new GroupBuilder().withOption(confOptions).withOption(
+ isSilentOption).withOption(isHelpOption).withOption(execOption)
+ .withOption(fileOption).create();
parser.setGroup(allOptions);
}
private CommandLine cmdLine;
-
- public boolean process_stage1(String [] argv) {
+
+ public boolean process_stage1(String[] argv) {
try {
cmdLine = parser.parse(argv);
- List<String> hiveConfArgs = (List<String>)cmdLine.getValue(confOptions);
- if (null != hiveConfArgs){
- for(String s : hiveConfArgs){
- String []parts = s.split("=", 2);
+ List<String> hiveConfArgs = (List<String>) cmdLine.getValue(confOptions);
+ if (null != hiveConfArgs) {
+ for (String s : hiveConfArgs) {
+ String[] parts = s.split("=", 2);
System.setProperty(parts[0], parts[1]);
}
}
@@ -182,29 +184,28 @@
return true;
}
-
public boolean process_stage2(CliSessionState ss) {
- HiveConf hconf = ss.getConf();
- //-S
+ ss.getConf();
+ // -S
ss.setIsSilent(cmdLine.hasOption(isSilentOption));
- //-e
+ // -e
ss.execString = (String) cmdLine.getValue(execOption);
- //-f
+ // -f
ss.fileName = (String) cmdLine.getValue(fileOption);
// -h
if (cmdLine.hasOption(isHelpOption)) {
printUsage(null);
return false;
}
- if(ss.execString != null && ss.fileName != null) {
+ if (ss.execString != null && ss.fileName != null) {
printUsage("-e and -f option cannot be specified simultaneously");
return false;
}
- List<String> hiveConfArgs = (List<String>)cmdLine.getValue(confOptions);
- if (null != hiveConfArgs){
- for(String s : hiveConfArgs){
- String []parts = s.split("=", 2);
+ List<String> hiveConfArgs = (List<String>) cmdLine.getValue(confOptions);
+ if (null != hiveConfArgs) {
+ for (String s : hiveConfArgs) {
+ String[] parts = s.split("=", 2);
ss.cmdProperties.setProperty(parts[0], parts[1]);
}
}
@@ -212,20 +213,23 @@
return true;
}
- public void printUsage (String error) {
+ public void printUsage(String error) {
if (error != null) {
System.err.println("Invalid arguments: " + error);
}
System.err.println("");
- System.err.println("Usage: hive [--config confdir] [-hiveconf x=y]* [<-f filename>|<-e query-string>] [-S]");
+ System.err
+ .println("Usage: hive [--config confdir] [-hiveconf x=y]* [<-f filename>|<-e query-string>] [-S]");
System.err.println("");
- System.err.println(" -e 'quoted query string' Sql from command line");
+ System.err.println(" -e 'quoted query string' Sql from command line");
System.err.println(" -f <filename> Sql from files");
- System.err.println(" -S Silent mode in interactive shell");
+ System.err
+ .println(" -S Silent mode in interactive shell");
System.err.println("");
- System.err.println("-e and -f cannot be specified together. In the absence of these");
+ System.err
+ .println("-e and -f cannot be specified together. In the absence of these");
System.err.println("options, interactive shell is started");
System.err.println("");
-
+
}
}
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Thu Jan 21 07:29:29 2010
@@ -18,33 +18,34 @@
package org.apache.hadoop.hive.common;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.net.URI;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
/**
* Collection of file manipulation utilities common across Hive
*/
public class FileUtils {
/**
- * Variant of Path.makeQualified that qualifies the input
- * path against the default file system indicated by the
- * configuration
- *
- * This does not require a FileSystem handle in most cases
- * - only requires the Filesystem URI. This saves the cost
- * of opening the Filesystem - which can involve RPCs - as
- * well as cause errors
- *
- * @param path path to be fully qualified
- * @param conf Configuration file
+ * Variant of Path.makeQualified that qualifies the input path against the
+ * default file system indicated by the configuration
+ *
+ * This does not require a FileSystem handle in most cases - only requires the
+ * Filesystem URI. This saves the cost of opening the Filesystem - which can
+ * involve RPCs - as well as cause errors
+ *
+ * @param path
+ * path to be fully qualified
+ * @param conf
+ * Configuration file
* @return path qualified relative to default file system
*/
- public static Path makeQualified(Path path, Configuration conf)
- throws IOException {
+ public static Path makeQualified(Path path, Configuration conf)
+ throws IOException {
if (!path.isAbsolute()) {
// in this case we need to get the working directory
@@ -59,14 +60,14 @@
String scheme = pathUri.getScheme();
String authority = pathUri.getAuthority();
- if (scheme != null &&
- (authority != null || fsUri.getAuthority() == null))
+ if (scheme != null && (authority != null || fsUri.getAuthority() == null)) {
return path;
-
+ }
+
if (scheme == null) {
scheme = fsUri.getScheme();
}
-
+
if (authority == null) {
authority = fsUri.getAuthority();
if (authority == null) {
@@ -74,6 +75,6 @@
}
}
- return new Path(scheme+":"+"//"+authority + pathUri.getPath());
+ return new Path(scheme + ":" + "//" + authority + pathUri.getPath());
}
}
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java Thu Jan 21 07:29:29 2010
@@ -19,7 +19,8 @@
package org.apache.hadoop.hive.common;
/**
- * Collection of Java class loading/reflection related utilities common across Hive
+ * Collection of Java class loading/reflection related utilities common across
+ * Hive
*/
public class JavaUtils {
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayInputStream.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayInputStream.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayInputStream.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayInputStream.java Thu Jan 21 07:29:29 2010
@@ -37,10 +37,10 @@
}
public void reset(byte[] input, int start, int length) {
- this.buf = input;
- this.count = start + length;
- this.mark = start;
- this.pos = start;
+ buf = input;
+ count = start + length;
+ mark = start;
+ pos = start;
}
public int getPosition() {
@@ -54,6 +54,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public int read() {
return (pos < count) ? (buf[pos++] & 0xff) : -1;
}
@@ -61,6 +62,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public int read(byte b[], int off, int len) {
if (b == null) {
throw new NullPointerException();
@@ -84,6 +86,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public long skip(long n) {
if (pos + n > count) {
n = count - pos;
@@ -98,6 +101,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public int available() {
return count - pos;
}
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java Thu Jan 21 07:29:29 2010
@@ -46,6 +46,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public void reset() {
count = 0;
}
@@ -59,6 +60,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public void write(int b) {
enLargeBuffer(1);
buf[count] = (byte) b;
@@ -69,8 +71,9 @@
int temp = count + increment;
int newLen = temp;
if (temp > buf.length) {
- if ((buf.length << 1) > temp)
+ if ((buf.length << 1) > temp) {
newLen = buf.length << 1;
+ }
byte newbuf[] = new byte[newLen];
System.arraycopy(buf, 0, newbuf, 0, count);
buf = newbuf;
@@ -81,6 +84,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public void write(byte b[], int off, int len) {
if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length)
|| ((off + len) < 0)) {
@@ -96,6 +100,7 @@
/**
* {@inheritDoc}
*/
+ @Override
public void writeTo(OutputStream out) throws IOException {
out.write(buf, 0, count);
}
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextInputFormat.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextInputFormat.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextInputFormat.java Thu Jan 21 07:29:29 2010
@@ -22,6 +22,7 @@
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
+import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
@@ -35,28 +36,25 @@
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.commons.codec.binary.Base64;
/**
* FileInputFormat for base64 encoded text files.
*
- * Each line is a base64-encoded record.
- * The key is a LongWritable which is the offset.
- * The value is a BytesWritable containing the base64-decoded bytes.
+ * Each line is a base64-encoded record. The key is a LongWritable which is the
+ * offset. The value is a BytesWritable containing the base64-decoded bytes.
*
* This class accepts a configurable parameter:
* "base64.text.input.format.signature"
*
- * The UTF-8 encoded signature will be compared with the beginning
- * of each decoded bytes. If they don't match, the record is discarded.
- * If they match, the signature is stripped off the data.
+ * The UTF-8 encoded signature will be compared with the beginning of each
+ * decoded bytes. If they don't match, the record is discarded. If they match,
+ * the signature is stripped off the data.
*/
-public class Base64TextInputFormat
- implements InputFormat<LongWritable, BytesWritable>, JobConfigurable {
-
-
- public static class Base64LineRecordReader
- implements RecordReader<LongWritable, BytesWritable>, JobConfigurable {
+public class Base64TextInputFormat implements
+ InputFormat<LongWritable, BytesWritable>, JobConfigurable {
+
+ public static class Base64LineRecordReader implements
+ RecordReader<LongWritable, BytesWritable>, JobConfigurable {
LineRecordReader reader;
Text text;
@@ -65,7 +63,7 @@
this.reader = reader;
text = reader.createValue();
}
-
+
@Override
public void close() throws IOException {
reader.close();
@@ -98,7 +96,7 @@
// text -> byte[] -> value
byte[] textBytes = text.getBytes();
int length = text.getLength();
-
+
// Trim additional bytes
if (length != textBytes.length) {
textBytes = Arrays.copyOf(textBytes, length);
@@ -107,22 +105,24 @@
// compare data header with signature
int i;
- for (i = 0; i < binaryData.length && i < signature.length &&
- binaryData[i] == signature[i]; ++i);
+ for (i = 0; i < binaryData.length && i < signature.length
+ && binaryData[i] == signature[i]; ++i) {
+ ;
+ }
// return the row only if it's not corrupted
if (i == signature.length) {
- value.set(binaryData, signature.length,
- binaryData.length - signature.length);
+ value.set(binaryData, signature.length, binaryData.length
+ - signature.length);
return true;
}
}
// no more data
return false;
}
-
+
private byte[] signature;
- private Base64 base64 = new Base64();
+ private final Base64 base64 = new Base64();
@Override
public void configure(JobConf job) {
@@ -135,26 +135,26 @@
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
- }
+ }
}
}
-
+
TextInputFormat format;
JobConf job;
-
+
public Base64TextInputFormat() {
format = new TextInputFormat();
}
-
+
@Override
public void configure(JobConf job) {
this.job = job;
format.configure(job);
}
-
+
public RecordReader<LongWritable, BytesWritable> getRecordReader(
- InputSplit genericSplit, JobConf job,
- Reporter reporter) throws IOException {
+ InputSplit genericSplit, JobConf job, Reporter reporter)
+ throws IOException {
reporter.setStatus(genericSplit.toString());
Base64LineRecordReader reader = new Base64LineRecordReader(
new LineRecordReader(job, (FileSplit) genericSplit));
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextOutputFormat.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextOutputFormat.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/fileformat/base64/Base64TextOutputFormat.java Thu Jan 21 07:29:29 2010
@@ -37,69 +37,67 @@
/**
* FileOutputFormat for base64 encoded text files.
*
- * Each line is a base64-encoded record.
- * The key is a LongWritable which is the offset.
- * The value is a BytesWritable containing the base64-decoded bytes.
+ * Each line is a base64-encoded record. The key is a LongWritable which is the
+ * offset. The value is a BytesWritable containing the base64-decoded bytes.
*
* This class accepts a configurable parameter:
* "base64.text.output.format.signature"
*
- * The UTF-8 encoded signature will be prepended to each BytesWritable
- * before we do base64 encoding.
+ * The UTF-8 encoded signature will be prepended to each BytesWritable before we
+ * do base64 encoding.
*/
public class Base64TextOutputFormat<K extends WritableComparable, V extends Writable>
- extends HiveIgnoreKeyTextOutputFormat<K, V> {
+ extends HiveIgnoreKeyTextOutputFormat<K, V> {
-
- public static class Base64RecordWriter implements RecordWriter,
- JobConfigurable{
+ public static class Base64RecordWriter implements RecordWriter,
+ JobConfigurable {
RecordWriter writer;
BytesWritable bytesWritable;
-
+
public Base64RecordWriter(RecordWriter writer) {
this.writer = writer;
bytesWritable = new BytesWritable();
}
-
+
@Override
public void write(Writable w) throws IOException {
-
+
// Get input data
byte[] input;
int inputLength;
if (w instanceof Text) {
- input = ((Text)w).getBytes();
- inputLength = ((Text)w).getLength();
+ input = ((Text) w).getBytes();
+ inputLength = ((Text) w).getLength();
} else {
- assert(w instanceof BytesWritable);
- input = ((BytesWritable)w).get();
- inputLength = ((BytesWritable)w).getSize();
+ assert (w instanceof BytesWritable);
+ input = ((BytesWritable) w).get();
+ inputLength = ((BytesWritable) w).getSize();
}
-
+
// Add signature
byte[] wrapped = new byte[signature.length + inputLength];
- for (int i=0; i<signature.length; i++) {
+ for (int i = 0; i < signature.length; i++) {
wrapped[i] = signature[i];
}
- for (int i=0; i<inputLength; i++) {
- wrapped[i + signature.length] = input[i];
+ for (int i = 0; i < inputLength; i++) {
+ wrapped[i + signature.length] = input[i];
}
-
+
// Encode
byte[] output = base64.encode(wrapped);
bytesWritable.set(output, 0, output.length);
-
+
writer.write(bytesWritable);
}
@Override
public void close(boolean abort) throws IOException {
- writer.close(abort);
+ writer.close(abort);
}
private byte[] signature;
- private Base64 base64 = new Base64();
+ private final Base64 base64 = new Base64();
@Override
public void configure(JobConf job) {
@@ -112,21 +110,20 @@
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
- }
+ }
}
}
-
+
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
Class<? extends Writable> valueClass, boolean isCompressed,
Properties tableProperties, Progressable progress) throws IOException {
-
- Base64RecordWriter writer = new Base64RecordWriter(
- super.getHiveRecordWriter(jc, finalOutPath, BytesWritable.class,
+
+ Base64RecordWriter writer = new Base64RecordWriter(super
+ .getHiveRecordWriter(jc, finalOutPath, BytesWritable.class,
isCompressed, tableProperties, progress));
writer.configure(jc);
return writer;
}
-
}
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/genericudf/example/GenericUDFDBOutput.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/genericudf/example/GenericUDFDBOutput.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/genericudf/example/GenericUDFDBOutput.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/genericudf/example/GenericUDFDBOutput.java Thu Jan 21 07:29:29 2010
@@ -21,16 +21,15 @@
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.description;
-import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.exec.description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -39,31 +38,28 @@
import org.apache.hadoop.io.IntWritable;
/**
-* GenericUDFDBOutput is designed to output data directly from Hive to a JDBC datastore.
-* This UDF is useful for exporting small to medium summaries that have a unique key.
-*
-* Due to the nature of hadoop, individual mappers, reducers or entire jobs can fail.
-* If a failure occurs a mapper or reducer may be retried. This UDF has no way of
-* detecting failures or rolling back a transaction. Consequently, you should only
-* only use this to export to a table with a unique key. The unique key should safeguard
-* against duplicate data.
-*
-* Use hive's ADD JAR feature to add your JDBC Driver to the distributed cache,
-* otherwise GenericUDFDBoutput will fail.
-*/
-@description(
- name = "dboutput",
- value = "_FUNC_(jdbcstring,username,password,preparedstatement,[arguments]) - sends data to a jdbc driver",
- extended =
- "argument 0 is the JDBC connection string\n"+
- "argument 1 is the user name\n"+
- "argument 2 is the password\n"+
- "argument 3 is an SQL query to be used in the PreparedStatement\n"+
- "argument (4-n) The remaining arguments must be primitive and are passed to the PreparedStatement object\n"
-)
-@UDFType(deterministic=false)
+ * GenericUDFDBOutput is designed to output data directly from Hive to a JDBC
+ * datastore. This UDF is useful for exporting small to medium summaries that
+ * have a unique key.
+ *
+ * Due to the nature of hadoop, individual mappers, reducers or entire jobs can
+ * fail. If a failure occurs a mapper or reducer may be retried. This UDF has no
+ * way of detecting failures or rolling back a transaction. Consequently, you
+ * should only only use this to export to a table with a unique key. The unique
+ * key should safeguard against duplicate data.
+ *
+ * Use hive's ADD JAR feature to add your JDBC Driver to the distributed cache,
+ * otherwise GenericUDFDBoutput will fail.
+ */
+@description(name = "dboutput", value = "_FUNC_(jdbcstring,username,password,preparedstatement,[arguments]) - sends data to a jdbc driver", extended = "argument 0 is the JDBC connection string\n"
+ + "argument 1 is the user name\n"
+ + "argument 2 is the password\n"
+ + "argument 3 is an SQL query to be used in the PreparedStatement\n"
+ + "argument (4-n) The remaining arguments must be primitive and are passed to the PreparedStatement object\n")
+@UDFType(deterministic = false)
public class GenericUDFDBOutput extends GenericUDF {
- private static Log LOG = LogFactory.getLog(GenericUDFDBOutput.class.getName());
+ private static Log LOG = LogFactory
+ .getLog(GenericUDFDBOutput.class.getName());
ObjectInspector[] argumentOI;
GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
@@ -71,63 +67,71 @@
private String url;
private String user;
private String pass;
- private IntWritable result = new IntWritable(-1);
+ private final IntWritable result = new IntWritable(-1);
+
/**
- * @param arguments
- * argument 0 is the JDBC connection string
- * argument 1 is the user name
- * argument 2 is the password
- * argument 3 is an SQL query to be used in the PreparedStatement
- * argument (4-n) The remaining arguments must be primitive and are passed to the PreparedStatement object
- */
+ * @param arguments
+ * argument 0 is the JDBC connection string argument 1 is the user
+ * name argument 2 is the password argument 3 is an SQL query to be
+ * used in the PreparedStatement argument (4-n) The remaining
+ * arguments must be primitive and are passed to the
+ * PreparedStatement object
+ */
+ @Override
public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentTypeException {
- this.argumentOI = arguments;
+ argumentOI = arguments;
+
+ // this should be connection url,username,password,query,column1[,columnn]*
+ for (int i = 0; i < 4; i++) {
+ if (arguments[i].getCategory() == ObjectInspector.Category.PRIMITIVE) {
+ PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) arguments[i]);
- //this should be connection url,username,password,query,column1[,columnn]*
- for (int i=0;i<4;i++){
- if ( arguments[i].getCategory() == ObjectInspector.Category.PRIMITIVE) {
- PrimitiveObjectInspector poi = ((PrimitiveObjectInspector)arguments[i]);
-
- if (! (poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING)){
+ if (!(poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING)) {
throw new UDFArgumentTypeException(i,
- "The argument of function should be \"" + Constants.STRING_TYPE_NAME
- + "\", but \"" + arguments[i].getTypeName() + "\" is found");
+ "The argument of function should be \""
+ + Constants.STRING_TYPE_NAME + "\", but \""
+ + arguments[i].getTypeName() + "\" is found");
}
}
}
- for (int i=4;i<arguments.length;i++){
- if ( arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ for (int i = 4; i < arguments.length; i++) {
+ if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(i,
- "The argument of function should be primative" +
- ", but \"" + arguments[i].getTypeName() + "\" is found");
+ "The argument of function should be primative" + ", but \""
+ + arguments[i].getTypeName() + "\" is found");
}
}
-
+
return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
- * @return 0 on success -1 on failure
- */
+ * @return 0 on success -1 on failure
+ */
+ @Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
-
- url = ((StringObjectInspector)argumentOI[0]).getPrimitiveJavaObject(arguments[0].get());
- user = ((StringObjectInspector)argumentOI[1]).getPrimitiveJavaObject(arguments[1].get()) ;
- pass = ((StringObjectInspector)argumentOI[2]).getPrimitiveJavaObject(arguments[2].get()) ;
- try {
+ url = ((StringObjectInspector) argumentOI[0])
+ .getPrimitiveJavaObject(arguments[0].get());
+ user = ((StringObjectInspector) argumentOI[1])
+ .getPrimitiveJavaObject(arguments[1].get());
+ pass = ((StringObjectInspector) argumentOI[2])
+ .getPrimitiveJavaObject(arguments[2].get());
+
+ try {
connection = DriverManager.getConnection(url, user, pass);
- } catch (SQLException ex) {
+ } catch (SQLException ex) {
LOG.error("Driver loading or connection issue", ex);
result.set(2);
}
-
- if (connection != null){
+
+ if (connection != null) {
try {
- PreparedStatement ps = connection.prepareStatement(
- ((StringObjectInspector)argumentOI[3]).getPrimitiveJavaObject(arguments[3].get()) );
+ PreparedStatement ps = connection
+ .prepareStatement(((StringObjectInspector) argumentOI[3])
+ .getPrimitiveJavaObject(arguments[3].get()));
for (int i = 4; i < arguments.length; ++i) {
PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) argumentOI[i]);
ps.setObject(i - 3, poi.getPrimitiveJavaObject(arguments[i].get()));
@@ -145,18 +149,18 @@
LOG.error("Underlying SQL exception during close", ex);
}
}
- }
+ }
return result;
}
-
+ @Override
public String getDisplayString(String[] children) {
StringBuilder sb = new StringBuilder();
sb.append("dboutput(");
if (children.length > 0) {
sb.append(children[0]);
- for(int i=1; i<children.length; i++) {
+ for (int i = 1; i < children.length; i++) {
sb.append(",");
sb.append(children[i]);
}
@@ -166,4 +170,3 @@
}
}
-
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/GenericMR.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/GenericMR.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/GenericMR.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/GenericMR.java Thu Jan 21 07:29:29 2010
@@ -40,45 +40,50 @@
*
* As an example, here's the wordcount reduce:
*
- * new GenericMR().reduce(System.in, System.out, new Reducer() {
- * public void reduce(String key, Iterator<String[]> records, Output output) throws Exception {
- * int count = 0;
+ * new GenericMR().reduce(System.in, System.out, new Reducer() { public void
+ * reduce(String key, Iterator<String[]> records, Output output) throws
+ * Exception { int count = 0;
*
- * while (records.hasNext()) {
- * count += Integer.parseInt(records.next()[1]);
- * }
+ * while (records.hasNext()) { count += Integer.parseInt(records.next()[1]); }
*
- * output.collect(new String[] { key, String.valueOf(count) });
- * }});
+ * output.collect(new String[] { key, String.valueOf(count) }); }});
*/
public final class GenericMR {
- public void map(final InputStream in, final OutputStream out, final Mapper mapper) throws Exception {
+ public void map(final InputStream in, final OutputStream out,
+ final Mapper mapper) throws Exception {
map(new InputStreamReader(in), new OutputStreamWriter(out), mapper);
}
- public void map(final Reader in, final Writer out, final Mapper mapper) throws Exception {
+ public void map(final Reader in, final Writer out, final Mapper mapper)
+ throws Exception {
handle(in, out, new RecordProcessor() {
@Override
- public void processNext(RecordReader reader, Output output) throws Exception {
+ public void processNext(RecordReader reader, Output output)
+ throws Exception {
mapper.map(reader.next(), output);
}
});
}
- public void reduce(final InputStream in, final OutputStream out, final Reducer reducer) throws Exception {
+ public void reduce(final InputStream in, final OutputStream out,
+ final Reducer reducer) throws Exception {
reduce(new InputStreamReader(in), new OutputStreamWriter(out), reducer);
}
- public void reduce(final Reader in, final Writer out, final Reducer reducer) throws Exception {
+ public void reduce(final Reader in, final Writer out, final Reducer reducer)
+ throws Exception {
handle(in, out, new RecordProcessor() {
@Override
- public void processNext(RecordReader reader, Output output) throws Exception {
- reducer.reduce(reader.peek()[0], new KeyRecordIterator(reader.peek()[0], reader), output);
+ public void processNext(RecordReader reader, Output output)
+ throws Exception {
+ reducer.reduce(reader.peek()[0], new KeyRecordIterator(
+ reader.peek()[0], reader), output);
}
});
}
- private void handle(final Reader in, final Writer out, final RecordProcessor processor) throws Exception {
+ private void handle(final Reader in, final Writer out,
+ final RecordProcessor processor) throws Exception {
final RecordReader reader = new RecordReader(in);
final OutputStreamOutput output = new OutputStreamOutput(out);
@@ -96,7 +101,8 @@
}
private static interface RecordProcessor {
- void processNext(final RecordReader reader, final Output output) throws Exception;
+ void processNext(final RecordReader reader, final Output output)
+ throws Exception;
}
private static final class KeyRecordIterator implements Iterator<String[]> {
@@ -110,7 +116,7 @@
@Override
public boolean hasNext() {
- return (this.reader.hasNext() && this.key.equals(this.reader.peek()[0]));
+ return (reader.hasNext() && key.equals(reader.peek()[0]));
}
@Override
@@ -119,7 +125,7 @@
throw new NoSuchElementException();
}
- return this.reader.next();
+ return reader.next();
}
@Override
@@ -137,21 +143,21 @@
}
private RecordReader(final Reader in) {
- this.reader = new BufferedReader(in);
- this.next = readNext();
+ reader = new BufferedReader(in);
+ next = readNext();
}
private String[] next() {
final String[] ret = next;
- this.next = readNext();
+ next = readNext();
return ret;
}
private String[] readNext() {
try {
- final String line = this.reader.readLine();
+ final String line = reader.readLine();
return (line == null ? null : line.split("\t"));
} catch (final Exception e) {
throw new RuntimeException(e);
@@ -167,7 +173,7 @@
}
private void close() throws Exception {
- this.reader.close();
+ reader.close();
}
}
@@ -190,13 +196,13 @@
public void collect(String[] record) throws Exception {
out.println(_join(record, "\t"));
}
-
+
private static String _join(final String[] record, final String separator) {
if (record == null || record.length == 0) {
return "";
}
final StringBuilder sb = new StringBuilder();
- for (int i=0; i< record.length; i++) {
+ for (int i = 0; i < record.length; i++) {
if (i > 0) {
sb.append(separator);
}
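
For readers following the API being reformatted here, the map-side counterpart of the wordcount reduce quoted in the GenericMR javadoc can be written against the same three classes. A minimal sketch, assuming whitespace-separated words inside each tab-split input column (the class name and the tokenizing rule are made up for illustration, not part of this change):

import org.apache.hadoop.hive.contrib.mr.GenericMR;
import org.apache.hadoop.hive.contrib.mr.Mapper;
import org.apache.hadoop.hive.contrib.mr.Output;

public final class WordCountMap {
  public static void main(final String[] args) throws Exception {
    new GenericMR().map(System.in, System.out, new Mapper() {
      @Override
      public void map(final String[] record, final Output output)
          throws Exception {
        // Emit one ("word", "1") row per token; GenericMR joins output rows with tabs.
        for (final String column : record) {
          for (final String word : column.split("\\s+")) {
            if (word.length() > 0) {
              output.collect(new String[] { word, "1" });
            }
          }
        }
      }
    });
  }
}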
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Mapper.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Mapper.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Mapper.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Mapper.java Thu Jan 21 07:29:29 2010
@@ -16,16 +16,20 @@
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.mr;
+
/**
* Mapper.
*/
public interface Mapper {
/**
- * Maps a single row into an intermediate rows.
+ * Maps a single row into an intermediate rows.
*
- * @param record input record
- * @param output collect mapped rows.
- * @throws Exception on error
+ * @param record
+ * input record
+ * @param output
+ * collect mapped rows.
+ * @throws Exception
+ * on error
*/
void map(String[] record, Output output) throws Exception;
}
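
As a further sketch of the interface above (the filter predicate and the use of column 0 as the key are assumptions for the example, not part of this change), a Mapper can also drop rows instead of transforming them:

import org.apache.hadoop.hive.contrib.mr.Mapper;
import org.apache.hadoop.hive.contrib.mr.Output;

public final class NonEmptyKeyMapper implements Mapper {
  @Override
  public void map(final String[] record, final Output output) throws Exception {
    // Forward only rows whose first column carries a non-empty key.
    if (record != null && record.length > 0 && record[0].length() > 0) {
      output.collect(record);
    }
  }
}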
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Output.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Output.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Output.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Output.java Thu Jan 21 07:29:29 2010
@@ -16,11 +16,12 @@
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.mr;
+
/**
* Collects output.
*
- * It's the responsibility of the caller to ensure the output
- * is in the correct format (contains the correct number of columns, etc.)
+ * It's the responsibility of the caller to ensure the output is in the correct
+ * format (contains the correct number of columns, etc.)
*/
public interface Output {
/**
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Reducer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Reducer.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Reducer.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/Reducer.java Thu Jan 21 07:29:29 2010
@@ -26,13 +26,17 @@
/**
* Reduce.
*
- * Note that it is assumed that the key is the first column. Additionally, the key
- * will be repeated as the first column in the records[] array.
+ * Note that it is assumed that the key is the first column. Additionally, the
+ * key will be repeated as the first column in the records[] array.
*
- * @param key key (first column) for this set of records.
- * @param records Iterator of records for this key. Note that the first column of record will also be the key.
+ * @param key
+ * key (first column) for this set of records.
+ * @param records
+ * Iterator of records for this key. Note that the first column of
+ * record will also be the key.
* @param output
* @throws Exception
*/
- void reduce(String key, Iterator<String[]> records, Output output) throws Exception;
+ void reduce(String key, Iterator<String[]> records, Output output)
+ throws Exception;
}
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/IdentityMapper.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/IdentityMapper.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/IdentityMapper.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/IdentityMapper.java Thu Jan 21 07:29:29 2010
@@ -22,13 +22,14 @@
import org.apache.hadoop.hive.contrib.mr.Output;
/**
- * Example Mapper (Identity).
+ * Example Mapper (Identity).
*/
public final class IdentityMapper {
public static void main(final String[] args) throws Exception {
new GenericMR().map(System.in, System.out, new Mapper() {
@Override
- public void map(final String[] record, final Output output) throws Exception {
+ public void map(final String[] record, final Output output)
+ throws Exception {
output.collect(record);
}
});
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/WordCountReduce.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/WordCountReduce.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/WordCountReduce.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/mr/example/WordCountReduce.java Thu Jan 21 07:29:29 2010
@@ -24,19 +24,20 @@
import org.apache.hadoop.hive.contrib.mr.Reducer;
/**
- * Example Reducer (WordCount).
+ * Example Reducer (WordCount).
*/
public final class WordCountReduce {
public static void main(final String[] args) throws Exception {
new GenericMR().reduce(System.in, System.out, new Reducer() {
- public void reduce(String key, Iterator<String[]> records, Output output) throws Exception {
+ public void reduce(String key, Iterator<String[]> records, Output output)
+ throws Exception {
int count = 0;
-
+
while (records.hasNext()) {
// note we use col[1] -- the key is provided again as col[0]
count += Integer.parseInt(records.next()[1]);
}
-
+
output.collect(new String[] { key, String.valueOf(count) });
}
});
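
Because GenericMR also exposes Reader/Writer overloads (see the reduce(Reader, Writer, Reducer) signature above), the same reducer logic can be exercised in-process without stdin/stdout. A minimal sketch, assuming the harness flushes its output once the input is exhausted; the sample rows and expected counts are made up for illustration:

import java.io.StringReader;
import java.io.StringWriter;
import java.util.Iterator;

import org.apache.hadoop.hive.contrib.mr.GenericMR;
import org.apache.hadoop.hive.contrib.mr.Output;
import org.apache.hadoop.hive.contrib.mr.Reducer;

public final class WordCountReduceSketch {
  public static void main(final String[] args) throws Exception {
    final StringWriter out = new StringWriter();
    // Input must arrive sorted by key, one tab-separated "key<TAB>count" per line.
    new GenericMR().reduce(new StringReader("hello\t1\nhello\t1\nworld\t1\n"),
        out, new Reducer() {
          public void reduce(String key, Iterator<String[]> records,
              Output output) throws Exception {
            int count = 0;
            while (records.hasNext()) {
              count += Integer.parseInt(records.next()[1]);
            }
            output.collect(new String[] { key, String.valueOf(count) });
          }
        });
    // Expected: "hello\t2" and "world\t1", one per line.
    System.out.print(out.toString());
  }
}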
Modified: hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java?rev=901581&r1=901580&r2=901581&view=diff
==============================================================================
--- hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java (original)
+++ hadoop/hive/trunk/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/RegexSerDe.java Thu Jan 21 07:29:29 2010
@@ -46,56 +46,55 @@
/**
* RegexSerDe uses regular expression (regex) to serialize/deserialize.
*
- * It can deserialize the data using regex and extracts groups as columns.
- * It can also serialize the row object using a format string.
+ * It can deserialize the data using regex and extracts groups as columns. It
+ * can also serialize the row object using a format string.
*
- * In deserialization stage, if a row does not match the regex, then all
- * columns in the row will be NULL. If a row matches the regex but has
- * less than expected groups, the missing groups will be NULL. If a row
- * matches the regex but has more than expected groups, the additional
- * groups are just ignored.
- *
- * In serialization stage, it uses java string formatter to format the
- * columns into a row. If the output type of the column in a query is
- * not a string, it will be automatically converted to String by Hive.
- *
- * For the format of the format String, please refer to
- * {@link http://java.sun.com/j2se/1.5.0/docs/api/java/util/Formatter.html#syntax}
- *
- * NOTE: Obviously, all columns have to be strings.
- * Users can use "CAST(a AS INT)" to convert columns to other types.
- *
- * NOTE: This implementation is using String, and javaStringObjectInspector.
- * A more efficient implementation should use UTF-8 encoded Text and
- * writableStringObjectInspector. We should switch to that when we have a
- * UTF-8 based Regex library.
+ * In deserialization stage, if a row does not match the regex, then all columns
+ * in the row will be NULL. If a row matches the regex but has less than
+ * expected groups, the missing groups will be NULL. If a row matches the regex
+ * but has more than expected groups, the additional groups are just ignored.
+ *
+ * In serialization stage, it uses java string formatter to format the columns
+ * into a row. If the output type of the column in a query is not a string, it
+ * will be automatically converted to String by Hive.
+ *
+ * For the format of the format String, please refer to {@link http
+ * ://java.sun.com/j2se/1.5.0/docs/api/java/util/Formatter.html#syntax}
+ *
+ * NOTE: Obviously, all columns have to be strings. Users can use
+ * "CAST(a AS INT)" to convert columns to other types.
+ *
+ * NOTE: This implementation is using String, and javaStringObjectInspector. A
+ * more efficient implementation should use UTF-8 encoded Text and
+ * writableStringObjectInspector. We should switch to that when we have a UTF-8
+ * based Regex library.
*/
public class RegexSerDe implements SerDe {
public static final Log LOG = LogFactory.getLog(RegexSerDe.class.getName());
-
+
int numColumns;
String inputRegex;
String outputFormatString;
-
+
Pattern inputPattern;
-
+
StructObjectInspector rowOI;
ArrayList<String> row;
-
+
@Override
public void initialize(Configuration conf, Properties tbl)
throws SerDeException {
-
+
// We can get the table definition from tbl.
-
+
// Read the configuration parameters
inputRegex = tbl.getProperty("input.regex");
outputFormatString = tbl.getProperty("output.format.string");
String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
- boolean inputRegexIgnoreCase =
- "true".equalsIgnoreCase(tbl.getProperty("input.regex.case.insensitive"));
+ boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl
+ .getProperty("input.regex.case.insensitive"));
// Parse the configuration parameters
if (inputRegex != null) {
@@ -108,28 +107,29 @@
List<TypeInfo> columnTypes = TypeInfoUtils
.getTypeInfosFromTypeString(columnTypeProperty);
assert columnNames.size() == columnTypes.size();
- numColumns = columnNames.size();
-
+ numColumns = columnNames.size();
+
// All columns have to be of type STRING.
for (int c = 0; c < numColumns; c++) {
if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
- throw new SerDeException(getClass().getName()
- + " only accepts string columns, but column[" + c
- + "] named " + columnNames.get(c) + " has type "
- + columnTypes.get(c));
+ throw new SerDeException(getClass().getName()
+ + " only accepts string columns, but column[" + c + "] named "
+ + columnNames.get(c) + " has type " + columnTypes.get(c));
}
}
-
+
// Constructing the row ObjectInspector:
- // The row consists of some string columns, each column will be a java
+ // The row consists of some string columns, each column will be a java
// String object.
- List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
+ List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(
+ columnNames.size());
for (int c = 0; c < numColumns; c++) {
columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
- // StandardStruct uses ArrayList to store the row.
- rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
-
+ // StandardStruct uses ArrayList to store the row.
+ rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
+ columnNames, columnOIs);
+
// Constructing the row object, etc, which will be reused for all rows.
row = new ArrayList<String>(numColumns);
for (int c = 0; c < numColumns; c++) {
@@ -149,7 +149,7 @@
return Text.class;
}
- // Number of rows not matching the regex
+ // Number of rows not matching the regex
long unmatchedRows = 0;
long nextUnmatchedRows = 1;
// Number of rows that match the regex but have missing groups.
@@ -157,44 +157,45 @@
long nextPartialMatchedRows = 1;
long getNextNumberToDisplay(long now) {
- return now*10;
+ return now * 10;
}
-
+
@Override
public Object deserialize(Writable blob) throws SerDeException {
if (inputPattern == null) {
- throw new SerDeException("This table does not have serde property \"input.regex\"!");
+ throw new SerDeException(
+ "This table does not have serde property \"input.regex\"!");
}
- Text rowText = (Text)blob;
-
+ Text rowText = (Text) blob;
+
Matcher m = inputPattern.matcher(rowText.toString());
-
+
// If do not match, ignore the line, return a row with all nulls.
if (!m.matches()) {
- unmatchedRows ++;
+ unmatchedRows++;
if (unmatchedRows >= nextUnmatchedRows) {
nextUnmatchedRows = getNextNumberToDisplay(nextUnmatchedRows);
// Report the row
- LOG.warn("" + unmatchedRows + " unmatched rows are found: "
- + rowText);
+ LOG.warn("" + unmatchedRows + " unmatched rows are found: " + rowText);
}
return null;
}
-
+
// Otherwise, return the row.
for (int c = 0; c < numColumns; c++) {
try {
row.set(c, m.group(c + 1));
} catch (RuntimeException e) {
- partialMatchedRows ++;
+ partialMatchedRows++;
if (partialMatchedRows >= nextPartialMatchedRows) {
nextPartialMatchedRows = getNextNumberToDisplay(nextPartialMatchedRows);
// Report the row
- LOG.warn("" + partialMatchedRows + " partially unmatched rows are found, "
- + " cannot find group " + c + ": " + rowText);
+ LOG.warn("" + partialMatchedRows
+ + " partially unmatched rows are found, " + " cannot find group "
+ + c + ": " + rowText);
}
- row.set(c, null);
+ row.set(c, null);
}
}
return row;
@@ -202,50 +203,58 @@
Object[] outputFields;
Text outputRowText;
-
+
@Override
public Writable serialize(Object obj, ObjectInspector objInspector)
throws SerDeException {
-
+
if (outputFormatString == null) {
- throw new SerDeException("Cannot write data into table because \"output.format.string\""
- + " is not specified in serde properties of the table.");
+ throw new SerDeException(
+ "Cannot write data into table because \"output.format.string\""
+ + " is not specified in serde properties of the table.");
}
-
+
// Get all the fields out.
- // NOTE: The correct way to get fields out of the row is to use objInspector.
- // The obj can be a Java ArrayList, or a Java class, or a byte[] or whatever.
- // The only way to access the data inside the obj is through ObjectInspector.
-
- StructObjectInspector outputRowOI = (StructObjectInspector)objInspector;
- List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();
+ // NOTE: The correct way to get fields out of the row is to use
+ // objInspector.
+ // The obj can be a Java ArrayList, or a Java class, or a byte[] or
+ // whatever.
+ // The only way to access the data inside the obj is through
+ // ObjectInspector.
+
+ StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
+ List<? extends StructField> outputFieldRefs = outputRowOI
+ .getAllStructFieldRefs();
if (outputFieldRefs.size() != numColumns) {
throw new SerDeException("Cannot serialize the object because there are "
- + outputFieldRefs.size() + " fields but the table has " + numColumns +
- " columns.");
+ + outputFieldRefs.size() + " fields but the table has " + numColumns
+ + " columns.");
}
-
+
// Get all data out.
for (int c = 0; c < numColumns; c++) {
- Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
- ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();
+ Object field = outputRowOI
+ .getStructFieldData(obj, outputFieldRefs.get(c));
+ ObjectInspector fieldOI = outputFieldRefs.get(c)
+ .getFieldObjectInspector();
// The data must be of type String
- StringObjectInspector fieldStringOI = (StringObjectInspector)fieldOI;
- // Convert the field to Java class String, because objects of String type can be
- // stored in String, Text, or some other classes.
- outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
+ StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
+ // Convert the field to Java class String, because objects of String type
+ // can be
+ // stored in String, Text, or some other classes.
+ outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
}
-
+
// Format the String
String outputRowString = null;
try {
outputRowString = String.format(outputFormatString, outputFields);
} catch (MissingFormatArgumentException e) {
- throw new SerDeException("The table contains " + numColumns
+ throw new SerDeException("The table contains " + numColumns
+ " columns, but the outputFormatString is asking for more.", e);
}
outputRowText.set(outputRowString);
return outputRowText;
}
-
+
}
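
The SerDe reformatted above is driven through two entry points, initialize(Configuration, Properties) and deserialize(Writable). A minimal sketch of calling it directly; the regex, column list, and sample line are assumptions made up for the example, and the literal property keys are assumed to match the Constants.LIST_COLUMNS / Constants.LIST_COLUMN_TYPES values read in initialize() (inside Hive these come from the serde properties of the table):

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.contrib.serde2.RegexSerDe;
import org.apache.hadoop.io.Text;

public final class RegexSerDeSketch {
  public static void main(final String[] args) throws Exception {
    final Properties tbl = new Properties();
    tbl.setProperty("input.regex", "(\\S+) (\\S+)");    // two capture groups
    tbl.setProperty("columns", "host,request");         // assumed key for LIST_COLUMNS
    tbl.setProperty("columns.types", "string,string");  // assumed key for LIST_COLUMN_TYPES

    final RegexSerDe serde = new RegexSerDe();
    serde.initialize(new Configuration(), tbl);

    // A matching line fills the reused row object; a non-matching line returns null.
    final Object row = serde.deserialize(new Text("127.0.0.1 GET"));
    System.out.println(row); // e.g. [127.0.0.1, GET]
  }
}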