You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2013/10/08 21:11:52 UTC
svn commit: r1530394 - in /hive/trunk: eclipse-templates/.classpath
ivy/libraries.properties ql/ivy.xml
ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java
ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java
Author: brock
Date: Tue Oct 8 19:11:52 2013
New Revision: 1530394
URL: http://svn.apache.org/r1530394
Log:
HIVE-5253: Create component to compile and jar dynamic code (Edward Capriolo via Brock Noland)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java
Modified:
hive/trunk/eclipse-templates/.classpath
hive/trunk/ivy/libraries.properties
hive/trunk/ql/ivy.xml
Modified: hive/trunk/eclipse-templates/.classpath
URL: http://svn.apache.org/viewvc/hive/trunk/eclipse-templates/.classpath?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/eclipse-templates/.classpath (original)
+++ hive/trunk/eclipse-templates/.classpath Tue Oct 8 19:11:52 2013
@@ -88,6 +88,7 @@
<classpathentry kind="lib" path="build/ivy/lib/default/mockito-all-@mockito-all.version@.jar"/>
<classpathentry kind="lib" path="build/ivy/lib/default/ST4-@ST4.version@.jar"/>
<classpathentry kind="lib" path="build/ivy/lib/default/snappy-@snappy.version@.jar"/>
+ <classpathentry kind="lib" path="build/ivy/lib/default/groovy-all-@groovy.version@.jar"/>
<classpathentry kind="lib" path="build/beeline/hive-beeline-@HIVE_VERSION@.jar"/>
<classpathentry kind="lib" path="build/ivy/lib/default/tempus-fugit-@tempus-fugit.version@.jar"/>
<classpathentry kind="src" path="build/contrib/test/src"/>
Modified: hive/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hive/trunk/ivy/libraries.properties?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/ivy/libraries.properties (original)
+++ hive/trunk/ivy/libraries.properties Tue Oct 8 19:11:52 2013
@@ -42,6 +42,7 @@ commons-logging-api.version=1.0.4
commons-pool.version=1.5.4
derby.version=10.4.2.0
guava.version=11.0.2
+groovy.version=2.1.6
hbase.version=0.94.6.1
httpclient.version=4.2.5
httpcore.version=4.2.4
Modified: hive/trunk/ql/ivy.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/ivy.xml?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/ql/ivy.xml (original)
+++ hive/trunk/ql/ivy.xml Tue Oct 8 19:11:52 2013
@@ -49,6 +49,9 @@
<dependency org="com.esotericsoftware.kryo" name="kryo"
rev="${kryo.version}" />
+ <dependency org="org.codehaus.groovy" name="groovy-all"
+ rev="${groovy.version}" />
+
<dependency org="org.json" name="json" rev="${json.version}"/>
<dependency org="commons-collections" name="commons-collections" rev="${commons-collections.version}"/>
<dependency org="commons-configuration" name="commons-configuration" rev="${commons-configuration.version}"
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java?rev=1530394&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java Tue Oct 8 19:11:52 2013
@@ -0,0 +1,278 @@
+package org.apache.hadoop.hive.ql.processors;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.StringTokenizer;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.compress.archivers.jar.JarArchiveEntry;
+import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
+import org.apache.tools.ant.Project;
+import org.apache.tools.ant.types.Path;
+import org.codehaus.groovy.ant.Groovyc;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.io.Files;
+
+/**
+ * Processor allows users to build code inside a hive session, then
+ * use this code as a UDF, Serde, or even a more complex entity like an
+ * input format or hook.
+ *
+ * Note: This class is stateful and not thread safe. Create a new instance for
+ * each invocation of CompileProcessor.
+ *
+ */
+public class CompileProcessor implements CommandProcessor {
+
+  public static final Log LOG = LogFactory.getLog(CompileProcessor.class.getName());
+  public static final LogHelper console = new LogHelper(LOG);
+  public static final String IO_TMP_DIR = "java.io.tmpdir";
+  public static final String GROOVY = "GROOVY";
+  public static final String AS = "AS";
+  public static final String NAMED = "NAMED";
+  private static final String SYNTAX = "syntax: COMPILE ` some code here ` AS groovy NAMED something.groovy";
+  /**
+   * Counts calls to {@link #run(String)} across all instances; each call takes
+   * the next value as its {@link #myId}.
+   */
+  private static final AtomicInteger runCount = new AtomicInteger(0);
+
+  /**
+   * The language of the compiled code. Used to select the appropriate compiler.
+   */
+  private String lang;
+  /**
+   * The code to be compiled
+   */
+  private String code;
+  /**
+   * The name of the file the code will be written to
+   */
+  private String named;
+  /**
+   * The entire command sent to the processor
+   */
+  private String command;
+  /**
+   * Used as part of a file name to help avoid collisions.
+   */
+  private int myId;
+
+  @Override
+  public void init() {
+    //no init needed
+  }
+
+  /**
+   * User supplies dynamic code in this format:
+   * COMPILE ` some code here ` AS groovy NAMED something.groovy;
+   * CompileProcessor will compile and package this code into a jar. The jar
+   * will be added to the session state via the session state's
+   * ADD RESOURCE command.
+   * @param command a String to be compiled
+   * @return CommandProcessorResponse with 0 for success and 1 for failure. On
+   *         success the second field of the response holds the absolute path
+   *         of the generated jar; on failure it holds the error message.
+   */
+  @Override
+  public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
+    SessionState ss = SessionState.get();
+    myId = runCount.getAndIncrement();
+    this.command = command;
+    try {
+      parse(ss);
+      return compile(ss);
+    } catch (CompileProcessorException e) {
+      return new CommandProcessorResponse(1, e.getMessage(), null);
+    }
+  }
+
+  /**
+   * Parses the supplied command into its code, language and file name parts.
+   * The code is the text between the first back-tick and the next unescaped
+   * back-tick; a backslash makes the following character literal. The text
+   * after the closing back-tick must be exactly four space separated tokens:
+   * AS &lt;lang&gt; NAMED &lt;file name&gt;.
+   * @param ss the current session, or null; when present its configuration is
+   *           used for variable substitution on the command
+   * @throws CompileProcessorException if the command is empty, is not in the
+   *           expected syntax, or names a language other than groovy
+   */
+  @VisibleForTesting
+  void parse(SessionState ss) throws CompileProcessorException {
+    if (ss != null) {
+      command = new VariableSubstitution().substitute(ss.getConf(), command);
+    }
+    if (command == null || command.length() == 0) {
+      throw new CompileProcessorException("Command was empty");
+    }
+    int openTick = command.indexOf('`');
+    // A back-tick in the last position leaves no room for code and a closing tick.
+    if (openTick < 0 || openTick == command.length() - 1) {
+      throw new CompileProcessorException(SYNTAX);
+    }
+    StringBuilder toCompile = new StringBuilder();
+    int endPosition = -1;
+    /* TODO Escape handling may be changed by a follow on.
+     * The largest issue is ; which are treated as statement
+     * terminators for the cli. Once the cli is fixed this
+     * code should be re-investigated
+     */
+    for (int i = openTick + 1; i < command.length(); i++) {
+      char c = command.charAt(i);
+      if (c == '\\') {
+        // A trailing backslash has nothing to escape; report it as a syntax
+        // error instead of running off the end of the string.
+        if (i + 1 >= command.length()) {
+          throw new CompileProcessorException(SYNTAX);
+        }
+        toCompile.append(command.charAt(i + 1));
+        i++;
+      } else if (c == '`') {
+        endPosition = i;
+        break;
+      } else {
+        toCompile.append(c);
+      }
+    }
+    if (endPosition == -1) {
+      throw new CompileProcessorException(SYNTAX);
+    }
+    StringTokenizer st = new StringTokenizer(command.substring(endPosition + 1), " ");
+    if (st.countTokens() != 4) {
+      throw new CompileProcessorException(SYNTAX);
+    }
+    String shouldBeAs = st.nextToken();
+    if (!shouldBeAs.equalsIgnoreCase(AS)) {
+      throw new CompileProcessorException(SYNTAX);
+    }
+    setLang(st.nextToken());
+    if (!lang.equalsIgnoreCase(GROOVY)) {
+      throw new CompileProcessorException("Can not compile " + lang + ". Hive can only compile " + GROOVY);
+    }
+    String shouldBeNamed = st.nextToken();
+    if (!shouldBeNamed.equalsIgnoreCase(NAMED)) {
+      throw new CompileProcessorException(SYNTAX);
+    }
+    setNamed(st.nextToken());
+    setCode(toCompile.toString());
+  }
+
+  /**
+   * Method converts statement into a file, compiles the file and then packages the file.
+   * The source is written to a scratch directory under java.io.tmpdir, compiled
+   * with groovyc into a second scratch directory, and everything found there
+   * (including sub-directories produced by package declarations) is packed into
+   * a jar next to them. When a session is supplied the jar is added to it as a
+   * JAR resource.
+   * @param ss the current session, or null to skip registering the jar
+   * @return Response code of 0 for success; the jar's absolute path is passed
+   *         as the second field of the response
+   * @throws CompileProcessorException if the temp directory is unusable, the
+   *           file name is not a plain name, the source can not be written,
+   *           compilation fails, or the jar can not be written
+   */
+  @VisibleForTesting
+  CommandProcessorResponse compile(SessionState ss) throws CompileProcessorException {
+    if (named == null || code == null) {
+      throw new CompileProcessorException("Nothing to compile: code and file name must be set");
+    }
+    // The name comes straight from the user; refuse anything that would place
+    // the source file outside of the scratch directory.
+    if (!new File(named).getName().equals(named)) {
+      throw new CompileProcessorException(
+          "NAMED must be a plain file name without directories: " + named);
+    }
+    String ioTempDir = System.getProperty(IO_TMP_DIR);
+    File ioTempFile = new File(ioTempDir);
+    if (!ioTempFile.exists()) {
+      throw new CompileProcessorException(ioTempDir + " does not exists");
+    }
+    if (!ioTempFile.isDirectory() || !ioTempFile.canWrite()) {
+      throw new CompileProcessorException(ioTempDir + " is not a writable directory");
+    }
+
+    String jarId = myId + "_" + System.currentTimeMillis();
+    File input = new File(ioTempFile, jarId + "in");
+    File destination = new File(ioTempFile, jarId + "out");
+    createDirectory(input);
+    createDirectory(destination);
+
+    File fileToWrite = new File(input, this.named);
+    try {
+      Files.write(this.code, fileToWrite, Charset.forName("UTF-8"));
+    } catch (IOException e1) {
+      throw new CompileProcessorException("writing file", e1);
+    }
+
+    Project proj = new Project();
+    Groovyc g = new Groovyc();
+    g.setProject(proj);
+    Path sourcePath = new Path(proj);
+    sourcePath.setLocation(input);
+    g.setSrcdir(sourcePath);
+    g.setDestdir(destination);
+    try {
+      g.execute();
+    } catch (org.apache.tools.ant.BuildException e) {
+      // Compilation problems surface as an unchecked ant exception; convert it
+      // so run() can report a normal failure response.
+      throw new CompileProcessorException("Problem compiling " + named + ": " + e.getMessage(), e);
+    }
+
+    File testArchive = new File(ioTempFile, jarId + ".jar");
+    JarArchiveOutputStream out = null;
+    try {
+      out = new JarArchiveOutputStream(new FileOutputStream(testArchive));
+      addToJar(out, destination, "");
+      out.finish();
+    } catch (IOException e) {
+      throw new CompileProcessorException("Exception while writing jar", e);
+    } finally {
+      if (out != null) {
+        try {
+          out.close();
+        } catch (IOException e) {
+          LOG.warn("Unable to close " + testArchive.getAbsolutePath(), e);
+        }
+      }
+    }
+
+    if (ss != null) {
+      ss.add_resource(ResourceType.JAR, testArchive.getAbsolutePath());
+    }
+    return new CommandProcessorResponse(0, testArchive.getAbsolutePath(), null);
+  }
+
+  /**
+   * Creates a scratch directory, failing unless it exists afterwards.
+   */
+  private static void createDirectory(File dir) throws CompileProcessorException {
+    if (!dir.mkdir() && !dir.isDirectory()) {
+      throw new CompileProcessorException("Could not create directory " + dir.getAbsolutePath());
+    }
+  }
+
+  /**
+   * Recursively copies the files below dir into the jar. Entry names are the
+   * paths relative to the top level directory, joined with '/'.
+   */
+  private static void addToJar(JarArchiveOutputStream out, File dir, String prefix)
+      throws IOException {
+    File[] children = dir.listFiles();
+    if (children == null) {
+      throw new IOException("Unable to list " + dir.getAbsolutePath());
+    }
+    for (File f : children) {
+      String entryName = prefix + f.getName();
+      if (f.isDirectory()) {
+        addToJar(out, f, entryName + "/");
+        continue;
+      }
+      FileInputStream fis = new FileInputStream(f);
+      try {
+        out.putArchiveEntry(new JarArchiveEntry(entryName));
+        IOUtils.copy(fis, out);
+        out.closeArchiveEntry();
+      } finally {
+        IOUtils.closeQuietly(fis);
+      }
+    }
+  }
+
+  public String getLang() {
+    return lang;
+  }
+
+  public void setLang(String lang) {
+    this.lang = lang;
+  }
+
+  public String getCode() {
+    return code;
+  }
+
+  public void setCode(String code) {
+    this.code = code;
+  }
+
+  public String getNamed() {
+    return named;
+  }
+
+  public void setNamed(String named) {
+    this.named = named;
+  }
+
+  public String getCommand() {
+    return command;
+  }
+
+  /**
+   * Signals that a command could not be parsed, compiled or packaged.
+   */
+  static class CompileProcessorException extends HiveException {
+
+    private static final long serialVersionUID = 1L;
+
+    CompileProcessorException(String s, Throwable t) {
+      super(s, t);
+    }
+
+    CompileProcessorException(String s) {
+      super(s);
+    }
+  }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java?rev=1530394&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java Tue Oct 8 19:11:52 2013
@@ -0,0 +1,29 @@
+package org.apache.hadoop.hive.ql.processors;
+
+import java.io.File;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestCompileProcessor {
+
+ @Test
+ public void testSyntax() throws Exception {
+ CompileProcessor cp = new CompileProcessor();
+ Assert.assertEquals(0, cp.run("` public class x { \n }` AS GROOVY NAMED x.groovy").getResponseCode());
+ Assert.assertEquals("GROOVY", cp.getLang());
+ Assert.assertEquals(" public class x { \n }", cp.getCode());
+ Assert.assertEquals("x.groovy", cp.getNamed());
+ Assert.assertEquals(1, cp.run("").getResponseCode());
+ Assert.assertEquals(1, cp.run("bla bla ").getResponseCode());
+ CompileProcessor cp2 = new CompileProcessor();
+ CommandProcessorResponse response = cp2.run(
+ "` import org.apache.hadoop.hive.ql.exec.UDF \n public class x { \n }` AS GROOVY NAMED x.groovy");
+ Assert.assertEquals(0, response.getResponseCode());
+ File f = new File(response.getErrorMessage());
+ Assert.assertTrue(f.exists());
+ f.delete();
+ }
+
+}