You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2013/10/08 21:11:52 UTC

svn commit: r1530394 - in /hive/trunk: eclipse-templates/.classpath ivy/libraries.properties ql/ivy.xml ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java

Author: brock
Date: Tue Oct  8 19:11:52 2013
New Revision: 1530394

URL: http://svn.apache.org/r1530394
Log:
HIVE-5253: Create component to compile and jar dynamic code (Edward Capriolo via Brock Noland)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java
Modified:
    hive/trunk/eclipse-templates/.classpath
    hive/trunk/ivy/libraries.properties
    hive/trunk/ql/ivy.xml

Modified: hive/trunk/eclipse-templates/.classpath
URL: http://svn.apache.org/viewvc/hive/trunk/eclipse-templates/.classpath?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/eclipse-templates/.classpath (original)
+++ hive/trunk/eclipse-templates/.classpath Tue Oct  8 19:11:52 2013
@@ -88,6 +88,7 @@
   <classpathentry kind="lib" path="build/ivy/lib/default/mockito-all-@mockito-all.version@.jar"/>
   <classpathentry kind="lib" path="build/ivy/lib/default/ST4-@ST4.version@.jar"/>
   <classpathentry kind="lib" path="build/ivy/lib/default/snappy-@snappy.version@.jar"/>
+  <classpathentry kind="lib" path="build/ivy/lib/default/groovy-all-@groovy.version@.jar"/>
   <classpathentry kind="lib" path="build/beeline/hive-beeline-@HIVE_VERSION@.jar"/>
   <classpathentry kind="lib" path="build/ivy/lib/default/tempus-fugit-@tempus-fugit.version@.jar"/>
   <classpathentry kind="src" path="build/contrib/test/src"/>

Modified: hive/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hive/trunk/ivy/libraries.properties?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/ivy/libraries.properties (original)
+++ hive/trunk/ivy/libraries.properties Tue Oct  8 19:11:52 2013
@@ -42,6 +42,7 @@ commons-logging-api.version=1.0.4
 commons-pool.version=1.5.4
 derby.version=10.4.2.0
 guava.version=11.0.2
+groovy.version=2.1.6
 hbase.version=0.94.6.1
 httpclient.version=4.2.5
 httpcore.version=4.2.4

Modified: hive/trunk/ql/ivy.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/ivy.xml?rev=1530394&r1=1530393&r2=1530394&view=diff
==============================================================================
--- hive/trunk/ql/ivy.xml (original)
+++ hive/trunk/ql/ivy.xml Tue Oct  8 19:11:52 2013
@@ -49,6 +49,9 @@
     <dependency org="com.esotericsoftware.kryo" name="kryo" 
                 rev="${kryo.version}" />
 
+    <dependency org="org.codehaus.groovy" name="groovy-all"
+                rev="${groovy.version}" />
+
     <dependency org="org.json" name="json" rev="${json.version}"/>
     <dependency org="commons-collections" name="commons-collections" rev="${commons-collections.version}"/>
     <dependency org="commons-configuration" name="commons-configuration" rev="${commons-configuration.version}"

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java?rev=1530394&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/processors/CompileProcessor.java Tue Oct  8 19:11:52 2013
@@ -0,0 +1,278 @@
+package org.apache.hadoop.hive.ql.processors;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.StringTokenizer;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.compress.archivers.jar.JarArchiveEntry;
+import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
+import org.apache.tools.ant.Project;
+import org.apache.tools.ant.types.Path;
+import org.codehaus.groovy.ant.Groovyc;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.io.Files;
+
+/**
+ * Processor allows users to build code inside a hive session, then
+ * use this code as a UDF, Serde, or even a more complex entity like an
+ * input format or hook.
+ *
+ * Note: This class is stateful and not thread safe. Create a new instance for
+ * each invocation of CompileProcessor.
+ *
+ */
+public class CompileProcessor implements CommandProcessor {
+
+  public static final Log LOG = LogFactory.getLog(CompileProcessor.class.getName());
+  public static final LogHelper console = new LogHelper(LOG);
+  public static final String IO_TMP_DIR = "java.io.tmpdir";
+  public static final String GROOVY = "GROOVY";
+  public static final String AS = "AS";
+  public static final String NAMED = "NAMED";
+  private static final String SYNTAX = "syntax: COMPILE ` some code here ` AS groovy NAMED something.groovy";
+  private static final AtomicInteger runCount;
+
+  /**
+   * The language of the compiled code. Used to select the appropriate compiler.
+   */
+  private String lang;
+  /**
+   * The code to be compiled
+   */
+  private String code;
+  /**
+   * The name of the file the code will be written to
+   */
+  private String named;
+  /**
+   * The entire command sent to the processor
+   */
+  private String command;
+  /**
+   * Used as part of a file name to help avoid collisions.
+   */
+  private int myId;
+
+  static {
+    runCount = new AtomicInteger(0);
+  }
+
+  /**
+   * Does nothing; this processor needs no initialization.
+   */
+  @Override
+  public void init() {
+    //no init needed
+  }
+
+  /**
+   * Entry point for a command of the form:
+   * COMPILE ` some code here ` AS groovy NAMED something.groovy;
+   * The command is parsed into its code, language and file name and is then
+   * handed to compile(SessionState), which builds a jar and, when a session
+   * is present, adds that jar to the session as a JAR resource. A
+   * CompileProcessorException raised by either step is turned into a failure
+   * response carrying the exception message.
+   * @param command the full command text, back-quoted code included
+   * @return CommandProcessorResponse with 0 for success and 1 for failure
+   */
+  @Override
+  public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
+    SessionState session = SessionState.get();
+    // Taken once per invocation; compile() uses it in the generated file names.
+    myId = runCount.getAndIncrement();
+    this.command = command;
+    try {
+      parse(session);
+      return compile(session);
+    } catch (CompileProcessorException failure) {
+      return new CommandProcessorResponse(1, failure.getMessage(), null);
+    }
+  }
+
+  /**
+   * Parses the supplied command into the code, language and file name fields.
+   * The code is the text between the first back quote and the next unescaped
+   * back quote; a backslash copies the character that follows it verbatim.
+   * The remainder of the command must be the four space separated tokens
+   * AS groovy NAMED file-name.
+   * @param ss the current session; when not null its configuration is used for
+   *        variable substitution on the command
+   * @throws CompileProcessorException if the command is empty, does not follow
+   *         the expected syntax, or asks for a language other than groovy
+   */
+  @VisibleForTesting
+  void parse(SessionState ss) throws CompileProcessorException {
+    if (ss != null){
+      command = new VariableSubstitution().substitute(ss.getConf(), command);
+    }
+    if (command == null || command.length() == 0) {
+      throw new CompileProcessorException("Command was empty");
+    }
+    /* TODO Escape handling may be changed by a follow on.
+     * The largest issue is ; which are treated as statement
+     * terminators for the cli. Once the cli is fixed this
+     * code should be re-investigated
+     */
+    int openQuote = command.indexOf('`');
+    if (openQuote == -1){
+      throw new CompileProcessorException(SYNTAX);
+    }
+    StringBuilder toCompile = new StringBuilder();
+    int endPosition = -1;
+    for (int i = openQuote + 1; i < command.length(); i++) {
+      char current = command.charAt(i);
+      if (current == '\\') {
+        if (i + 1 == command.length()) {
+          // A trailing backslash has nothing to escape and the closing back
+          // quote is missing: report bad syntax instead of reading past the end.
+          throw new CompileProcessorException(SYNTAX);
+        }
+        // Copy the escaped character as is and step over it.
+        toCompile.append(command.charAt(++i));
+      } else if (current == '`'){
+        endPosition = i;
+        break;
+      } else {
+        toCompile.append(current);
+      }
+    }
+    if (endPosition == -1){
+      // Either the opening back quote was the last character or it was never closed.
+      throw new CompileProcessorException(SYNTAX);
+    }
+    StringTokenizer st = new StringTokenizer(command.substring(endPosition+1), " ");
+    if (st.countTokens() != 4){
+      throw new CompileProcessorException(SYNTAX);
+    }
+    if (!st.nextToken().equalsIgnoreCase(AS)){
+      throw new CompileProcessorException(SYNTAX);
+    }
+    setLang(st.nextToken());
+    if (!lang.equalsIgnoreCase(GROOVY)){
+      throw new CompileProcessorException("Can not compile " + lang + ". Hive can only compile " + GROOVY);
+    }
+    if (!st.nextToken().equalsIgnoreCase(NAMED)){
+      throw new CompileProcessorException(SYNTAX);
+    }
+    setNamed(st.nextToken());
+    setCode(toCompile.toString());
+  }
+
+  /**
+   * Writes the parsed code to a source file, compiles that file with the
+   * groovyc ant task and packages the files found in the output directory
+   * into a jar under java.io.tmpdir. When a session is supplied the jar is
+   * added to it as a JAR resource.
+   * @param ss the current session; when null the jar is built but not registered
+   * @return response with code 0 and the absolute path of the jar passed as
+   *         the response message
+   * @throws CompileProcessorException if the temp directory is unusable, the
+   *         file name is not a plain name, the code does not compile, or the
+   *         source file or jar can not be written
+   */
+  @VisibleForTesting
+  CommandProcessorResponse compile(SessionState ss) throws CompileProcessorException {
+    String ioTempDir = System.getProperty(IO_TMP_DIR);
+    File ioTempFile = new File(ioTempDir);
+    if (!ioTempFile.exists()){
+      throw new CompileProcessorException(ioTempDir + " does not exists");
+    }
+    if (!ioTempFile.isDirectory() || !ioTempFile.canWrite()){
+      throw new CompileProcessorException(ioTempDir + " is not a writable directory");
+    }
+    // The per-run id plus a timestamp keeps the file names of separate runs apart.
+    String jarId = myId + "_" + System.currentTimeMillis();
+    File input = new File(ioTempFile, jarId + "in");
+    File destination = new File(ioTempFile, jarId + "out");
+
+    Project proj = new Project();
+    Path sourcePath = new Path(proj);
+    sourcePath.setLocation(input);
+    Groovyc g = new Groovyc();
+    g.setProject(proj);
+    g.setSrcdir(sourcePath);
+    g.setDestdir(destination);
+
+    if (!input.mkdir() || !destination.mkdir()){
+      throw new CompileProcessorException("Could not create working directories in " + ioTempDir);
+    }
+    File fileToWrite = new File(input, this.named);
+    if (!input.equals(fileToWrite.getParentFile())){
+      // The name comes from the user; keep the write inside the source directory.
+      throw new CompileProcessorException(this.named + " must be a plain file name");
+    }
+    try {
+      Files.write(this.code, fileToWrite, Charset.forName("UTF-8"));
+    } catch (IOException e1) {
+      throw new CompileProcessorException("writing file", e1);
+    }
+    try {
+      g.execute();
+    } catch (org.apache.tools.ant.BuildException e) {
+      // The ant task signals compile errors with this unchecked exception;
+      // convert it so that run() can answer with a failure response.
+      throw new CompileProcessorException("Compilation failed: " + e.getMessage(), e);
+    }
+
+    // listFiles() returns null when the directory can not be read.
+    File[] compiled = destination.listFiles();
+    if (compiled == null){
+      throw new CompileProcessorException("Can not list " + destination.getAbsolutePath());
+    }
+    File testArchive = new File(ioTempFile, jarId + ".jar");
+    JarArchiveOutputStream out = null;
+    try {
+      out = new JarArchiveOutputStream(new FileOutputStream(testArchive));
+      for (File f: compiled){
+        FileInputStream fis = new FileInputStream(f);
+        try {
+          out.putArchiveEntry(new JarArchiveEntry(f.getName()));
+          IOUtils.copy(fis, out);
+          out.closeArchiveEntry();
+        } finally {
+          // Release the input even when copying fails.
+          fis.close();
+        }
+      }
+      out.finish();
+    } catch (IOException e) {
+      throw new CompileProcessorException("Exception while writing jar", e);
+    } finally {
+      if (out!=null){
+        try {
+          out.close();
+        } catch (IOException ignored) {
+          // Best effort: either finish() already completed the archive or a
+          // failure is already being reported to the caller.
+        }
+      }
+    }
+
+    if (ss != null){
+      ss.add_resource(ResourceType.JAR, testArchive.getAbsolutePath());
+    }
+    return new CommandProcessorResponse(0, testArchive.getAbsolutePath(), null);
+  }
+
+  /**
+   * @return the language token stored by parse() or setLang()
+   */
+  public String getLang() {
+    return lang;
+  }
+
+  /**
+   * @param lang the language of the code to compile
+   */
+  public void setLang(String lang) {
+    this.lang = lang;
+  }
+
+  /**
+   * @return the code to be compiled, as stored by parse() or setCode()
+   */
+  public String getCode() {
+    return code;
+  }
+
+  /**
+   * @param code the code to be compiled
+   */
+  public void setCode(String code) {
+    this.code = code;
+  }
+
+  /**
+   * @return the name of the file the code will be written to
+   */
+  public String getNamed() {
+    return named;
+  }
+
+  /**
+   * @param named the name of the file the code will be written to
+   */
+  public void setNamed(String named) {
+    this.named = named;
+  }
+
+  /**
+   * @return the command last given to run(); parse() replaces it with the
+   *         variable-substituted text when it is called with a session
+   */
+  public String getCommand() {
+    return command;
+  }
+
+  /**
+   * Failure raised while parsing or compiling a COMPILE command. Declared
+   * static so that an instance does not hold a reference to the processor
+   * that created it.
+   */
+  static class CompileProcessorException extends HiveException {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * @param s description of the failure
+     * @param t the underlying cause
+     */
+    CompileProcessorException(String s, Throwable t) {
+      super(s, t);
+    }
+
+    /**
+     * @param s description of the failure
+     */
+    CompileProcessorException(String s) {
+      super(s);
+    }
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java?rev=1530394&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/processors/TestCompileProcessor.java Tue Oct  8 19:11:52 2013
@@ -0,0 +1,29 @@
+package org.apache.hadoop.hive.ql.processors;
+
+import java.io.File;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+/**
+ * Exercises CompileProcessor.run() with valid and invalid commands.
+ */
+public class TestCompileProcessor {
+
+  /**
+   * A well formed command must succeed and expose the parsed language, code
+   * and file name; empty or malformed commands must fail with code 1. Every
+   * jar produced by a successful run is removed again.
+   */
+  @Test
+  public void testSyntax() throws Exception {
+    CompileProcessor cp = new CompileProcessor();
+    CommandProcessorResponse first = cp.run("` public class x { \n }` AS GROOVY NAMED x.groovy");
+    Assert.assertEquals(0, first.getResponseCode());
+    Assert.assertEquals("GROOVY", cp.getLang());
+    Assert.assertEquals(" public class x { \n }", cp.getCode());
+    Assert.assertEquals("x.groovy", cp.getNamed());
+    // On success the processor hands back the jar path as the response message.
+    File firstJar = new File(first.getErrorMessage());
+    Assert.assertTrue(firstJar.exists());
+    firstJar.delete();
+
+    Assert.assertEquals(1, cp.run("").getResponseCode());
+    Assert.assertEquals(1, cp.run("bla bla ").getResponseCode());
+
+    CompileProcessor cp2 = new CompileProcessor();
+    CommandProcessorResponse response = cp2.run(
+        "` import org.apache.hadoop.hive.ql.exec.UDF \n public class x { \n }` AS GROOVY NAMED x.groovy");
+    Assert.assertEquals(0, response.getResponseCode());
+    File f = new File(response.getErrorMessage());
+    Assert.assertTrue(f.exists());
+    f.delete();
+  }
+
+}