You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by pr...@apache.org on 2015/08/17 14:46:49 UTC

incubator-lens git commit: LENS-734: Query names in persisted output result files

Repository: incubator-lens
Updated Branches:
  refs/heads/master 7b2cb4ec0 -> df5567b78


LENS-734: Query names in persisted output result files


Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/df5567b7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/df5567b7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/df5567b7

Branch: refs/heads/master
Commit: df5567b788d526d03d3ea8c2e77315b6ac49d5a8
Parents: 7b2cb4e
Author: Nitin Gupta <ni...@gmail.com>
Authored: Mon Aug 17 18:16:13 2015 +0530
Committer: Rajat Khandelwal <pr...@apache.org>
Committed: Mon Aug 17 18:16:13 2015 +0530

----------------------------------------------------------------------
 .../lens/lib/query/HadoopFileFormatter.java     |  8 +-
 .../lens/lib/query/LensFileOutputFormat.java    | 21 +++++
 .../apache/lens/lib/query/ZipFileFormatter.java | 11 ++-
 .../lib/query/TestAbstractFileFormatter.java    | 80 ++++++++++++++++----
 .../lib/query/TestFilePersistentFormatter.java  | 76 +++++++++++++++++++
 5 files changed, 176 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/df5567b7/lens-query-lib/src/main/java/org/apache/lens/lib/query/HadoopFileFormatter.java
----------------------------------------------------------------------
diff --git a/lens-query-lib/src/main/java/org/apache/lens/lib/query/HadoopFileFormatter.java b/lens-query-lib/src/main/java/org/apache/lens/lib/query/HadoopFileFormatter.java
index 6f55c79..87aff69 100644
--- a/lens-query-lib/src/main/java/org/apache/lens/lib/query/HadoopFileFormatter.java
+++ b/lens-query-lib/src/main/java/org/apache/lens/lib/query/HadoopFileFormatter.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.Reporter;
 
+import com.google.common.base.Strings;
+
 /**
  * A hadoop file formatter
  * <p></p>
@@ -55,8 +57,10 @@ public class HadoopFileFormatter extends AbstractFileFormatter {
     if (StringUtils.isBlank(pathStr)) {
       throw new IllegalArgumentException("No output path specified");
     }
-    outputPath = new Path(pathStr, ctx.getQueryHandle().toString());
-    Path tmpWorkPath = new Path(outputPath + ".tmp");
+    String outputPathStr = Strings.isNullOrEmpty(ctx.getQueryName()) ? ""
+      : LensFileOutputFormat.getValidOutputFileName(ctx.getQueryName()) + "-";
+    outputPath = new Path(pathStr, outputPathStr + ctx.getQueryHandle().toString());
+    Path tmpWorkPath = new Path(pathStr, ctx.getQueryHandle().toString() + ".tmp");
     try {
       rowWriter = LensFileOutputFormat.createRecordWriter(ctx.getConf(), tmpWorkPath, Reporter.NULL,
         ctx.getCompressOutput(), ctx.getOuptutFileExtn(), ctx.getResultEncoding());

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/df5567b7/lens-query-lib/src/main/java/org/apache/lens/lib/query/LensFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/lens-query-lib/src/main/java/org/apache/lens/lib/query/LensFileOutputFormat.java b/lens-query-lib/src/main/java/org/apache/lens/lib/query/LensFileOutputFormat.java
index 034b88f..b801959 100644
--- a/lens-query-lib/src/main/java/org/apache/lens/lib/query/LensFileOutputFormat.java
+++ b/lens-query-lib/src/main/java/org/apache/lens/lib/query/LensFileOutputFormat.java
@@ -22,6 +22,8 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.lens.server.api.LensConfConstants;
 
@@ -60,6 +62,11 @@ public class LensFileOutputFormat extends FileOutputFormat<NullWritable, Text> {
   public static final String NEWLINE = "\n";
 
   /**
+   * Regex pattern matching every character that is not allowed in an output file name
+   */
+  public static final Pattern VALID_PATTERN = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");
+
+  /**
    * The Class LensRowWriter.
    */
   public static class LensRowWriter implements RecordWriter<NullWritable, Text> {
@@ -242,4 +249,18 @@ public class LensFileOutputFormat extends FileOutputFormat<NullWritable, Text> {
     return conf.getBoolean(LensConfConstants.QUERY_OUTPUT_ENABLE_COMPRESSION,
       LensConfConstants.DEFAULT_OUTPUT_ENABLE_COMPRESSION);
   }
+
+  /**
+   * Generates a valid output file name: invalid characters become "_", length is capped at 50
+   * @param name name to be converted
+   * @return Valid output file name
+   */
+  public static String getValidOutputFileName(String name) {
+    Matcher matcher = VALID_PATTERN.matcher(name);
+    String validName = matcher.replaceAll("_");
+    if (validName.length() > 50) {
+      validName = validName.substring(0, 50);
+    }
+    return validName;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/df5567b7/lens-query-lib/src/main/java/org/apache/lens/lib/query/ZipFileFormatter.java
----------------------------------------------------------------------
diff --git a/lens-query-lib/src/main/java/org/apache/lens/lib/query/ZipFileFormatter.java b/lens-query-lib/src/main/java/org/apache/lens/lib/query/ZipFileFormatter.java
index a3614fe..8e9859f 100644
--- a/lens-query-lib/src/main/java/org/apache/lens/lib/query/ZipFileFormatter.java
+++ b/lens-query-lib/src/main/java/org/apache/lens/lib/query/ZipFileFormatter.java
@@ -27,6 +27,8 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
+import com.google.common.base.Strings;
+
 /**
  * Zip file formatter.
  * <p></p>
@@ -99,7 +101,10 @@ public class ZipFileFormatter extends AbstractFileFormatter {
     if (StringUtils.isBlank(pathStr)) {
       throw new IllegalArgumentException("No output path specified");
     }
-    finalPath = new Path(pathStr, ctx.getQueryHandle().toString() + ".zip");
+
+    String finalPathStr = Strings.isNullOrEmpty(ctx.getQueryName()) ? ""
+      : LensFileOutputFormat.getValidOutputFileName(ctx.getQueryName()) + "-";
+    finalPath = new Path(pathStr, finalPathStr + ctx.getQueryHandle().toString() + ".zip");
     tmpPath = new Path(pathStr, ctx.getQueryHandle().toString() + ".tmp.zip");
 
     fs = finalPath.getFileSystem(ctx.getConf());
@@ -119,7 +124,9 @@ public class ZipFileFormatter extends AbstractFileFormatter {
   }
 
   private String getQueryResultFileName() {
-    return ctx.getQueryHandle().toString() + PART_SUFFIX + currentPart + resultFileExtn;
+    String pathStr = Strings.isNullOrEmpty(ctx.getQueryName()) ? ""
+      : LensFileOutputFormat.getValidOutputFileName(ctx.getQueryName()) + "-";
+    return pathStr + ctx.getQueryHandle().toString() + PART_SUFFIX + currentPart + resultFileExtn;
   }
 
   /*

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/df5567b7/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestAbstractFileFormatter.java
----------------------------------------------------------------------
diff --git a/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestAbstractFileFormatter.java b/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestAbstractFileFormatter.java
index 307e075..35d7a00 100644
--- a/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestAbstractFileFormatter.java
+++ b/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestAbstractFileFormatter.java
@@ -207,36 +207,47 @@ public abstract class TestAbstractFileFormatter {
   }
 
   /**
-   * Test formatter.
-   *
-   * @param conf            the conf
-   * @param charsetEncoding the charset encoding
-   * @param outputParentDir the output parent dir
-   * @param fileExtn        the file extn
-   * @param columnNames     the column names
-   * @throws IOException Signals that an I/O exception has occurred.
+   * Creates the query context
+   * @param conf      the conf
+   * @param queryName the name of query
+   * @return the query context
    */
-  protected void testFormatter(Configuration conf, String charsetEncoding, String outputParentDir, String fileExtn,
-    LensResultSetMetadata columnNames) throws IOException {
-
+  protected QueryContext createContext(Configuration conf, String queryName) {
     final LensDriver mockDriver = new MockDriver();
     try {
       mockDriver.configure(conf);
     } catch (LensException e) {
       Assert.fail(e.getMessage());
     }
-    QueryContext ctx = QueryContext.createContextWithSingleDriver("test writer query", "testuser", new LensConf(),
-        conf, mockDriver, null, false);
+    QueryContext ctx = QueryContext.createContextWithSingleDriver("test writer query", "testuser",
+      new LensConf(), conf, mockDriver, null, false);
 
     ctx.setSelectedDriver(mockDriver);
-    formatter = createFormatter();
+    ctx.setQueryName(queryName);
+    return ctx;
+  }
 
+  /**
+   * Validates the formatter
+   * @param conf              the conf
+   * @param charsetEncoding   the charset encoding
+   * @param outputParentDir   the output parent dir
+   * @param fileExtn          the file extn
+   * @param columnNames       the column names
+   * @param ctx               the query context
+   * @param expectedFinalPath the final path of output
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  public void validateFormatter(Configuration conf, String charsetEncoding, String outputParentDir, String fileExtn,
+    LensResultSetMetadata columnNames, QueryContext ctx, Path expectedFinalPath) throws IOException {
+    formatter = createFormatter();
     formatter.init(ctx, columnNames);
 
     // check output spec
     Assert.assertEquals(formatter.getEncoding(), charsetEncoding);
     Path tmpPath = formatter.getTmpPath();
-    Path expectedTmpPath = new Path(outputParentDir, ctx.getQueryHandle() + ".tmp" + fileExtn);
+    Path expectedTmpPath = new Path(outputParentDir, ctx.getQueryHandle()
+      + ".tmp" + fileExtn);
     Assert.assertEquals(tmpPath, expectedTmpPath);
 
     // write header, rows and footer;
@@ -251,10 +262,47 @@ public abstract class TestAbstractFileFormatter {
     formatter.close();
     Assert.assertFalse(fs.exists(tmpPath));
     Path finalPath = new Path(formatter.getFinalOutputPath());
-    Path expectedFinalPath = new Path(outputParentDir, ctx.getQueryHandle() + fileExtn).makeQualified(fs);
     Assert.assertEquals(finalPath, expectedFinalPath);
     Assert.assertTrue(fs.exists(finalPath));
   }
+  /**
+   * Test formatter.
+   *
+   * @param conf            the conf
+   * @param charsetEncoding the charset encoding
+   * @param outputParentDir the output parent dir
+   * @param fileExtn        the file extn
+   * @param columnNames     the column names
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  protected void testFormatter(Configuration conf, String charsetEncoding, String outputParentDir, String fileExtn,
+    LensResultSetMetadata columnNames) throws IOException {
+
+    QueryContext ctx = createContext(conf, null);
+
+    Path expectedFinalPath = new Path(outputParentDir, ctx.getQueryHandle() + fileExtn);
+    FileSystem fs = expectedFinalPath.getFileSystem(conf);
+    expectedFinalPath = expectedFinalPath.makeQualified(fs);
+    validateFormatter(conf, charsetEncoding, outputParentDir, fileExtn, columnNames, ctx, expectedFinalPath);
+  }
+
+  /**
+   * Test Formatter with a different final path
+   * @param conf              the conf
+   * @param charsetEncoding   the charset encoding
+   * @param outputParentDir   the output parent dir
+   * @param fileExtn          the file extn
+   * @param columnNames       the column names
+   * @param queryName         the name of the query
+   * @param expectedFinalPath Final path of the output
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  protected void testFormatterWithFinalPath(Configuration conf, String charsetEncoding, String outputParentDir,
+    String fileExtn, LensResultSetMetadata columnNames, String queryName, Path expectedFinalPath) throws IOException {
+    QueryContext ctx = createContext(conf, queryName);
+
+    validateFormatter(conf, charsetEncoding, outputParentDir, fileExtn, columnNames, ctx, expectedFinalPath);
+  }
 
   /**
    * Read final output file.

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/df5567b7/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestFilePersistentFormatter.java
----------------------------------------------------------------------
diff --git a/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestFilePersistentFormatter.java b/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestFilePersistentFormatter.java
index 7617e06..d02e0a9 100644
--- a/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestFilePersistentFormatter.java
+++ b/lens-query-lib/src/test/java/org/apache/lens/lib/query/TestFilePersistentFormatter.java
@@ -23,9 +23,12 @@ import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
 
 import org.apache.lens.server.api.LensConfConstants;
 import org.apache.lens.server.api.query.PersistedOutputFormatter;
+import org.apache.lens.server.api.query.QueryContext;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -253,6 +256,79 @@ public class TestFilePersistentFormatter extends TestAbstractFileFormatter {
     Assert.assertEquals(actual, getExpectedCSVRowsWithMultiple());
   }
 
+  /**
+   * Test text files output path.
+   *
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  @Test
+  public void testTextFileOutputPath() throws IOException {
+    Configuration conf = new Configuration();
+    setConf(conf);
+    conf.set("test.partfile.dir", partFileTextDir.toString());
+    conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
+    conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
+      "firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
+    QueryContext ctx = createContext(conf, "test.Query_1 name");
+    Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
+      "test.Query_1_name-" + ctx.getQueryHandle() + ".txt");
+    FileSystem fs = expectedFinalPath.getFileSystem(conf);
+    expectedFinalPath = expectedFinalPath.makeQualified(fs);
+    validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
+      getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
+  }
+
+  /**
+   * Test text files with a long output path.
+   *
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  @Test
+  public void testTextFileLongOutputPath() throws IOException {
+    Configuration conf = new Configuration();
+    setConf(conf);
+    conf.set("test.partfile.dir", partFileTextDir.toString());
+    conf.set(LensConfConstants.QUERY_OUTPUT_FILE_EXTN, ".txt");
+    conf.set(LensConfConstants.QUERY_OUTPUT_HEADER,
+      "firstcolsecondcolthirdcolfourthcolfifthcolsixthcolseventhcol");
+    QueryContext ctx = createContext(conf, "test-Query 1^name12345678901234567890123456789012345678901234567890");
+    Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
+      "test-Query_1_name123456789012345678901234567890123-" + ctx.getQueryHandle() + ".txt");
+    FileSystem fs = expectedFinalPath.getFileSystem(conf);
+    expectedFinalPath = expectedFinalPath.makeQualified(fs);
+    validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".txt",
+      getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
+  }
+
+  /**
+   * Test zip csv files output path.
+   *
+   * @throws IOException Signals that an I/O exception has occurred.
+   */
+  @Test
+  public void testCSVZipFileOutputPath() throws IOException {
+    Configuration conf = new Configuration();
+    setConf(conf);
+    conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
+    conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
+    QueryContext ctx = createContext(conf, "Test.query_1 name");
+    Path expectedFinalPath = new Path(LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT,
+      "Test.query_1_name-" + ctx.getQueryHandle() + ".zip");
+    FileSystem fs = expectedFinalPath.getFileSystem(conf);
+    expectedFinalPath = expectedFinalPath.makeQualified(fs);
+    validateFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip",
+      getMockedResultSetWithoutComma(), ctx, expectedFinalPath);
+    ZipEntry ze = null;
+    ZipInputStream zin = new ZipInputStream(fs.open(expectedFinalPath));
+    int i = 0;
+    while ((ze = zin.getNextEntry()) != null) {
+      Assert.assertEquals(ze.getName(), "Test.query_1_name-" + ctx.getQueryHandle() + "_part-" + i + ".csv");
+      i++;
+      zin.closeEntry();
+    }
+    zin.close();
+  }
+
   protected List<String> getExpectedCSVRows() {
     return new ArrayList<String>() {
       {