You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by so...@apache.org on 2018/08/30 21:45:31 UTC

[drill] 02/04: DRILL-6640: Drill takes long time in planning when there are large number of files in views/tables DFS parent directory Modifying DotDrillUtil implementation to avoid using globStatus calls with GLOB for dot drill files

This is an automated email from the ASF dual-hosted git repository.

sorabh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit a801e1330cdc665edb4efa6569646ac29fcef17b
Author: kr-arjun <ar...@outlook.com>
AuthorDate: Thu Jul 26 16:52:48 2018 -0700

    DRILL-6640: Drill takes long time in planning when there are large number of files in views/tables DFS parent directory
    Modifying DotDrillUtil implementation to avoid using globStatus calls with GLOB for dot drill files
    
    Includes
    - Modified DotDrillUtil.getDotDrills implementation to avoid using DFS globStatus call with GLOB for a given base file name.
    - Added unit test cases for the new method.
    - Code refactoring to include additional comments.
    - Updated logic to use the globStatus call for paths that contain wildcards and do not end with .drill
    - Modified Testcase implementation to use BaseDirTestWatcher.
    
    closes #1405
---
 .../apache/drill/exec/dotdrill/DotDrillType.java   |  22 +++++
 .../apache/drill/exec/dotdrill/DotDrillUtil.java   |  92 +++++++++++++++++--
 .../drill/exec/dotdrill/TestDotDrillUtil.java      | 102 +++++++++++++++++++++
 3 files changed, 209 insertions(+), 7 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
index a8b5f4b..673e1c7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
@@ -56,6 +56,28 @@ public enum DotDrillType {
     return ending;
   }
 
+  /**
+   * Builds the glob suffix matching dot drill files of the given types: ".view.drill"-style for a
+   * single type, ".{a,b}.drill"-style alternation for several. Names are lower-cased with
+   * Locale.ROOT so the pattern does not vary with the JVM default locale.
+   * @param types Dot Drill Types to match; callers are expected to pass at least one
+   * @return glob pattern suffix covering every type in types
+   */
+  public static String getDrillFileGlobPattern(DotDrillType[] types) {
+    if (types.length == 1) {
+      return "." + types[0].name().toLowerCase(java.util.Locale.ROOT) + ".drill";
+    }
+
+    StringBuilder pattern = new StringBuilder(".{");
+    for (DotDrillType type : types) {
+      if (pattern.length() > 2) {
+        pattern.append(',');
+      }
+      pattern.append(type.name().toLowerCase(java.util.Locale.ROOT));
+    }
+    return pattern.append("}.drill").toString();
+  }
+
   public static final String DOT_DRILL_GLOB;
 
   static{
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
index 226aa24..b6571df 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
@@ -18,10 +18,14 @@
 package org.apache.drill.exec.dotdrill;
 
 import java.io.IOException;
+import java.io.FileNotFoundException;
 import java.util.List;
+import java.util.Arrays;
+import java.util.ArrayList;
 
 import org.apache.drill.exec.store.dfs.DrillFileSystem;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.GlobPattern;
 import org.apache.hadoop.fs.Path;
 
 import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
@@ -29,7 +33,15 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
 public class DotDrillUtil {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DotDrillUtil.class);
 
-  private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, FileStatus[] statuses, DotDrillType... types){
+  /**
+   * Returns List of DotDrillFile objects for given list of FileStatus objects matching the given Dot Drill File Types.
+   * Return an empty list if no FileStatus matches the given Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param statuses List of FileStatus objects
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   */
+  private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, List<FileStatus> statuses, DotDrillType... types){
     List<DotDrillFile> files = Lists.newArrayList();
     for(FileStatus s : statuses){
       DotDrillFile f = DotDrillFile.create(fs, s);
@@ -48,16 +60,82 @@ public class DotDrillUtil {
     }
     return files;
   }
-
+  /**
+   * Return list of DotDrillFile objects whose file name ends with .drill and matches the provided Dot Drill file types
+   * in a given parent Path.
+   * Return an empty list if no file matches the given Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   * @throws IOException if any I/O error occurs when fetching file status
+   */
   public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, DotDrillType... types) throws IOException{
-    return getDrillFiles(fs, fs.globStatus(new Path(root, "*.drill")), types);
+    return getDrillFiles(fs, getDrillFileStatus(fs, root, "*.drill"), types);
   }
 
+  /**
+   * Return list of DotDrillFile objects whose file name matches the provided name pattern and Dot Drill file types
+   * in a given parent Path.
+   * Return an empty list if no file matches the given file name and Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param name name/pattern of the file
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   * @throws IOException if any I/O error occurs when fetching file status
+   */
   public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException{
-    if(!name.endsWith(".drill")) {
-      name = name + DotDrillType.DOT_DRILL_GLOB;
-    }
+    return getDrillFiles(fs, getDrillFileStatus(fs, root, name, types), types);
+  }
 
-    return getDrillFiles(fs, fs.globStatus(new Path(root, name)), types);
+  /**
+   * Return list of FileStatus objects matching '.drill' files for a given name in the parent path.
+   *   a) If given name ends with '.drill', it returns the statuses of all files matching the name pattern.
+   *   b) If given name does not end with '.drill', it returns the statuses of files named name followed by the ending of
+   *       1) all the valid DotDrillTypes if no DotDrillType is provided.
+   *       2) given DotDrillTypes if DotDrillType is provided.
+   * Return an empty list if no file matches the pattern and Dot Drill file types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param name name/pattern of the file
+   * @param types Dot Drill Types to be matched. Applies type matching only if name does not end with '.drill'
+   * @return List of FileStatuses for files matching name and Dot Drill file types.
+   * @throws IOException  if any I/O error occurs when fetching file status
+   */
+  private static List<FileStatus> getDrillFileStatus(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException {
+    List<FileStatus> statuses = new ArrayList<FileStatus>();
+
+    // Name handed to globStatus; stays null when the path is literal and can be looked up per type.
+    String globName = null;
+    if (name.endsWith(".drill")) {
+      globName = name;
+    } else {
+      // If no DotDrillTypes are provided, check file status for all DotDrillTypes available.
+      if (types.length == 0) {
+        types = DotDrillType.values();
+      }
+      // Glob only when the full path (root included) contains a glob pattern or wildcards.
+      if (new GlobPattern(new Path(root, name).toString()).hasWildcard()) {
+        globName = name + DotDrillType.getDrillFileGlobPattern(types);
+      }
+    }
+
+    if (globName != null) {
+      FileStatus[] matched = fs.globStatus(new Path(root, globName));
+      if (matched != null) { // a null result is treated as "no match"
+        statuses.addAll(Arrays.asList(matched));
+      }
+      return statuses;
+    }
+    // No wildcards: look up each candidate "name + type ending" directly instead of globbing.
+    for (DotDrillType dotType : types) {
+      try {
+        statuses.addAll(Arrays.asList(fs.listStatus(new Path(root, name + dotType.getEnding()))));
+      } catch (FileNotFoundException ignored) {
+        // Deliberately ignored: no file with this type's ending exists for the name; try the next type.
+      }
+    }
+    return statuses;
   }
 }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
new file mode 100644
index 0000000..1866c9c
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.nio.file.Files;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+
+public class TestDotDrillUtil {
+
+  private static File tempDir;
+  private static Path tempPath;
+  private static DrillFileSystem dfs;
+
+  @ClassRule
+  public static final BaseDirTestWatcher dirTestWatcher = new BaseDirTestWatcher();
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
+    dfs = new DrillFileSystem(conf);
+    tempDir = dirTestWatcher.getTmpDir();
+    tempPath = new Path(tempDir.getAbsolutePath());
+  }
+
+  /** Looks up view files by bare name, full name and glob pattern, and checks names that have no matching view. */
+  @Test //DRILL-6640
+  public void testViewFileStatus() throws Exception {
+    List<DotDrillFile> dotDrillFiles;
+
+    Files.createFile(Paths.get(tempDir + "/test1.view.drill"));
+    Files.createFile(Paths.get(tempDir + "/test2.view.drill"));
+    Files.createFile(Paths.get(tempDir + "/test1.txt"));
+
+    // Every assertion reports the actual count so a failure can be diagnosed from its message alone.
+    // Check for view file by passing file name without extension
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1", DotDrillType.VIEW);
+    assertTrue("test1 as VIEW: expected 1, found " + dotDrillFiles.size(), dotDrillFiles.size() == 1);
+
+    // Check for dot drill file by passing full name
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.view.drill");
+    assertTrue("test1.view.drill: expected 1, found " + dotDrillFiles.size(), dotDrillFiles.size() == 1);
+
+    // Check for dot drill files by passing pattern *.drill
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "*.drill");
+    assertTrue("*.drill: expected at least 2, found " + dotDrillFiles.size(), dotDrillFiles.size() >= 2);
+
+    // Check for non existent file
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "junkfile", DotDrillType.VIEW);
+    assertTrue("junkfile as VIEW: expected 0, found " + dotDrillFiles.size(), dotDrillFiles.size() == 0);
+
+    // Check for existing file which is not a drill view file
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.txt", DotDrillType.VIEW);
+    assertTrue("test1.txt as VIEW: expected 0, found " + dotDrillFiles.size(), dotDrillFiles.size() == 0);
+
+    // Check for views files by passing file name having glob without any extension
+    dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test*", DotDrillType.VIEW);
+    assertTrue("test* as VIEW: expected at least 2, found " + dotDrillFiles.size(), dotDrillFiles.size() >= 2);
+  }
+
+  /** Creates one file per DotDrillType for the same base name and expects a lookup by that name to return them all. */
+  @Test //DRILL-6640
+  public void testDotFilesStatus() throws Exception {
+    String filePrefix = "sample";
+    //Creating different Dot Drill files supported for base file name "sample"
+    for (DotDrillType dotType : DotDrillType.values()) {
+      Files.createFile(Paths.get(tempDir + "/" + filePrefix + dotType.getEnding()));
+    }
+    // Check Dot File count for "sample" file created for available Drill dot types
+    List<DotDrillFile> dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, filePrefix);
+    assertTrue(filePrefix + ": expected " + DotDrillType.values().length + ", found " + dotDrillFiles.size(), dotDrillFiles.size() == DotDrillType.values().length);
+  }
+}