You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by GitBox <gi...@apache.org> on 2018/08/30 21:45:40 UTC

[GitHub] sohami closed pull request #1405: DRILL-6640: Drill takes long time in planning when there are large number of files in views/tables DFS parent directory

sohami closed pull request #1405: DRILL-6640: Drill takes long time in planning when there are large number of files in views/tables DFS parent directory
URL: https://github.com/apache/drill/pull/1405
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
index e94d9f85983..5d52d27235c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
@@ -56,6 +56,28 @@ public String getEnding() {
     return ending;
   }
 
+  /**
+   * Builds the glob suffix for the given types, e.g. ".view.drill" for VIEW alone or ".{a,b}.drill" for several.
+   * Names are lower-cased with Locale.ROOT so the glob does not depend on the JVM default locale.
+   * @param types Dot Drill Types to match (an empty array yields the degenerate ".{}.drill")
+   * @return glob suffix to append to a file name or name pattern
+   */
+  public static String getDrillFileGlobPattern(DotDrillType[] types) {
+    if (types.length == 1) {
+      return "." + types[0].name().toLowerCase(java.util.Locale.ROOT) + ".drill";
+    }
+
+    // The builder never leaves this method, so an unsynchronized StringBuilder is sufficient.
+    StringBuilder glob = new StringBuilder(".{");
+    for (DotDrillType type : types) {
+      if (glob.length() > 2) { // anything beyond the ".{" prefix means a name was already added
+        glob.append(',');
+      }
+      glob.append(type.name().toLowerCase(java.util.Locale.ROOT));
+    }
+    return glob.append("}.drill").toString();
+  }
+
   public static final String DOT_DRILL_GLOB;
 
   static{
@@ -70,4 +92,4 @@ public String getEnding() {
     b.append("}.drill");
     DOT_DRILL_GLOB = b.toString();
   }
-}
\ No newline at end of file
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
index e6ddc1d41a1..d29c7e94cde 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
@@ -18,10 +18,14 @@
 package org.apache.drill.exec.dotdrill;
 
 import java.io.IOException;
+import java.io.FileNotFoundException;
 import java.util.List;
+import java.util.Arrays;
+import java.util.ArrayList;
 
 import org.apache.drill.exec.store.dfs.DrillFileSystem;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.GlobPattern;
 import org.apache.hadoop.fs.Path;
 
 import com.google.common.collect.Lists;
@@ -29,7 +33,15 @@
 public class DotDrillUtil {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DotDrillUtil.class);
 
-  private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, FileStatus[] statuses, DotDrillType... types){
+  /**
+   * Returns List of DotDrillFile objects for given list of FileStatus objects matching the given Dot Drill File Types.
+   * Return an empty list if no FileStatus matches the given Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param statuses List of FileStatus objects
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   */
+  private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, List<FileStatus> statuses, DotDrillType... types){
     List<DotDrillFile> files = Lists.newArrayList();
     for(FileStatus s : statuses){
       DotDrillFile f = DotDrillFile.create(fs, s);
@@ -48,16 +60,82 @@
     }
     return files;
   }
-
+  /**
+   * Return list of DotDrillFile objects whose file name ends with .drill and matches the provided Dot Drill file types
+   * in a given parent Path.
+   * Return an empty list if no file matches the given Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   * @throws IOException if the file status lookup on the file system fails
+   */
   public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, DotDrillType... types) throws IOException{
-    return getDrillFiles(fs, fs.globStatus(new Path(root, "*.drill")), types);
+    return getDrillFiles(fs, getDrillFileStatus(fs, root,"*.drill"), types);
   }
 
+  /**
+   * Return list of DotDrillFile objects whose file name matches the provided name pattern and Dot Drill file types
+   * in a given parent Path.
+   * Return an empty list if no file matches the given file name and Dot Drill File Types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param name name/pattern of the file, with or without the '.drill' ending
+   * @param types Dot Drill Types to be matched
+   * @return List of matched DotDrillFile objects
+   * @throws IOException if the file status lookup on the file system fails
+   */
   public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException{
-    if(!name.endsWith(".drill")) {
-      name = name + DotDrillType.DOT_DRILL_GLOB;
-    }
+   return getDrillFiles(fs, getDrillFileStatus(fs, root, name, types), types);
+  }
 
-    return getDrillFiles(fs, fs.globStatus(new Path(root, name)), types);
+  /**
+   * Return list of FileStatus objects matching '.drill' files for a given name in the parent path.
+   *   a) If given name ends with '.drill', it returns the statuses of all files matching the name pattern.
+   *   b) If given name does not end with '.drill', it returns file statuses starting with name
+   *      and ending with pattern matching
+   *       1) all the valid DotDrillTypes if no DotDrillType is provided.
+   *       2) given DotDrillTypes if DotDrillType is provided.
+   * Return an empty list if no file matches the pattern and Drill Dot file types.
+   * @param fs DrillFileSystem instance
+   * @param root parent Path
+   * @param name name/pattern of the file
+   * @param types Dot Drill Types to be matched. Applies type matching only if name does not end with '.drill'
+   * @return List of FileStatuses for files matching name and Drill Dot file types.
+   * @throws IOException  if any I/O error occurs when fetching file status
+   */
+  private static List<FileStatus> getDrillFileStatus(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException {
+    List<FileStatus> statuses = new ArrayList<FileStatus>();
+
+    if (name.endsWith(".drill")) {
+      // globStatus may return null (Hadoop does so for a non-glob path that does not exist), hence the guard.
+      FileStatus[] status = fs.globStatus(new Path(root, name));
+      if (status != null) {
+        statuses.addAll(Arrays.asList(status));
+      }
+    } else {
+      // If no DotDrillTypes are provided, check all available DotDrillTypes; else only the provided types.
+      if (types.length == 0) {
+        types = DotDrillType.values();
+      }
+      // Check if path has glob pattern or wildcards. If yes, use globStatus with globPattern for given types.
+      GlobPattern pathGlob = new GlobPattern((new Path(root, name)).toString());
+      if (pathGlob.hasWildcard()) {
+        String patternAppliedName = name + DotDrillType.getDrillFileGlobPattern(types);
+        FileStatus[] status = fs.globStatus(new Path(root, patternAppliedName));
+        if (status != null) {
+          statuses.addAll(Arrays.asList(status));
+        }
+      } else { // use list status if no glob_pattern/wildcards exist in path
+        for (DotDrillType dotType : types) {
+          try {
+            FileStatus[] status = fs.listStatus(new Path(root, name + dotType.getEnding()));
+            statuses.addAll(Arrays.asList(status));
+          } catch (FileNotFoundException ignored) {
+            // Deliberately ignored: no file of this type exists for the name, so it contributes no status.
+          }
+        }
+      }
+    }
+    return statuses;
   }
 }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
new file mode 100644
index 00000000000..1866c9c634b
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.nio.file.Files;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+
+public class TestDotDrillUtil {
+
+  @ClassRule
+  public static final BaseDirTestWatcher dirTestWatcher = new BaseDirTestWatcher();
+
+  private static DrillFileSystem dfs;
+  private static File tempDir;
+  private static Path tempPath;
+
+  /** Resolves the watcher's temp directory and creates a DrillFileSystem over the default file system. */
+  @BeforeClass
+  public static void setup() throws Exception {
+    tempDir = dirTestWatcher.getTmpDir();
+    tempPath = new Path(tempDir.getAbsolutePath());
+
+    Configuration conf = new Configuration();
+    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
+    dfs = new DrillFileSystem(conf);
+  }
+
+  /** Looks up view files by bare name, full name and glob, plus names that must match nothing. */
+  @Test //DRILL-6640
+  public void testViewFileStatus() throws Exception {
+    for (String fileName : new String[] {"test1.view.drill", "test2.view.drill", "test1.txt"}) {
+      Files.createFile(Paths.get(tempDir + "/" + fileName));
+    }
+
+    // File name without extension: exactly one view file is expected
+    List<DotDrillFile> byBareName = DotDrillUtil.getDotDrills(dfs, tempPath, "test1", DotDrillType.VIEW);
+    assertTrue(byBareName.size() == 1);
+
+    // Full dot drill file name
+    List<DotDrillFile> byFullName = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.view.drill");
+    assertTrue(byFullName.size() == 1);
+
+    // Pattern *.drill
+    List<DotDrillFile> byDrillGlob = DotDrillUtil.getDotDrills(dfs, tempPath, "*.drill");
+    assertTrue(byDrillGlob.size() >= 2);
+
+    // Non existent file
+    List<DotDrillFile> missing = DotDrillUtil.getDotDrills(dfs, tempPath, "junkfile", DotDrillType.VIEW);
+    assertTrue(missing.size() == 0);
+
+    // Existing file which is not a drill view file
+    List<DotDrillFile> notAView = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.txt", DotDrillType.VIEW);
+    assertTrue(notAView.size() == 0);
+
+    // File name having a glob but no extension
+    List<DotDrillFile> byNameGlob = DotDrillUtil.getDotDrills(dfs, tempPath, "test*", DotDrillType.VIEW);
+    assertTrue(byNameGlob.size() >= 2);
+  }
+
+  /** Creates one file per Dot Drill type for a common base name and expects a typeless lookup to find them all. */
+  @Test //DRILL-6640
+  public void testDotFilesStatus() throws Exception {
+    final String filePrefix = "sample";
+    final DotDrillType[] allTypes = DotDrillType.values();
+    // Create one dot drill file per supported type for base file name "sample"
+    for (DotDrillType dotType : allTypes) {
+      Files.createFile(Paths.get(tempDir + "/" + filePrefix + dotType.getEnding()));
+    }
+    // Without explicit types the lookup covers every available type
+    List<DotDrillFile> found = DotDrillUtil.getDotDrills(dfs, tempPath, "sample");
+    assertTrue(found.size() == allTypes.length);
+  }
+}


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services