You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by so...@apache.org on 2018/08/30 21:45:31 UTC
[drill] 02/04: DRILL-6640: Drill takes long time in planning when
there are large number of files in views/tables DFS parent directory
Modifying DotDrillUtil implementation to avoid using globStatus calls with
GLOB for dot drill files
This is an automated email from the ASF dual-hosted git repository.
sorabh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit a801e1330cdc665edb4efa6569646ac29fcef17b
Author: kr-arjun <ar...@outlook.com>
AuthorDate: Thu Jul 26 16:52:48 2018 -0700
DRILL-6640: Drill takes a long time in planning when there are a large number of files in the views/tables DFS parent directory
Modifying DotDrillUtil implementation to avoid using globStatus calls with GLOB for dot drill files
Includes
- Modified DotDrillUtil.getDotDrills implementation to avoid using DFS globStatus call with GLOB for a given base file name.
- Added unit test cases for the new method.
- Code refactoring to include additional comments.
- Updated logic to use globStatus call for path with wildcards and not ending with .drill
- Modified test case implementation to use BaseDirTestWatcher.
closes #1405
---
.../apache/drill/exec/dotdrill/DotDrillType.java | 22 +++++
.../apache/drill/exec/dotdrill/DotDrillUtil.java | 92 +++++++++++++++++--
.../drill/exec/dotdrill/TestDotDrillUtil.java | 102 +++++++++++++++++++++
3 files changed, 209 insertions(+), 7 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
index a8b5f4b..673e1c7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java
@@ -56,6 +56,28 @@ public enum DotDrillType {
return ending;
}
+ /**
+  * Builds the file-name suffix glob for the given Dot Drill types from their lower-cased enum names:
+  * ".view.drill" for the single type VIEW, ".{a,b}.drill" for several types (an empty array yields ".{}.drill").
+  * @param types Dot Drill types the pattern should match
+  * @return glob pattern suffix matching every Dot Drill type provided in the types param
+  */
+ public static String getDrillFileGlobPattern(DotDrillType[] types) {
+   // Locale.ROOT keeps the lower-casing independent of the JVM default locale (e.g. Turkish dotless i).
+   if (types.length == 1) {
+     return "." + types[0].name().toLowerCase(java.util.Locale.ROOT) + ".drill";
+   }
+
+   // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
+   StringBuilder glob = new StringBuilder(".{");
+   String separator = "";
+   for (DotDrillType type : types) {
+     glob.append(separator).append(type.name().toLowerCase(java.util.Locale.ROOT));
+     separator = ",";
+   }
+   return glob.append("}.drill").toString();
+ }
+
public static final String DOT_DRILL_GLOB;
static{
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
index 226aa24..b6571df 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java
@@ -18,10 +18,14 @@
package org.apache.drill.exec.dotdrill;
import java.io.IOException;
+import java.io.FileNotFoundException;
import java.util.List;
+import java.util.Arrays;
+import java.util.ArrayList;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.GlobPattern;
import org.apache.hadoop.fs.Path;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
@@ -29,7 +33,15 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
public class DotDrillUtil {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DotDrillUtil.class);
- private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, FileStatus[] statuses, DotDrillType... types){
+ /**
+ * Returns List of DotDrillFile objects for given list of FileStatus objects matching the given Dot Drill File Types.
+ * Return an empty list if no FileStatus matches the given Dot Drill File Types.
+ * @param fs DrillFileSystem instance
+ * @param statuses List of FileStatus objects
+ * @param types Dot Drill Types to be matched
+ * @return List of matched DotDrillFile objects
+ */
+ private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, List<FileStatus> statuses, DotDrillType... types){
List<DotDrillFile> files = Lists.newArrayList();
for(FileStatus s : statuses){
DotDrillFile f = DotDrillFile.create(fs, s);
@@ -48,16 +60,82 @@ public class DotDrillUtil {
}
return files;
}
-
+ /**
+ * Return list of DotDrillFile objects whose file name ends with .drill and matches the provided Drill Dot files types
+ * in a given parent Path.
+ * Returns an empty list if no file matches the given Dot Drill File Types.
+ * @param fs DrillFileSystem instance
+ * @param root parent Path
+ * @param types Dot Drill Types to be matched
+ * @return List of matched DotDrillFile objects
+ * @throws IOException
+ */
public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, DotDrillType... types) throws IOException{
- return getDrillFiles(fs, fs.globStatus(new Path(root, "*.drill")), types);
+ return getDrillFiles(fs, getDrillFileStatus(fs, root,"*.drill"), types);
}
+ /**
+ * Return list of DotDrillFile objects whose file name matches the provided name pattern and Drill Dot files types
+ * in a given parent Path.
+ * Returns an empty list if no file matches the given file name and Dot Drill File Types.
+ * @param fs DrillFileSystem instance
+ * @param root parent Path
+ * @param name name/pattern of the file
+ * @param types Dot Drill Types to be matched
+ * @return List of matched DotDrillFile objects
+ * @throws IOException
+ */
public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException{
- if(!name.endsWith(".drill")) {
- name = name + DotDrillType.DOT_DRILL_GLOB;
- }
+ return getDrillFiles(fs, getDrillFileStatus(fs, root, name, types), types);
+ }
- return getDrillFiles(fs, fs.globStatus(new Path(root, name)), types);
+ /**
+  * Return list of FileStatus objects matching '.drill' files for a given name in the parent path.
+  * a) If the given name ends with '.drill', it returns the statuses of all files matching the name pattern.
+  * b) If the given name does not end with '.drill', it returns the statuses of files starting with name
+  *    and ending with a pattern matching
+  *    1) all the valid DotDrillTypes if no DotDrillType is provided.
+  *    2) the given DotDrillTypes if DotDrillType is provided.
+  * Returns an empty list if no file matches the pattern and Drill Dot file types.
+  * @param fs DrillFileSystem instance
+  * @param root parent Path
+  * @param name name/pattern of the file
+  * @param types Dot Drill Types to be matched. Applies type matching only if name does not end with '.drill'
+  * @return List of FileStatuses for files matching name and Drill Dot file types.
+  * @throws IOException if any I/O error occurs when fetching file status
+  */
+ private static List<FileStatus> getDrillFileStatus(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException {
+   List<FileStatus> statuses = new ArrayList<>();
+   // Name handed to globStatus; stays null when the direct per-type lookup below can be used instead.
+   String globName = null;
+   if (name.endsWith(".drill")) {
+     globName = name;
+   } else {
+     // If no DotDrillTypes are provided, check file status for all DotDrillTypes available.
+     if (types.length == 0) {
+       types = DotDrillType.values();
+     }
+     // Glob only when the path really contains a glob pattern or wildcards.
+     if (new GlobPattern(new Path(root, name).toString()).hasWildcard()) {
+       globName = name + DotDrillType.getDrillFileGlobPattern(types);
+     }
+   }
+   if (globName != null) {
+     FileStatus[] matches = fs.globStatus(new Path(root, globName));
+     // A null result is treated as "no matching files".
+     if (matches != null) {
+       statuses.addAll(Arrays.asList(matches));
+     }
+     return statuses;
+   }
+   // No wildcards: ask for each candidate file by its exact name instead of globbing the parent directory.
+   for (DotDrillType dotType : types) {
+     try {
+       statuses.addAll(Arrays.asList(fs.listStatus(new Path(root, name + dotType.getEnding()))));
+     } catch (FileNotFoundException ignored) {
+       // Deliberately ignored: no file of this type exists for the name, so move on to the next type.
+     }
+   }
+   return statuses;
}
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
new file mode 100644
index 0000000..1866c9c
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.dotdrill;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.nio.file.Files;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+
+public class TestDotDrillUtil {
+
+ private static File tempDir;
+ private static Path tempPath;
+ private static DrillFileSystem dfs;
+
+ @ClassRule
+ public static final BaseDirTestWatcher dirTestWatcher = new BaseDirTestWatcher();
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
+ dfs = new DrillFileSystem(conf);
+ tempDir = dirTestWatcher.getTmpDir();
+ tempPath = new Path(tempDir.getAbsolutePath());
+ }
+
+
+ @Test //DRILL-6640
+ public void testViewFileStatus() throws Exception {
+ List<DotDrillFile> dotDrillFiles;
+
+ Files.createFile(Paths.get(tempDir + "/test1.view.drill"));
+ Files.createFile(Paths.get(tempDir + "/test2.view.drill"));
+ Files.createFile(Paths.get(tempDir + "/test1.txt"));
+
+
+ // Check for view file by passing file name without extension
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1", DotDrillType.VIEW);
+ assertTrue(dotDrillFiles.size() == 1);
+
+ // Check for dot drill file by passing full name
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.view.drill");
+ assertTrue(dotDrillFiles.size() == 1);
+
+ // Check for dot drill files by passing pattern *.drill
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "*.drill");
+ assertTrue(dotDrillFiles.size() >= 2);
+
+ // Check for non existent file
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "junkfile", DotDrillType.VIEW);
+ assertTrue(dotDrillFiles.size() == 0);
+
+ // Check for existing file which is not a drill view file
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.txt", DotDrillType.VIEW);
+ assertTrue(dotDrillFiles.size() == 0);
+
+ // Check for views files by passing file name having glob without any extension
+ dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test*", DotDrillType.VIEW);
+ assertTrue(dotDrillFiles.size() >= 2);
+ }
+
+ @Test //DRILL-6640
+ public void testDotFilesStatus() throws Exception {
+ String filePrefix = "sample";
+ //Creating different Dot Drill files supported for base file name "sample"
+ for (DotDrillType dotType : DotDrillType.values()) {
+ Files.createFile(Paths.get(tempDir + "/" + filePrefix + dotType.getEnding()));
+ }
+ // Check Dot File count for "sample" file created for available Drill dot types
+ List<DotDrillFile> dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "sample");
+ assertTrue(dotDrillFiles.size() == DotDrillType.values().length);
+ }
+
+}