You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2022/09/08 05:12:26 UTC

[drill] branch master updated: DRILL-8283: Add a configurable recursive file listing size limit (#2632)

This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new bb58a66775 DRILL-8283: Add a configurable recursive file listing size limit (#2632)
bb58a66775 is described below

commit bb58a6677528d9833979248a506b87041eb4bb89
Author: James Turton <91...@users.noreply.github.com>
AuthorDate: Thu Sep 8 07:12:19 2022 +0200

    DRILL-8283: Add a configurable recursive file listing size limit (#2632)
---
 .../src/main/resources/core-site-example.xml       | 16 ++++-
 .../org/apache/drill/exec/util/FileSystemUtil.java | 71 ++++++++++++++++++++--
 .../apache/drill/exec/util/FileSystemUtilTest.java | 18 ++++++
 3 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/distribution/src/main/resources/core-site-example.xml b/distribution/src/main/resources/core-site-example.xml
index 0261658c01..d870172d76 100644
--- a/distribution/src/main/resources/core-site-example.xml
+++ b/distribution/src/main/resources/core-site-example.xml
@@ -172,5 +172,19 @@
       </description>
     </property>
     -->
-
+    <!--
+    Use the next property to set a limit on the numer of files that Drill
+    will list by recursing into a DFS directory tree. When the limit is
+    encountered the initiating operation will fail with an error. The
+    intended application of this limit is to allow admins to protect their
+    Drillbits from an errant or malicious SELECT * FROM dfs.huge_workspace
+    LIMIT 10 query, which will cause an OOM given a big enough workspace of
+    files. Defaults to 0 which means no limit.
+    -->
+    <!--
+    <property>
+      <name>drill.exec.recursive_file_listing_max_size</name>
+      <value>10000</value>
+    </property>
+    --->
 </configuration>
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
index 82500da30f..488f3fb35a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.util;
 
 import org.apache.drill.common.exceptions.ErrorHelper;
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -28,9 +29,11 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.CancellationException;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.ForkJoinTask;
 import java.util.concurrent.RecursiveTask;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -41,6 +44,7 @@ import java.util.stream.Stream;
 public class FileSystemUtil {
 
   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSystemUtil.class);
+  public static final String RECURSIVE_FILE_LISTING_MAX_SIZE = "drill.exec.recursive_file_listing_max_size";
 
   /**
    * Filter that will accept all files and directories.
@@ -248,9 +252,24 @@ public class FileSystemUtil {
    */
   private static List<FileStatus> listRecursive(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) {
     ForkJoinPool pool = new ForkJoinPool();
+    AtomicInteger fileCounter = new AtomicInteger(0);
+    int recursiveListingMaxSize = fs.getConf().getInt(RECURSIVE_FILE_LISTING_MAX_SIZE, 0);
+
     try {
-      RecursiveListing task = new RecursiveListing(fs, path, scope, suppressExceptions, filter);
+      RecursiveListing task = new RecursiveListing(
+        fs,
+        path,
+        scope,
+        suppressExceptions,
+        filter,
+        fileCounter,
+        recursiveListingMaxSize,
+        pool
+      );
       return pool.invoke(task);
+    } catch (CancellationException ex) {
+      logger.debug("RecursiveListing task to list {} was cancelled.", path);
+      return Collections.<FileStatus>emptyList();
     } finally {
       pool.shutdown();
     }
@@ -287,13 +306,29 @@ public class FileSystemUtil {
     private final Scope scope;
     private final boolean suppressExceptions;
     private final PathFilter filter;
+    // Running count of files for comparison with RECURSIVE_FILE_LISTING_MAX_SIZE
+    private final AtomicInteger fileCounter;
+    private final int recursiveListingMaxSize;
+    private final ForkJoinPool pool;
 
-    RecursiveListing(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) {
+    RecursiveListing(
+      FileSystem fs,
+      Path path,
+      Scope scope,
+      boolean suppressExceptions,
+      PathFilter filter,
+      AtomicInteger fileCounter,
+      int recursiveListingMaxSize,
+      ForkJoinPool pool
+    ) {
       this.fs = fs;
       this.path = path;
       this.scope = scope;
       this.suppressExceptions = suppressExceptions;
       this.filter = filter;
+      this.fileCounter = fileCounter;
+      this.recursiveListingMaxSize = recursiveListingMaxSize;
+      this.pool = pool;
     }
 
     @Override
@@ -302,12 +337,39 @@ public class FileSystemUtil {
       List<RecursiveListing> tasks = new ArrayList<>();
 
       try {
-        for (FileStatus status : fs.listStatus(path, filter)) {
+        FileStatus[] dirFs = fs.listStatus(path, filter);
+        if (fileCounter.addAndGet(dirFs.length) > recursiveListingMaxSize && recursiveListingMaxSize > 0 ) {
+          try {
+            throw UserException
+              .resourceError()
+              .message(
+                "File listing size limit of %d exceeded recursing through path %s, see JVM system property %s",
+                recursiveListingMaxSize,
+                path,
+                RECURSIVE_FILE_LISTING_MAX_SIZE
+              )
+              .build(logger);
+          } finally {
+            // Attempt to abort all tasks
+            this.pool.shutdownNow();
+          }
+        }
+
+        for (FileStatus status : dirFs) {
           if (isStatusApplicable(status, scope)) {
             statuses.add(status);
           }
           if (status.isDirectory()) {
-            RecursiveListing task = new RecursiveListing(fs, status.getPath(), scope, suppressExceptions, filter);
+            RecursiveListing task = new RecursiveListing(
+              fs,
+              status.getPath(),
+              scope,
+              suppressExceptions,
+              filter,
+              fileCounter,
+              recursiveListingMaxSize,
+              pool
+            );
             task.fork();
             tasks.add(task);
           }
@@ -328,5 +390,4 @@ public class FileSystemUtil {
       return statuses;
     }
   }
-
 }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
index 299f1cc65e..48ee441ab6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
@@ -17,6 +17,8 @@
  */
 package org.apache.drill.exec.util;
 
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
@@ -26,8 +28,10 @@ import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 
+import static org.hamcrest.CoreMatchers.containsString;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 public class FileSystemUtilTest extends FileSystemUtilTestBase {
@@ -199,4 +203,18 @@ public class FileSystemUtilTest extends FileSystemUtilTestBase {
     assertTrue("Should return empty result", fileStatuses.isEmpty());
   }
 
+  @Test // DRILL-8283
+  public void testRecursiveListingMaxSize() throws IOException {
+    Configuration conf = fs.getConf();
+    int oldSize = conf.getInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 0);
+    conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 5);
+
+    try {
+      FileSystemUtil.listAll(fs, new Path(base, "a"), true);
+    } catch (UserException ex) {
+      assertThat(ex.getMessage(), containsString("RESOURCE ERROR: File listing size limit"));
+    } finally {
+      conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, oldSize);
+    }
+  }
 }