You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2022/09/08 05:12:26 UTC
[drill] branch master updated: DRILL-8283: Add a configurable recursive file listing size limit (#2632)
This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new bb58a66775 DRILL-8283: Add a configurable recursive file listing size limit (#2632)
bb58a66775 is described below
commit bb58a6677528d9833979248a506b87041eb4bb89
Author: James Turton <91...@users.noreply.github.com>
AuthorDate: Thu Sep 8 07:12:19 2022 +0200
DRILL-8283: Add a configurable recursive file listing size limit (#2632)
---
.../src/main/resources/core-site-example.xml | 16 ++++-
.../org/apache/drill/exec/util/FileSystemUtil.java | 71 ++++++++++++++++++++--
.../apache/drill/exec/util/FileSystemUtilTest.java | 18 ++++++
3 files changed, 99 insertions(+), 6 deletions(-)
diff --git a/distribution/src/main/resources/core-site-example.xml b/distribution/src/main/resources/core-site-example.xml
index 0261658c01..d870172d76 100644
--- a/distribution/src/main/resources/core-site-example.xml
+++ b/distribution/src/main/resources/core-site-example.xml
@@ -172,5 +172,19 @@
</description>
</property>
-->
-
+ <!--
+ Use the next property to set a limit on the numer of files that Drill
+ will list by recursing into a DFS directory tree. When the limit is
+ encountered the initiating operation will fail with an error. The
+ intended application of this limit is to allow admins to protect their
+ Drillbits from an errant or malicious SELECT * FROM dfs.huge_workspace
+ LIMIT 10 query, which will cause an OOM given a big enough workspace of
+ files. Defaults to 0 which means no limit.
+ -->
+ <!--
+ <property>
+ <name>drill.exec.recursive_file_listing_max_size</name>
+ <value>10000</value>
+ </property>
+ --->
</configuration>
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
index 82500da30f..488f3fb35a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/util/FileSystemUtil.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.util;
import org.apache.drill.common.exceptions.ErrorHelper;
+import org.apache.drill.common.exceptions.UserException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -28,9 +29,11 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.concurrent.CancellationException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -41,6 +44,7 @@ import java.util.stream.Stream;
public class FileSystemUtil {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSystemUtil.class);
+ public static final String RECURSIVE_FILE_LISTING_MAX_SIZE = "drill.exec.recursive_file_listing_max_size";
/**
* Filter that will accept all files and directories.
@@ -248,9 +252,24 @@ public class FileSystemUtil {
*/
private static List<FileStatus> listRecursive(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) {
ForkJoinPool pool = new ForkJoinPool();
+ AtomicInteger fileCounter = new AtomicInteger(0);
+ int recursiveListingMaxSize = fs.getConf().getInt(RECURSIVE_FILE_LISTING_MAX_SIZE, 0);
+
try {
- RecursiveListing task = new RecursiveListing(fs, path, scope, suppressExceptions, filter);
+ RecursiveListing task = new RecursiveListing(
+ fs,
+ path,
+ scope,
+ suppressExceptions,
+ filter,
+ fileCounter,
+ recursiveListingMaxSize,
+ pool
+ );
return pool.invoke(task);
+ } catch (CancellationException ex) {
+ logger.debug("RecursiveListing task to list {} was cancelled.", path);
+ return Collections.<FileStatus>emptyList();
} finally {
pool.shutdown();
}
@@ -287,13 +306,29 @@ public class FileSystemUtil {
private final Scope scope;
private final boolean suppressExceptions;
private final PathFilter filter;
+ // Running count of files for comparison with RECURSIVE_FILE_LISTING_MAX_SIZE
+ private final AtomicInteger fileCounter;
+ private final int recursiveListingMaxSize;
+ private final ForkJoinPool pool;
- RecursiveListing(FileSystem fs, Path path, Scope scope, boolean suppressExceptions, PathFilter filter) {
+ RecursiveListing(
+ FileSystem fs,
+ Path path,
+ Scope scope,
+ boolean suppressExceptions,
+ PathFilter filter,
+ AtomicInteger fileCounter,
+ int recursiveListingMaxSize,
+ ForkJoinPool pool
+ ) {
this.fs = fs;
this.path = path;
this.scope = scope;
this.suppressExceptions = suppressExceptions;
this.filter = filter;
+ this.fileCounter = fileCounter;
+ this.recursiveListingMaxSize = recursiveListingMaxSize;
+ this.pool = pool;
}
@Override
@@ -302,12 +337,39 @@ public class FileSystemUtil {
List<RecursiveListing> tasks = new ArrayList<>();
try {
- for (FileStatus status : fs.listStatus(path, filter)) {
+ FileStatus[] dirFs = fs.listStatus(path, filter);
+ if (fileCounter.addAndGet(dirFs.length) > recursiveListingMaxSize && recursiveListingMaxSize > 0 ) {
+ try {
+ throw UserException
+ .resourceError()
+ .message(
+ "File listing size limit of %d exceeded recursing through path %s, see JVM system property %s",
+ recursiveListingMaxSize,
+ path,
+ RECURSIVE_FILE_LISTING_MAX_SIZE
+ )
+ .build(logger);
+ } finally {
+ // Attempt to abort all tasks
+ this.pool.shutdownNow();
+ }
+ }
+
+ for (FileStatus status : dirFs) {
if (isStatusApplicable(status, scope)) {
statuses.add(status);
}
if (status.isDirectory()) {
- RecursiveListing task = new RecursiveListing(fs, status.getPath(), scope, suppressExceptions, filter);
+ RecursiveListing task = new RecursiveListing(
+ fs,
+ status.getPath(),
+ scope,
+ suppressExceptions,
+ filter,
+ fileCounter,
+ recursiveListingMaxSize,
+ pool
+ );
task.fork();
tasks.add(task);
}
@@ -328,5 +390,4 @@ public class FileSystemUtil {
return statuses;
}
}
-
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
index 299f1cc65e..48ee441ab6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/util/FileSystemUtilTest.java
@@ -17,6 +17,8 @@
*/
package org.apache.drill.exec.util;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
@@ -26,8 +28,10 @@ import java.io.IOException;
import java.util.Collections;
import java.util.List;
+import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
public class FileSystemUtilTest extends FileSystemUtilTestBase {
@@ -199,4 +203,18 @@ public class FileSystemUtilTest extends FileSystemUtilTestBase {
assertTrue("Should return empty result", fileStatuses.isEmpty());
}
+ @Test // DRILL-8283
+ public void testRecursiveListingMaxSize() throws IOException {
+ Configuration conf = fs.getConf();
+ int oldSize = conf.getInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 0);
+ conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, 5);
+
+ try {
+ FileSystemUtil.listAll(fs, new Path(base, "a"), true);
+ } catch (UserException ex) {
+ assertThat(ex.getMessage(), containsString("RESOURCE ERROR: File listing size limit"));
+ } finally {
+ conf.setInt(FileSystemUtil.RECURSIVE_FILE_LISTING_MAX_SIZE, oldSize);
+ }
+ }
}