Posted to commits@druid.apache.org by ji...@apache.org on 2020/08/21 16:44:24 UTC

[druid] branch master updated: Add maxNumFiles to splitHintSpec (#10243)

This is an automated email from the ASF dual-hosted git repository.

jihoonson pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new b5b3e6e  Add maxNumFiles to splitHintSpec (#10243)
b5b3e6e is described below

commit b5b3e6ecce6f5346c4a1ed440b27656dd3b130a5
Author: Jihoon Son <ji...@apache.org>
AuthorDate: Fri Aug 21 09:43:58 2020 -0700

    Add maxNumFiles to splitHintSpec (#10243)
    
    * Add maxNumFiles to splitHintSpec
    
    * missing link
    
    * fix build failure; use maxNumFiles for integration tests
    
    * spelling
    
    * lower default
    
    * Update docs/ingestion/native-batch.md
    
    Co-authored-by: Abhishek Agarwal <14...@users.noreply.github.com>
    
    * address comments; change default maxSplitSize
    
    * spelling
    
    * typos and doc
    
    * same change for segments splitHintSpec
    
    * fix build
    
    * fix build
    
    Co-authored-by: Abhishek Agarwal <14...@users.noreply.github.com>
---
 .../druid/data/input/MaxSizeSplitHintSpec.java     | 54 ++++++++++++---
 .../druid/data/input/SegmentsSplitHintSpec.java    | 27 ++++++--
 .../data/input/impl/SplittableInputSource.java     |  2 +-
 .../druid/data/input/MaxSizeSplitHintSpecTest.java | 77 ++++++++++++++++++++--
 .../data/input/impl/LocalInputSourceTest.java      |  9 +--
 docs/ingestion/native-batch.md                     |  6 +-
 .../data/input/aliyun/OssInputSourceTest.java      |  5 +-
 .../data/input/azure/AzureInputSourceTest.java     |  2 +-
 .../google/GoogleCloudStorageInputSourceTest.java  |  5 +-
 .../inputsource/hdfs/HdfsInputSourceTest.java      |  9 ++-
 .../druid/data/input/s3/S3InputSourceTest.java     |  5 +-
 .../firehose/IngestSegmentFirehoseFactory.java     | 10 ++-
 .../druid/indexing/input/DruidInputSource.java     |  8 ++-
 .../task/ClientCompactionTaskQuerySerdeTest.java   |  7 +-
 .../common/task/CompactionTaskParallelRunTest.java |  2 +-
 .../AbstractParallelIndexSupervisorTaskTest.java   |  2 +-
 .../ITBestEffortRollupParallelIndexTest.java       |  3 +-
 .../indexer/ITPerfectRollupParallelIndexTest.java  |  3 +-
 .../indexer/wikipedia_cloud_index_task.json        |  2 +-
 .../wikipedia_local_input_source_index_task.json   |  2 +-
 ...kipedia_parallel_ingest_segment_index_task.json |  2 +-
 .../indexer/wikipedia_parallel_reindex_task.json   |  2 +-
 .../DataSourceCompactionConfigTest.java            |  3 +-
 .../UserCompactionTaskQueryTuningConfigTest.java   |  3 +-
 website/.spelling                                  |  4 ++
 25 files changed, 197 insertions(+), 57 deletions(-)

diff --git a/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java b/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java
index 4b834c1..d680615 100644
--- a/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java
+++ b/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java
@@ -22,7 +22,9 @@ package org.apache.druid.data.input;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 
 import javax.annotation.Nullable;
 import java.util.ArrayList;
@@ -43,22 +45,55 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec
   public static final String TYPE = "maxSize";
 
   @VisibleForTesting
-  static final long DEFAULT_MAX_SPLIT_SIZE = 512 * 1024 * 1024;
+  static final HumanReadableBytes DEFAULT_MAX_SPLIT_SIZE = new HumanReadableBytes("1GiB");
 
-  private final long maxSplitSize;
+  /**
+   * There are two known issues when a split contains a large list of files.
+   *
+   * - 'jute.maxbuffer' in ZooKeeper. This system property controls the max size of ZNode. As its default is 500KB,
+   *   task allocation can fail if the serialized ingestion spec is larger than this limit.
+   * - 'max_allowed_packet' in MySQL. This is the max size of a communication packet sent to a MySQL server.
+   *   The default is either 64MB or 4MB depending on MySQL version. Updating metadata store can fail if the serialized
+   *   ingestion spec is larger than this limit.
+   *
+   * The default is conservatively chosen as 1000.
+   */
+  @VisibleForTesting
+  static final int DEFAULT_MAX_NUM_FILES = 1000;
+
+  private final HumanReadableBytes maxSplitSize;
+  private final int maxNumFiles;
 
   @JsonCreator
-  public MaxSizeSplitHintSpec(@JsonProperty("maxSplitSize") @Nullable Long maxSplitSize)
+  public MaxSizeSplitHintSpec(
+      @JsonProperty("maxSplitSize") @Nullable HumanReadableBytes maxSplitSize,
+      @JsonProperty("maxNumFiles") @Nullable Integer maxNumFiles
+  )
   {
     this.maxSplitSize = maxSplitSize == null ? DEFAULT_MAX_SPLIT_SIZE : maxSplitSize;
+    this.maxNumFiles = maxNumFiles == null ? DEFAULT_MAX_NUM_FILES : maxNumFiles;
+    Preconditions.checkArgument(this.maxSplitSize.getBytes() > 0, "maxSplitSize should be larger than 0");
+    Preconditions.checkArgument(this.maxNumFiles > 0, "maxNumFiles should be larger than 0");
+  }
+
+  @VisibleForTesting
+  public MaxSizeSplitHintSpec(long maxSplitSize, @Nullable Integer maxNumFiles)
+  {
+    this(new HumanReadableBytes(maxSplitSize), maxNumFiles);
   }
 
   @JsonProperty
-  public long getMaxSplitSize()
+  public HumanReadableBytes getMaxSplitSize()
   {
     return maxSplitSize;
   }
 
+  @JsonProperty
+  public int getMaxNumFiles()
+  {
+    return maxNumFiles;
+  }
+
   @Override
   public <T> Iterator<List<T>> split(Iterator<T> inputIterator, Function<T, InputFileAttribute> inputAttributeExtractor)
   {
@@ -68,6 +103,7 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec
     );
     return new Iterator<List<T>>()
     {
+      private final long maxSplitSizeBytes = maxSplitSize.getBytes();
       private T peeking;
 
       @Override
@@ -84,12 +120,13 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec
         }
         final List<T> current = new ArrayList<>();
         long splitSize = 0;
-        while (splitSize < maxSplitSize && (peeking != null || nonEmptyFileOnlyIterator.hasNext())) {
+        while (splitSize < maxSplitSizeBytes && (peeking != null || nonEmptyFileOnlyIterator.hasNext())) {
           if (peeking == null) {
             peeking = nonEmptyFileOnlyIterator.next();
           }
           final long size = inputAttributeExtractor.apply(peeking).getSize();
-          if (current.isEmpty() || splitSize + size < maxSplitSize) {
+          if (current.isEmpty() // each split should have at least one file even if the file is larger than maxSplitSize
+              || (splitSize + size < maxSplitSizeBytes && current.size() < maxNumFiles)) {
             current.add(peeking);
             splitSize += size;
             peeking = null;
@@ -113,12 +150,13 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec
       return false;
     }
     MaxSizeSplitHintSpec that = (MaxSizeSplitHintSpec) o;
-    return maxSplitSize == that.maxSplitSize;
+    return maxNumFiles == that.maxNumFiles &&
+           Objects.equals(maxSplitSize, that.maxSplitSize);
   }
 
   @Override
   public int hashCode()
   {
-    return Objects.hash(maxSplitSize);
+    return Objects.hash(maxSplitSize, maxNumFiles);
   }
 }
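
For reference, a minimal usage sketch of the new maxNumFiles cap, modeled on the testSplitSmallInputsWithMaxNumFilesEachSplitShouldHaveLessFilesAssigned test later in this patch. The integers stand in for file sizes, the expected 3/3/3/1 grouping assumes the split loop shown in the hunk above, and the class name is illustrative only.

import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;

import org.apache.druid.data.input.InputFileAttribute;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.java.util.common.HumanReadableBytes;

public class MaxNumFilesSketch
{
  public static void main(String[] args)
  {
    // Large byte budget, but at most 3 files per split, so maxNumFiles is the binding limit.
    final MaxSizeSplitHintSpec spec = new MaxSizeSplitHintSpec(new HumanReadableBytes("500M"), 3);

    // Ten inputs of "size" 3 each; the extractor treats each integer as a file size.
    final Function<Integer, InputFileAttribute> extractor = InputFileAttribute::new;
    final Iterator<List<Integer>> splits =
        spec.split(IntStream.generate(() -> 3).limit(10).iterator(), extractor);

    // Prints four splits holding 3, 3, 3, and 1 files respectively.
    while (splits.hasNext()) {
      System.out.println(splits.next());
    }
  }
}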
diff --git a/core/src/main/java/org/apache/druid/data/input/SegmentsSplitHintSpec.java b/core/src/main/java/org/apache/druid/data/input/SegmentsSplitHintSpec.java
index bfb4e8e..c3b0711 100644
--- a/core/src/main/java/org/apache/druid/data/input/SegmentsSplitHintSpec.java
+++ b/core/src/main/java/org/apache/druid/data/input/SegmentsSplitHintSpec.java
@@ -21,6 +21,7 @@ package org.apache.druid.data.input;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 
 import javax.annotation.Nullable;
 import java.util.Iterator;
@@ -41,28 +42,40 @@ public class SegmentsSplitHintSpec implements SplitHintSpec
 {
   public static final String TYPE = "segments";
 
-  private static final long DEFAULT_MAX_INPUT_SEGMENT_BYTES_PER_TASK = 500 * 1024 * 1024;
+  private static final HumanReadableBytes DEFAULT_MAX_INPUT_SEGMENT_BYTES_PER_TASK = new HumanReadableBytes("1GiB");
+  private static final int DEFAULT_MAX_NUM_SEGMENTS = 1000;
 
   /**
    * Maximum number of bytes of input segments to process in a single task.
    * If a single segment is larger than this number, it will be processed by itself in a single task.
    */
-  private final long maxInputSegmentBytesPerTask;
+  private final HumanReadableBytes maxInputSegmentBytesPerTask;
+  private final int maxNumSegments;
 
   @JsonCreator
-  public SegmentsSplitHintSpec(@JsonProperty("maxInputSegmentBytesPerTask") @Nullable Long maxInputSegmentBytesPerTask)
+  public SegmentsSplitHintSpec(
+      @JsonProperty("maxInputSegmentBytesPerTask") @Nullable HumanReadableBytes maxInputSegmentBytesPerTask,
+      @JsonProperty("maxNumSegments") @Nullable Integer maxNumSegments
+  )
   {
     this.maxInputSegmentBytesPerTask = maxInputSegmentBytesPerTask == null
                                        ? DEFAULT_MAX_INPUT_SEGMENT_BYTES_PER_TASK
                                        : maxInputSegmentBytesPerTask;
+    this.maxNumSegments = maxNumSegments == null ? DEFAULT_MAX_NUM_SEGMENTS : maxNumSegments;
   }
 
   @JsonProperty
-  public long getMaxInputSegmentBytesPerTask()
+  public HumanReadableBytes getMaxInputSegmentBytesPerTask()
   {
     return maxInputSegmentBytesPerTask;
   }
 
+  @JsonProperty
+  public int getMaxNumSegments()
+  {
+    return maxNumSegments;
+  }
+
   @Override
   public <T> Iterator<List<T>> split(Iterator<T> inputIterator, Function<T, InputFileAttribute> inputAttributeExtractor)
   {
@@ -80,13 +93,14 @@ public class SegmentsSplitHintSpec implements SplitHintSpec
       return false;
     }
     SegmentsSplitHintSpec that = (SegmentsSplitHintSpec) o;
-    return maxInputSegmentBytesPerTask == that.maxInputSegmentBytesPerTask;
+    return maxNumSegments == that.maxNumSegments &&
+           Objects.equals(maxInputSegmentBytesPerTask, that.maxInputSegmentBytesPerTask);
   }
 
   @Override
   public int hashCode()
   {
-    return Objects.hash(maxInputSegmentBytesPerTask);
+    return Objects.hash(maxInputSegmentBytesPerTask, maxNumSegments);
   }
 
   @Override
@@ -94,6 +108,7 @@ public class SegmentsSplitHintSpec implements SplitHintSpec
   {
     return "SegmentsSplitHintSpec{" +
            "maxInputSegmentBytesPerTask=" + maxInputSegmentBytesPerTask +
+           ", maxNumSegments=" + maxNumSegments +
            '}';
   }
 }
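
As a quick illustration of the updated SegmentsSplitHintSpec constructor, a hedged sketch only: it assumes HumanReadableBytes parses human-readable strings such as "500MiB" (as the docs in this patch suggest), and that null arguments fall back to the 1GiB / 1000 defaults declared above.

import org.apache.druid.data.input.SegmentsSplitHintSpec;
import org.apache.druid.java.util.common.HumanReadableBytes;

public class SegmentsSplitHintSpecSketch
{
  public static void main(String[] args)
  {
    // Explicit limits: 500MiB of input segments and at most 100 segments per subtask.
    final SegmentsSplitHintSpec explicit = new SegmentsSplitHintSpec(new HumanReadableBytes("500MiB"), 100);
    System.out.println(explicit);

    // Nulls pick up the defaults declared in the class above (1GiB and 1000).
    final SegmentsSplitHintSpec defaults = new SegmentsSplitHintSpec(null, null);
    System.out.println(defaults.getMaxInputSegmentBytesPerTask().getBytes() + " bytes, "
                       + defaults.getMaxNumSegments() + " segments");
  }
}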
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/SplittableInputSource.java b/core/src/main/java/org/apache/druid/data/input/impl/SplittableInputSource.java
index 28b6256..e983941 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/SplittableInputSource.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/SplittableInputSource.java
@@ -35,7 +35,7 @@ import java.util.stream.Stream;
  */
 public interface SplittableInputSource<T> extends InputSource
 {
-  SplitHintSpec DEFAULT_SPLIT_HINT_SPEC = new MaxSizeSplitHintSpec(null);
+  SplitHintSpec DEFAULT_SPLIT_HINT_SPEC = new MaxSizeSplitHintSpec(null, null);
 
   @JsonIgnore
   @Override
diff --git a/core/src/test/java/org/apache/druid/data/input/MaxSizeSplitHintSpecTest.java b/core/src/test/java/org/apache/druid/data/input/MaxSizeSplitHintSpecTest.java
index 5ec57c0b..0fc4f9a 100644
--- a/core/src/test/java/org/apache/druid/data/input/MaxSizeSplitHintSpecTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/MaxSizeSplitHintSpecTest.java
@@ -19,11 +19,15 @@
 
 package org.apache.druid.data.input;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Lists;
 import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.junit.Assert;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import java.io.IOException;
 import java.util.List;
@@ -32,27 +36,66 @@ import java.util.stream.IntStream;
 
 public class MaxSizeSplitHintSpecTest
 {
+  @Rule
+  public ExpectedException expectedException = ExpectedException.none();
+
   @Test
   public void testSerde() throws IOException
   {
     final ObjectMapper mapper = new ObjectMapper();
-    final MaxSizeSplitHintSpec original = new MaxSizeSplitHintSpec(1024L);
+    final MaxSizeSplitHintSpec original = new MaxSizeSplitHintSpec(new HumanReadableBytes(1024L), 20_000);
     final byte[] bytes = mapper.writeValueAsBytes(original);
     final MaxSizeSplitHintSpec fromJson = (MaxSizeSplitHintSpec) mapper.readValue(bytes, SplitHintSpec.class);
     Assert.assertEquals(original, fromJson);
   }
 
   @Test
-  public void testCreateWithNullReturningDefaultMaxSplitSize()
+  public void testReadFromJson() throws JsonProcessingException
+  {
+    final ObjectMapper mapper = new ObjectMapper();
+    final String json = "{"
+                        + "  \"type\":\"maxSize\","
+                        + "  \"maxSplitSize\":1024,"
+                        + "  \"maxNumFiles\":20000"
+                        + "}\n";
+    final MaxSizeSplitHintSpec fromJson = (MaxSizeSplitHintSpec) mapper.readValue(json, SplitHintSpec.class);
+    Assert.assertEquals(new MaxSizeSplitHintSpec(new HumanReadableBytes(1024L), 20_000), fromJson);
+  }
+
+  @Test
+  public void testConstructorWith0MaxNumFiles()
   {
-    Assert.assertEquals(MaxSizeSplitHintSpec.DEFAULT_MAX_SPLIT_SIZE, new MaxSizeSplitHintSpec(null).getMaxSplitSize());
+    expectedException.expect(IllegalArgumentException.class);
+    expectedException.expectMessage("maxNumFiles should be larger than 0");
+    new MaxSizeSplitHintSpec(null, 0);
+  }
+
+  @Test
+  public void testConstructorWith0MaxSplitSize()
+  {
+    expectedException.expect(IllegalArgumentException.class);
+    expectedException.expectMessage("maxSplitSize should be larger than 0");
+    new MaxSizeSplitHintSpec(0, null);
+  }
+
+  @Test
+  public void testDefaults()
+  {
+    Assert.assertEquals(
+        MaxSizeSplitHintSpec.DEFAULT_MAX_SPLIT_SIZE,
+        new MaxSizeSplitHintSpec(null, null).getMaxSplitSize()
+    );
+    Assert.assertEquals(
+        MaxSizeSplitHintSpec.DEFAULT_MAX_NUM_FILES,
+        new MaxSizeSplitHintSpec(null, null).getMaxNumFiles()
+    );
   }
 
   @Test
   public void testSplitSmallInputsGroupingIntoLargerSplits()
   {
     final int eachInputSize = 3;
-    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(10L);
+    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(10L), 10_000);
     final Function<Integer, InputFileAttribute> inputAttributeExtractor = InputFileAttribute::new;
     final List<List<Integer>> splits = Lists.newArrayList(
         splitHintSpec.split(IntStream.generate(() -> eachInputSize).limit(10).iterator(), inputAttributeExtractor)
@@ -68,7 +111,7 @@ public class MaxSizeSplitHintSpecTest
   public void testSplitLargeInputsReturningSplitsOfSingleInput()
   {
     final int eachInputSize = 15;
-    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(10L);
+    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(10L), 10_000);
     final Function<Integer, InputFileAttribute> inputAttributeExtractor = InputFileAttribute::new;
     final List<List<Integer>> splits = Lists.newArrayList(
         splitHintSpec.split(IntStream.generate(() -> eachInputSize).limit(10).iterator(), inputAttributeExtractor)
@@ -80,10 +123,26 @@ public class MaxSizeSplitHintSpecTest
   }
 
   @Test
+  public void testSplitSmallInputsWithMaxNumFilesEachSplitShouldHaveLessFilesAssigned()
+  {
+    final int eachInputSize = 3;
+    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes("500M"), 3);
+    final Function<Integer, InputFileAttribute> inputAttributeExtractor = InputFileAttribute::new;
+    final List<List<Integer>> splits = Lists.newArrayList(
+        splitHintSpec.split(IntStream.generate(() -> eachInputSize).limit(10).iterator(), inputAttributeExtractor)
+    );
+    Assert.assertEquals(4, splits.size());
+    Assert.assertEquals(3, splits.get(0).size());
+    Assert.assertEquals(3, splits.get(1).size());
+    Assert.assertEquals(3, splits.get(2).size());
+    Assert.assertEquals(1, splits.get(3).size());
+  }
+
+  @Test
   public void testSplitSkippingEmptyInputs()
   {
     final int nonEmptyInputSize = 3;
-    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(10L);
+    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(10L), null);
     final Function<Integer, InputFileAttribute> inputAttributeExtractor = InputFileAttribute::new;
     final IntStream dataStream = IntStream.concat(
         IntStream.concat(
@@ -105,6 +164,10 @@ public class MaxSizeSplitHintSpecTest
   @Test
   public void testEquals()
   {
-    EqualsVerifier.forClass(MaxSizeSplitHintSpec.class).withNonnullFields("maxSplitSize").usingGetClass().verify();
+    EqualsVerifier.forClass(MaxSizeSplitHintSpec.class)
+                  .withNonnullFields("maxSplitSize")
+                  .withNonnullFields("maxNumFiles")
+                  .usingGetClass()
+                  .verify();
   }
 }
diff --git a/core/src/test/java/org/apache/druid/data/input/impl/LocalInputSourceTest.java b/core/src/test/java/org/apache/druid/data/input/impl/LocalInputSourceTest.java
index 9781435..526035d 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/LocalInputSourceTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/LocalInputSourceTest.java
@@ -25,6 +25,7 @@ import nl.jqno.equalsverifier.EqualsVerifier;
 import org.apache.druid.data.input.InputSource;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.MaxSizeSplitHintSpec;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.utils.Streams;
 import org.easymock.EasyMock;
 import org.junit.Assert;
@@ -69,11 +70,11 @@ public class LocalInputSourceTest
   public void testCreateSplitsRespectingSplitHintSpec()
   {
     final long fileSize = 15;
-    final long maxSplitSize = 50;
+    final HumanReadableBytes maxSplitSize = new HumanReadableBytes(50L);
     final Set<File> files = mockFiles(10, fileSize);
     final LocalInputSource inputSource = new LocalInputSource(null, null, files);
     final List<InputSplit<List<File>>> splits = inputSource
-        .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(maxSplitSize))
+        .createSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(maxSplitSize, null))
         .collect(Collectors.toList());
     Assert.assertEquals(4, splits.size());
     Assert.assertEquals(3, splits.get(0).get().size());
@@ -86,12 +87,12 @@ public class LocalInputSourceTest
   public void testEstimateNumSplitsRespectingSplitHintSpec()
   {
     final long fileSize = 13;
-    final long maxSplitSize = 40;
+    final HumanReadableBytes maxSplitSize = new HumanReadableBytes(40L);
     final Set<File> files = mockFiles(10, fileSize);
     final LocalInputSource inputSource = new LocalInputSource(null, null, files);
     Assert.assertEquals(
         4,
-        inputSource.estimateNumSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(maxSplitSize))
+        inputSource.estimateNumSplits(new NoopInputFormat(), new MaxSizeSplitHintSpec(maxSplitSize, null))
     );
   }
 
diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md
index 8538145..0ecd952 100644
--- a/docs/ingestion/native-batch.md
+++ b/docs/ingestion/native-batch.md
@@ -232,7 +232,8 @@ The size-based split hint spec is respected by all splittable input sources exce
 |property|description|default|required?|
 |--------|-----------|-------|---------|
 |type|This should always be `maxSize`.|none|yes|
-|maxSplitSize|Maximum number of bytes of input files to process in a single task. If a single file is larger than this number, it will be processed by itself in a single task (Files are never split across tasks yet).|500MB|no|
+|maxSplitSize|Maximum number of bytes of input files to process in a single subtask. If a single file is larger than this number, it will be processed by itself in a single subtask (Files are never split across tasks yet). Note that one subtask will not process more files than `maxNumFiles` even when their total size is smaller than `maxSplitSize`. [Human-readable format](../configuration/human-readable-byte.md) is supported.|1GiB|no|
+|maxNumFiles|Maximum number of input files to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (`jute.maxbuffer`) and the max packet size in MySQL (`max_allowed_packet`). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. Note that one subtask will not process more data than `maxSplitS [...]
 
 #### Segments Split Hint Spec
 
@@ -241,7 +242,8 @@ The segments split hint spec is used only for [`DruidInputSource`](#druid-input-
 |property|description|default|required?|
 |--------|-----------|-------|---------|
 |type|This should always be `segments`.|none|yes|
-|maxInputSegmentBytesPerTask|Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks).|500MB|no|
+|maxInputSegmentBytesPerTask|Maximum number of bytes of input segments to process in a single subtask. If a single segment is larger than this number, it will be processed by itself in a single subtask (input segments are never split across tasks). Note that one subtask will not process more segments than `maxNumSegments` even when their total size is smaller than `maxInputSegmentBytesPerTask`. [Human-readable format](../configuration/human-readable-byte.md) is supported.|1GiB|no|
+|maxNumSegments|Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (`jute.maxbuffer`) and the max packet size in MySQL (`max_allowed_packet`). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. Note that one subtask will not process more data than `max [...]
 
 ### `partitionsSpec`
 
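
To make the table rows above concrete, a small serde sketch mirroring the testReadFromJson test earlier in this patch: a plain ObjectMapper resolves the "maxSize" type through the annotations on SplitHintSpec. The numeric maxSplitSize form is used because that is what the test exercises; per the docs above, ingestion specs also accept human-readable strings such as "1GiB".

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.data.input.SplitHintSpec;

public class SplitHintSpecJsonSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // The shape described in the table: a byte budget plus the new file-count cap.
    final String json = "{\"type\":\"maxSize\",\"maxSplitSize\":1073741824,\"maxNumFiles\":500}";
    final MaxSizeSplitHintSpec spec = (MaxSizeSplitHintSpec) mapper.readValue(json, SplitHintSpec.class);
    System.out.println(spec.getMaxSplitSize().getBytes() + " bytes, " + spec.getMaxNumFiles() + " files");
  }
}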
diff --git a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
index 2bd9d58..d38bc66 100644
--- a/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
+++ b/extensions-contrib/aliyun-oss-extensions/src/test/java/org/apache/druid/data/input/aliyun/OssInputSourceTest.java
@@ -51,6 +51,7 @@ import org.apache.druid.data.input.impl.JsonInputFormat;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.initialization.DruidModule;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.java.util.common.parsers.JSONPathSpec;
@@ -339,7 +340,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(1L) // set maxSplitSize to 1 so that each inputSplit has only one object
+        new MaxSizeSplitHintSpec(null, 1)
     );
 
     Assert.assertEquals(EXPECTED_COORDS, splits.map(InputSplit::get).collect(Collectors.toList()));
@@ -365,7 +366,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(CONTENT.length * 3L)
+        new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
     );
 
     Assert.assertEquals(
diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
index 8a7f16c..4a9773a 100644
--- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
+++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/data/input/azure/AzureInputSourceTest.java
@@ -163,7 +163,7 @@ public class AzureInputSourceTest extends EasyMockSupport
     );
 
     Stream<InputSplit<List<CloudObjectLocation>>> cloudObjectStream = azureInputSource.getPrefixesSplitStream(
-        new MaxSizeSplitHintSpec(1L)
+        new MaxSizeSplitHintSpec(null, 1)
     );
 
     List<List<CloudObjectLocation>> actualCloudLocationList = cloudObjectStream.map(InputSplit::get)
diff --git a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
index 3888398..e68a9a4 100644
--- a/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
+++ b/extensions-core/google-extensions/src/test/java/org/apache/druid/data/input/google/GoogleCloudStorageInputSourceTest.java
@@ -44,6 +44,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.initialization.DruidModule;
 import org.apache.druid.jackson.DefaultObjectMapper;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.java.util.common.parsers.JSONPathSpec;
@@ -170,7 +171,7 @@ public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTe
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(1L) // set maxSplitSize to 1 so that each inputSplit has only one object
+        new MaxSizeSplitHintSpec(null, 1)
     );
 
     Assert.assertEquals(EXPECTED_OBJECTS, splits.map(InputSplit::get).collect(Collectors.toList()));
@@ -192,7 +193,7 @@ public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTe
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(CONTENT.length * 3L)
+        new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
     );
 
     Assert.assertEquals(
diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
index 044930f..170791e 100644
--- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
+++ b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
@@ -251,7 +251,7 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest
     public void hasCorrectSplits() throws IOException
     {
       // Set maxSplitSize to 1 so that each inputSplit has only one object
-      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(1L))
+      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(1L, null))
                                                   .collect(Collectors.toList());
       splits.forEach(split -> Assert.assertEquals(1, split.get().size()));
       Set<Path> actualPaths = splits.stream()
@@ -264,7 +264,7 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest
     @Test
     public void createSplitsRespectSplitHintSpec() throws IOException
     {
-      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(7L))
+      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(7L, null))
                                                   .collect(Collectors.toList());
       Assert.assertEquals(2, splits.size());
       Assert.assertEquals(2, splits.get(0).get().size());
@@ -275,15 +275,14 @@ public class HdfsInputSourceTest extends InitializedNullHandlingTest
     public void hasCorrectNumberOfSplits() throws IOException
     {
       // Set maxSplitSize to 1 so that each inputSplit has only one object
-      int numSplits = target.estimateNumSplits(null, new MaxSizeSplitHintSpec(1L));
+      int numSplits = target.estimateNumSplits(null, new MaxSizeSplitHintSpec(1L, null));
       Assert.assertEquals(NUM_FILE, numSplits);
     }
 
     @Test
     public void createCorrectInputSourceWithSplit() throws Exception
     {
-      // Set maxSplitSize to 1 so that each inputSplit has only one object
-      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(1L))
+      List<InputSplit<List<Path>>> splits = target.createSplits(null, new MaxSizeSplitHintSpec(null, 1))
                                                   .collect(Collectors.toList());
 
       for (InputSplit<List<Path>> split : splits) {
diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
index 8dd82a9..9efaf4f 100644
--- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
+++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/data/input/s3/S3InputSourceTest.java
@@ -52,6 +52,7 @@ import org.apache.druid.data.input.impl.JsonInputFormat;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.initialization.DruidModule;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.java.util.common.parsers.JSONPathSpec;
@@ -390,7 +391,7 @@ public class S3InputSourceTest extends InitializedNullHandlingTest
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(1L) // set maxSplitSize to 1 so that each inputSplit has only one object
+        new MaxSizeSplitHintSpec(null, 1)
     );
 
     Assert.assertEquals(EXPECTED_COORDS, splits.map(InputSplit::get).collect(Collectors.toList()));
@@ -417,7 +418,7 @@ public class S3InputSourceTest extends InitializedNullHandlingTest
 
     Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(
         new JsonInputFormat(JSONPathSpec.DEFAULT, null, null),
-        new MaxSizeSplitHintSpec(CONTENT.length * 3L)
+        new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null)
     );
 
     Assert.assertEquals(
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
index 6248828..3ba936e 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
@@ -38,6 +38,7 @@ import org.apache.druid.indexing.common.ReingestionTimelineUtils;
 import org.apache.druid.indexing.common.RetryPolicyFactory;
 import org.apache.druid.indexing.common.SegmentLoaderFactory;
 import org.apache.druid.indexing.input.DruidInputSource;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.parsers.ParseException;
 import org.apache.druid.java.util.emitter.EmittingLogger;
@@ -289,7 +290,14 @@ public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory<Input
             retryPolicyFactory,
             dataSource,
             interval,
-            splitHintSpec == null ? new SegmentsSplitHintSpec(maxInputSegmentBytesPerTask) : splitHintSpec
+            splitHintSpec == null
+            ? new SegmentsSplitHintSpec(
+                maxInputSegmentBytesPerTask == null
+                ? null
+                : new HumanReadableBytes(maxInputSegmentBytesPerTask),
+                null
+            )
+            : splitHintSpec
         )
     );
   }
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index b9cc575..4388c70 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -233,7 +233,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
               retryPolicyFactory,
               dataSource,
               interval,
-              splitHintSpec == null ? new MaxSizeSplitHintSpec(null) : splitHintSpec
+              splitHintSpec == null ? SplittableInputSource.DEFAULT_SPLIT_HINT_SPEC : splitHintSpec
           )
       );
     } else {
@@ -253,7 +253,7 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
               retryPolicyFactory,
               dataSource,
               interval,
-              splitHintSpec == null ? new MaxSizeSplitHintSpec(null) : splitHintSpec
+              splitHintSpec == null ? SplittableInputSource.DEFAULT_SPLIT_HINT_SPEC : splitHintSpec
           )
       );
     } else {
@@ -294,8 +294,10 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
   {
     final SplitHintSpec convertedSplitHintSpec;
     if (splitHintSpec instanceof SegmentsSplitHintSpec) {
+      final SegmentsSplitHintSpec segmentsSplitHintSpec = (SegmentsSplitHintSpec) splitHintSpec;
       convertedSplitHintSpec = new MaxSizeSplitHintSpec(
-          ((SegmentsSplitHintSpec) splitHintSpec).getMaxInputSegmentBytesPerTask()
+          segmentsSplitHintSpec.getMaxInputSegmentBytesPerTask(),
+          segmentsSplitHintSpec.getMaxNumSegments()
       );
     } else {
       convertedSplitHintSpec = splitHintSpec;
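
The hunk above converts a user-facing SegmentsSplitHintSpec into a MaxSizeSplitHintSpec so that the per-segment-file split path honors both the byte budget and the new count limit. A standalone restatement of that mapping, using the getter names from the diffs above (sketch only):

import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.data.input.SegmentsSplitHintSpec;
import org.apache.druid.data.input.SplitHintSpec;

public class SplitHintConversionSketch
{
  // Segments-oriented limits carry over to the size/file-count hint used when
  // splitting the underlying segment files; other hint specs pass through unchanged.
  static SplitHintSpec convert(SplitHintSpec splitHintSpec)
  {
    if (splitHintSpec instanceof SegmentsSplitHintSpec) {
      final SegmentsSplitHintSpec segmentsSpec = (SegmentsSplitHintSpec) splitHintSpec;
      return new MaxSizeSplitHintSpec(
          segmentsSpec.getMaxInputSegmentBytesPerTask(),
          segmentsSpec.getMaxNumSegments()
      );
    }
    return splitHintSpec;
  }
}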
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java
index e17119a..14838d3 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/ClientCompactionTaskQuerySerdeTest.java
@@ -44,6 +44,7 @@ import org.apache.druid.indexing.common.TestUtils;
 import org.apache.druid.indexing.common.stats.RowIngestionMetersFactory;
 import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig;
 import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.data.BitmapSerde.DefaultBitmapSerdeFactory;
@@ -85,7 +86,7 @@ public class ClientCompactionTaskQuerySerdeTest
             40000,
             2000L,
             30000L,
-            new SegmentsSplitHintSpec(100000L),
+            new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10),
             new IndexSpec(
                 new DefaultBitmapSerdeFactory(),
                 CompressionStrategy.LZ4,
@@ -174,7 +175,7 @@ public class ClientCompactionTaskQuerySerdeTest
                 2000L,
                 null,
                 null,
-                new SegmentsSplitHintSpec(100000L),
+                new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10),
                 new DynamicPartitionsSpec(100, 30000L),
                 new IndexSpec(
                     new DefaultBitmapSerdeFactory(),
@@ -217,7 +218,7 @@ public class ClientCompactionTaskQuerySerdeTest
             40000,
             2000L,
             30000L,
-            new SegmentsSplitHintSpec(100000L),
+            new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10),
             new IndexSpec(
                 new DefaultBitmapSerdeFactory(),
                 CompressionStrategy.LZ4,
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java
index 8e078e4..f56f6d4 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java
@@ -270,7 +270,7 @@ public class CompactionTaskParallelRunTest extends AbstractParallelIndexSupervis
             RETRY_POLICY_FACTORY,
             DATA_SOURCE,
             INTERVAL_TO_INDEX,
-            new SegmentsSplitHintSpec(1L) // each segment gets its own split with this config
+            new SegmentsSplitHintSpec(null, 1)
         )
     );
 
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
index 858c6c9..3ba9441 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java
@@ -239,7 +239,7 @@ public class AbstractParallelIndexSupervisorTaskTest extends IngestionTestBase
         null,
         null,
         null,
-        new MaxSizeSplitHintSpec(1L), // set maxSplitSize to 1 so that each split has only one file.
+        new MaxSizeSplitHintSpec(null, 1),
         partitionsSpec,
         null,
         null,
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java
index 18bc3fc..373bef3 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java
@@ -37,7 +37,8 @@ import java.util.function.Function;
 @Guice(moduleFactory = DruidTestModuleFactory.class)
 public class ITBestEffortRollupParallelIndexTest extends AbstractITBatchIndexTest
 {
-  // The task specs here use the MaxSizeSplitHintSpec with maxSplitSize of 1. This is to create splits per file.
+  // This ingestion spec uses a splitHintSpec with a maxSplitSize of 1 to verify that the task handles
+  // a maxSplitSize of 1 properly.
   private static final String INDEX_TASK = "/indexer/wikipedia_parallel_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_parallel_index_queries.json";
   private static final String REINDEX_TASK = "/indexer/wikipedia_parallel_reindex_task.json";
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java
index 99bc1e1..c49d5a9 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java
@@ -38,7 +38,8 @@ import java.util.function.Function;
 @Guice(moduleFactory = DruidTestModuleFactory.class)
 public class ITPerfectRollupParallelIndexTest extends AbstractITBatchIndexTest
 {
-  // The task specs here use the MaxSizeSplitHintSpec with maxSplitSize of 1. This is to create splits per file.
+  // This ingestion spec uses a splitHintSpec with a maxSplitSize of 1 to verify that the task handles
+  // a maxSplitSize of 1 properly.
   private static final String INDEX_TASK = "/indexer/wikipedia_parallel_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_parallel_index_queries.json";
   private static final String INDEX_DATASOURCE = "wikipedia_parallel_index_test";
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
index d38c09f..8c4242c 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
@@ -80,7 +80,7 @@
       "partitionsSpec": %%PARTITIONS_SPEC%%,
       "splitHintSpec": {
         "type": "maxSize",
-        "maxSplitSize": 1
+        "maxNumFiles": 1
       }
     }
   }
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json
index a533a84..46de14c 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json
@@ -79,7 +79,7 @@
       "maxNumConcurrentSubTasks": 4,
       "splitHintSpec": {
         "type": "maxSize",
-        "maxSplitSize": 1
+        "maxNumFiles": 1
       },
       "forceGuaranteedRollup": %%FORCE_GUARANTEED_ROLLUP%%,
       "partitionsSpec": %%PARTITIONS_SPEC%%
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json
index 87cb498..e995a0f 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_ingest_segment_index_task.json
@@ -61,7 +61,7 @@
       "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%",
       "splitHintSpec": {
         "type": "maxSize",
-        "maxSplitSize": 1
+        "maxNumFiles": 1
       },
       "partitionsSpec": %%PARTITIONS_SPEC%%
     }
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
index 84842b4..9ab153c 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
@@ -67,7 +67,7 @@
             "forceGuaranteedRollup": "%%FORCE_GUARANTEED_ROLLUP%%",
             "splitHintSpec": {
                 "type": "maxSize",
-                "maxSplitSize": 1
+                "maxNumFiles": 1
             },
             "partitionsSpec": %%PARTITIONS_SPEC%%
         }
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java
index a2d36c1..a04f286 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.ImmutableMap;
 import org.apache.druid.data.input.SegmentsSplitHintSpec;
 import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.data.CompressionFactory;
 import org.apache.druid.segment.data.CompressionStrategy;
@@ -164,7 +165,7 @@ public class DataSourceCompactionConfigTest
         1000,
         10000L,
         2000L,
-        new SegmentsSplitHintSpec(10000L),
+        new SegmentsSplitHintSpec(new HumanReadableBytes(10000L), null),
         new IndexSpec(
             new RoaringBitmapSerdeFactory(false),
             CompressionStrategy.LZF,
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/UserCompactionTaskQueryTuningConfigTest.java b/server/src/test/java/org/apache/druid/server/coordinator/UserCompactionTaskQueryTuningConfigTest.java
index a1e6e86..56344e5 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/UserCompactionTaskQueryTuningConfigTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/UserCompactionTaskQueryTuningConfigTest.java
@@ -22,6 +22,7 @@ package org.apache.druid.server.coordinator;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.druid.data.input.SegmentsSplitHintSpec;
 import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.HumanReadableBytes;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.data.CompressionFactory;
 import org.apache.druid.segment.data.CompressionStrategy;
@@ -55,7 +56,7 @@ public class UserCompactionTaskQueryTuningConfigTest
         1000,
         10000L,
         2000L,
-        new SegmentsSplitHintSpec(42L),
+        new SegmentsSplitHintSpec(new HumanReadableBytes(42L), null),
         new IndexSpec(
             new RoaringBitmapSerdeFactory(false),
             CompressionStrategy.LZF,
diff --git a/website/.spelling b/website/.spelling
index 50e27f0..292476a 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -19,6 +19,7 @@
 # one word per line, to define a file override use ' - filename'
 # where filename is relative to this configuration file
 32-bit
+500MiB
 64-bit
 ACL
 APIs
@@ -281,6 +282,8 @@ lookback
 lookups
 mapreduce
 masse
+maxNumFiles
+maxNumSegments
 max_map_count
 memcached
 mergeable
@@ -373,6 +376,7 @@ subqueries
 subquery
 subsecond
 substring
+subtask
 subtasks
 symlink
 tiering

